diff options
Diffstat (limited to 'qemu/tests/image-fuzzer/qcow2/layout.py')
-rw-r--r-- | qemu/tests/image-fuzzer/qcow2/layout.py | 612 |
1 files changed, 612 insertions, 0 deletions
diff --git a/qemu/tests/image-fuzzer/qcow2/layout.py b/qemu/tests/image-fuzzer/qcow2/layout.py new file mode 100644 index 000000000..63e801f4e --- /dev/null +++ b/qemu/tests/image-fuzzer/qcow2/layout.py @@ -0,0 +1,612 @@ +# Generator of fuzzed qcow2 images +# +# Copyright (C) 2014 Maria Kustova <maria.k@catit.be> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import random +import struct +import fuzz +from math import ceil +from os import urandom +from itertools import chain + +MAX_IMAGE_SIZE = 10 * (1 << 20) +# Standard sizes +UINT32_S = 4 +UINT64_S = 8 + + +class Field(object): + + """Atomic image element (field). + + The class represents an image field as quadruple of a data format + of value necessary for its packing to binary form, an offset from + the beginning of the image, a value and a name. + + The field can be iterated as a list [format, offset, value, name]. + """ + + __slots__ = ('fmt', 'offset', 'value', 'name') + + def __init__(self, fmt, offset, val, name): + self.fmt = fmt + self.offset = offset + self.value = val + self.name = name + + def __iter__(self): + return iter([self.fmt, self.offset, self.value, self.name]) + + def __repr__(self): + return "Field(fmt='%s', offset=%d, value=%s, name=%s)" % \ + (self.fmt, self.offset, str(self.value), self.name) + + +class FieldsList(object): + + """List of fields. + + The class allows access to a field in the list by its name. + """ + + def __init__(self, meta_data=None): + if meta_data is None: + self.data = [] + else: + self.data = [Field(*f) + for f in meta_data] + + def __getitem__(self, name): + return [x for x in self.data if x.name == name] + + def __iter__(self): + return iter(self.data) + + def __len__(self): + return len(self.data) + + +class Image(object): + + """ Qcow2 image object. + + This class allows to create qcow2 images with random valid structures and + values, fuzz them via external qcow2.fuzz module and write the result to + a file. + """ + + def __init__(self, backing_file_name=None): + """Create a random valid qcow2 image with the correct header and stored + backing file name. + """ + cluster_bits, self.image_size = self._size_params() + self.cluster_size = 1 << cluster_bits + self.header = FieldsList() + self.backing_file_name = FieldsList() + self.backing_file_format = FieldsList() + self.feature_name_table = FieldsList() + self.end_of_extension_area = FieldsList() + self.l2_tables = FieldsList() + self.l1_table = FieldsList() + self.refcount_table = FieldsList() + self.refcount_blocks = FieldsList() + self.ext_offset = 0 + self.create_header(cluster_bits, backing_file_name) + self.set_backing_file_name(backing_file_name) + self.data_clusters = self._alloc_data(self.image_size, + self.cluster_size) + # Percentage of fields will be fuzzed + self.bias = random.uniform(0.2, 0.5) + + def __iter__(self): + return chain(self.header, self.backing_file_format, + self.feature_name_table, self.end_of_extension_area, + self.backing_file_name, self.l1_table, self.l2_tables, + self.refcount_table, self.refcount_blocks) + + def create_header(self, cluster_bits, backing_file_name=None): + """Generate a random valid header.""" + meta_header = [ + ['>4s', 0, "QFI\xfb", 'magic'], + ['>I', 4, random.randint(2, 3), 'version'], + ['>Q', 8, 0, 'backing_file_offset'], + ['>I', 16, 0, 'backing_file_size'], + ['>I', 20, cluster_bits, 'cluster_bits'], + ['>Q', 24, self.image_size, 'size'], + ['>I', 32, 0, 'crypt_method'], + ['>I', 36, 0, 'l1_size'], + ['>Q', 40, 0, 'l1_table_offset'], + ['>Q', 48, 0, 'refcount_table_offset'], + ['>I', 56, 0, 'refcount_table_clusters'], + ['>I', 60, 0, 'nb_snapshots'], + ['>Q', 64, 0, 'snapshots_offset'], + ['>Q', 72, 0, 'incompatible_features'], + ['>Q', 80, 0, 'compatible_features'], + ['>Q', 88, 0, 'autoclear_features'], + # Only refcount_order = 4 is supported by current (07.2014) + # implementation of QEMU + ['>I', 96, 4, 'refcount_order'], + ['>I', 100, 0, 'header_length'] + ] + self.header = FieldsList(meta_header) + + if self.header['version'][0].value == 2: + self.header['header_length'][0].value = 72 + else: + self.header['incompatible_features'][0].value = \ + random.getrandbits(2) + self.header['compatible_features'][0].value = random.getrandbits(1) + self.header['header_length'][0].value = 104 + # Extensions start at the header last field offset and the field size + self.ext_offset = struct.calcsize( + self.header['header_length'][0].fmt) + \ + self.header['header_length'][0].offset + end_of_extension_area_len = 2 * UINT32_S + free_space = self.cluster_size - self.ext_offset - \ + end_of_extension_area_len + # If the backing file name specified and there is enough space for it + # in the first cluster, then it's placed in the very end of the first + # cluster. + if (backing_file_name is not None) and \ + (free_space >= len(backing_file_name)): + self.header['backing_file_size'][0].value = len(backing_file_name) + self.header['backing_file_offset'][0].value = \ + self.cluster_size - len(backing_file_name) + + def set_backing_file_name(self, backing_file_name=None): + """Add the name of the backing file at the offset specified + in the header. + """ + if (backing_file_name is not None) and \ + (not self.header['backing_file_offset'][0].value == 0): + data_len = len(backing_file_name) + data_fmt = '>' + str(data_len) + 's' + self.backing_file_name = FieldsList([ + [data_fmt, self.header['backing_file_offset'][0].value, + backing_file_name, 'bf_name'] + ]) + + def set_backing_file_format(self, backing_file_fmt=None): + """Generate the header extension for the backing file format.""" + if backing_file_fmt is not None: + # Calculation of the free space available in the first cluster + end_of_extension_area_len = 2 * UINT32_S + high_border = (self.header['backing_file_offset'][0].value or + (self.cluster_size - 1)) - \ + end_of_extension_area_len + free_space = high_border - self.ext_offset + ext_size = 2 * UINT32_S + ((len(backing_file_fmt) + 7) & ~7) + + if free_space >= ext_size: + ext_data_len = len(backing_file_fmt) + ext_data_fmt = '>' + str(ext_data_len) + 's' + ext_padding_len = 7 - (ext_data_len - 1) % 8 + self.backing_file_format = FieldsList([ + ['>I', self.ext_offset, 0xE2792ACA, 'ext_magic'], + ['>I', self.ext_offset + UINT32_S, ext_data_len, + 'ext_length'], + [ext_data_fmt, self.ext_offset + UINT32_S * 2, + backing_file_fmt, 'bf_format'] + ]) + self.ext_offset = \ + struct.calcsize( + self.backing_file_format['bf_format'][0].fmt) + \ + ext_padding_len + \ + self.backing_file_format['bf_format'][0].offset + + def create_feature_name_table(self): + """Generate a random header extension for names of features used in + the image. + """ + def gen_feat_ids(): + """Return random feature type and feature bit.""" + return (random.randint(0, 2), random.randint(0, 63)) + + end_of_extension_area_len = 2 * UINT32_S + high_border = (self.header['backing_file_offset'][0].value or + (self.cluster_size - 1)) - \ + end_of_extension_area_len + free_space = high_border - self.ext_offset + # Sum of sizes of 'magic' and 'length' header extension fields + ext_header_len = 2 * UINT32_S + fnt_entry_size = 6 * UINT64_S + num_fnt_entries = min(10, (free_space - ext_header_len) / + fnt_entry_size) + if not num_fnt_entries == 0: + feature_tables = [] + feature_ids = [] + inner_offset = self.ext_offset + ext_header_len + feat_name = 'some cool feature' + while len(feature_tables) < num_fnt_entries * 3: + feat_type, feat_bit = gen_feat_ids() + # Remove duplicates + while (feat_type, feat_bit) in feature_ids: + feat_type, feat_bit = gen_feat_ids() + feature_ids.append((feat_type, feat_bit)) + feat_fmt = '>' + str(len(feat_name)) + 's' + feature_tables += [['B', inner_offset, + feat_type, 'feature_type'], + ['B', inner_offset + 1, feat_bit, + 'feature_bit_number'], + [feat_fmt, inner_offset + 2, + feat_name, 'feature_name'] + ] + inner_offset += fnt_entry_size + # No padding for the extension is necessary, because + # the extension length is multiple of 8 + self.feature_name_table = FieldsList([ + ['>I', self.ext_offset, 0x6803f857, 'ext_magic'], + # One feature table contains 3 fields and takes 48 bytes + ['>I', self.ext_offset + UINT32_S, + len(feature_tables) / 3 * 48, 'ext_length'] + ] + feature_tables) + self.ext_offset = inner_offset + + def set_end_of_extension_area(self): + """Generate a mandatory header extension marking end of header + extensions. + """ + self.end_of_extension_area = FieldsList([ + ['>I', self.ext_offset, 0, 'ext_magic'], + ['>I', self.ext_offset + UINT32_S, 0, 'ext_length'] + ]) + + def create_l_structures(self): + """Generate random valid L1 and L2 tables.""" + def create_l2_entry(host, guest, l2_cluster): + """Generate one L2 entry.""" + offset = l2_cluster * self.cluster_size + l2_size = self.cluster_size / UINT64_S + entry_offset = offset + UINT64_S * (guest % l2_size) + cluster_descriptor = host * self.cluster_size + if not self.header['version'][0].value == 2: + cluster_descriptor += random.randint(0, 1) + # While snapshots are not supported, bit #63 = 1 + # Compressed clusters are not supported => bit #62 = 0 + entry_val = (1 << 63) + cluster_descriptor + return ['>Q', entry_offset, entry_val, 'l2_entry'] + + def create_l1_entry(l2_cluster, l1_offset, guest): + """Generate one L1 entry.""" + l2_size = self.cluster_size / UINT64_S + entry_offset = l1_offset + UINT64_S * (guest / l2_size) + # While snapshots are not supported bit #63 = 1 + entry_val = (1 << 63) + l2_cluster * self.cluster_size + return ['>Q', entry_offset, entry_val, 'l1_entry'] + + if len(self.data_clusters) == 0: + # All metadata for an empty guest image needs 4 clusters: + # header, rfc table, rfc block, L1 table. + # Header takes cluster #0, other clusters ##1-3 can be used + l1_offset = random.randint(1, 3) * self.cluster_size + l1 = [['>Q', l1_offset, 0, 'l1_entry']] + l2 = [] + else: + meta_data = self._get_metadata() + guest_clusters = random.sample(range(self.image_size / + self.cluster_size), + len(self.data_clusters)) + # Number of entries in a L1/L2 table + l_size = self.cluster_size / UINT64_S + # Number of clusters necessary for L1 table + l1_size = int(ceil((max(guest_clusters) + 1) / float(l_size**2))) + l1_start = self._get_adjacent_clusters(self.data_clusters | + meta_data, l1_size) + meta_data |= set(range(l1_start, l1_start + l1_size)) + l1_offset = l1_start * self.cluster_size + # Indices of L2 tables + l2_ids = [] + # Host clusters allocated for L2 tables + l2_clusters = [] + # L1 entries + l1 = [] + # L2 entries + l2 = [] + for host, guest in zip(self.data_clusters, guest_clusters): + l2_id = guest / l_size + if l2_id not in l2_ids: + l2_ids.append(l2_id) + l2_clusters.append(self._get_adjacent_clusters( + self.data_clusters | meta_data | set(l2_clusters), + 1)) + l1.append(create_l1_entry(l2_clusters[-1], l1_offset, + guest)) + l2.append(create_l2_entry(host, guest, + l2_clusters[l2_ids.index(l2_id)])) + self.l2_tables = FieldsList(l2) + self.l1_table = FieldsList(l1) + self.header['l1_size'][0].value = int(ceil(UINT64_S * self.image_size / + float(self.cluster_size**2))) + self.header['l1_table_offset'][0].value = l1_offset + + def create_refcount_structures(self): + """Generate random refcount blocks and refcount table.""" + def allocate_rfc_blocks(data, size): + """Return indices of clusters allocated for refcount blocks.""" + cluster_ids = set() + diff = block_ids = set([x / size for x in data]) + while len(diff) != 0: + # Allocate all yet not allocated clusters + new = self._get_available_clusters(data | cluster_ids, + len(diff)) + # Indices of new refcount blocks necessary to cover clusters + # in 'new' + diff = set([x / size for x in new]) - block_ids + cluster_ids |= new + block_ids |= diff + return cluster_ids, block_ids + + def allocate_rfc_table(data, init_blocks, block_size): + """Return indices of clusters allocated for the refcount table + and updated indices of clusters allocated for blocks and indices + of blocks. + """ + blocks = set(init_blocks) + clusters = set() + # Number of entries in one cluster of the refcount table + size = self.cluster_size / UINT64_S + # Number of clusters necessary for the refcount table based on + # the current number of refcount blocks + table_size = int(ceil((max(blocks) + 1) / float(size))) + # Index of the first cluster of the refcount table + table_start = self._get_adjacent_clusters(data, table_size + 1) + # Clusters allocated for the current length of the refcount table + table_clusters = set(range(table_start, table_start + table_size)) + # Clusters allocated for the refcount table including + # last optional one for potential l1 growth + table_clusters_allocated = set(range(table_start, table_start + + table_size + 1)) + # New refcount blocks necessary for clusters occupied by the + # refcount table + diff = set([c / block_size for c in table_clusters]) - blocks + blocks |= diff + while len(diff) != 0: + # Allocate clusters for new refcount blocks + new = self._get_available_clusters((data | clusters) | + table_clusters_allocated, + len(diff)) + # Indices of new refcount blocks necessary to cover + # clusters in 'new' + diff = set([x / block_size for x in new]) - blocks + clusters |= new + blocks |= diff + # Check if the refcount table needs one more cluster + if int(ceil((max(blocks) + 1) / float(size))) > table_size: + new_block_id = (table_start + table_size) / block_size + # Check if the additional table cluster needs + # one more refcount block + if new_block_id not in blocks: + diff.add(new_block_id) + table_clusters.add(table_start + table_size) + table_size += 1 + return table_clusters, blocks, clusters + + def create_table_entry(table_offset, block_cluster, block_size, + cluster): + """Generate a refcount table entry.""" + offset = table_offset + UINT64_S * (cluster / block_size) + return ['>Q', offset, block_cluster * self.cluster_size, + 'refcount_table_entry'] + + def create_block_entry(block_cluster, block_size, cluster): + """Generate a list of entries for the current block.""" + entry_size = self.cluster_size / block_size + offset = block_cluster * self.cluster_size + entry_offset = offset + entry_size * (cluster % block_size) + # While snapshots are not supported all refcounts are set to 1 + return ['>H', entry_offset, 1, 'refcount_block_entry'] + # Size of a block entry in bits + refcount_bits = 1 << self.header['refcount_order'][0].value + # Number of refcount entries per refcount block + # Convert self.cluster_size from bytes to bits to have the same + # base for the numerator and denominator + block_size = self.cluster_size * 8 / refcount_bits + meta_data = self._get_metadata() + if len(self.data_clusters) == 0: + # All metadata for an empty guest image needs 4 clusters: + # header, rfc table, rfc block, L1 table. + # Header takes cluster #0, other clusters ##1-3 can be used + block_clusters = set([random.choice(list(set(range(1, 4)) - + meta_data))]) + block_ids = set([0]) + table_clusters = set([random.choice(list(set(range(1, 4)) - + meta_data - + block_clusters))]) + else: + block_clusters, block_ids = \ + allocate_rfc_blocks(self.data_clusters | + meta_data, block_size) + table_clusters, block_ids, new_clusters = \ + allocate_rfc_table(self.data_clusters | + meta_data | + block_clusters, + block_ids, + block_size) + block_clusters |= new_clusters + + meta_data |= block_clusters | table_clusters + table_offset = min(table_clusters) * self.cluster_size + block_id = None + # Clusters allocated for refcount blocks + block_clusters = list(block_clusters) + # Indices of refcount blocks + block_ids = list(block_ids) + # Refcount table entries + rfc_table = [] + # Refcount entries + rfc_blocks = [] + + for cluster in sorted(self.data_clusters | meta_data): + if cluster / block_size != block_id: + block_id = cluster / block_size + block_cluster = block_clusters[block_ids.index(block_id)] + rfc_table.append(create_table_entry(table_offset, + block_cluster, + block_size, cluster)) + rfc_blocks.append(create_block_entry(block_cluster, block_size, + cluster)) + self.refcount_table = FieldsList(rfc_table) + self.refcount_blocks = FieldsList(rfc_blocks) + + self.header['refcount_table_offset'][0].value = table_offset + self.header['refcount_table_clusters'][0].value = len(table_clusters) + + def fuzz(self, fields_to_fuzz=None): + """Fuzz an image by corrupting values of a random subset of its fields. + + Without parameters the method fuzzes an entire image. + + If 'fields_to_fuzz' is specified then only fields in this list will be + fuzzed. 'fields_to_fuzz' can contain both individual fields and more + general image elements as a header or tables. + + In the first case the field will be fuzzed always. + In the second a random subset of fields will be selected and fuzzed. + """ + def coin(): + """Return boolean value proportional to a portion of fields to be + fuzzed. + """ + return random.random() < self.bias + + if fields_to_fuzz is None: + for field in self: + if coin(): + field.value = getattr(fuzz, field.name)(field.value) + else: + for item in fields_to_fuzz: + if len(item) == 1: + for field in getattr(self, item[0]): + if coin(): + field.value = getattr(fuzz, + field.name)(field.value) + else: + # If fields with the requested name were not generated + # getattr(self, item[0])[item[1]] returns an empty list + for field in getattr(self, item[0])[item[1]]: + field.value = getattr(fuzz, field.name)(field.value) + + def write(self, filename): + """Write an entire image to the file.""" + image_file = open(filename, 'w') + for field in self: + image_file.seek(field.offset) + image_file.write(struct.pack(field.fmt, field.value)) + + for cluster in sorted(self.data_clusters): + image_file.seek(cluster * self.cluster_size) + image_file.write(urandom(self.cluster_size)) + + # Align the real image size to the cluster size + image_file.seek(0, 2) + size = image_file.tell() + rounded = (size + self.cluster_size - 1) & ~(self.cluster_size - 1) + if rounded > size: + image_file.seek(rounded - 1) + image_file.write("\0") + image_file.close() + + @staticmethod + def _size_params(): + """Generate a random image size aligned to a random correct + cluster size. + """ + cluster_bits = random.randrange(9, 21) + cluster_size = 1 << cluster_bits + img_size = random.randrange(0, MAX_IMAGE_SIZE + 1, cluster_size) + return (cluster_bits, img_size) + + @staticmethod + def _get_available_clusters(used, number): + """Return a set of indices of not allocated clusters. + + 'used' contains indices of currently allocated clusters. + All clusters that cannot be allocated between 'used' clusters will have + indices appended to the end of 'used'. + """ + append_id = max(used) + 1 + free = set(range(1, append_id)) - used + if len(free) >= number: + return set(random.sample(free, number)) + else: + return free | set(range(append_id, append_id + number - len(free))) + + @staticmethod + def _get_adjacent_clusters(used, size): + """Return an index of the first cluster in the sequence of free ones. + + 'used' contains indices of currently allocated clusters. 'size' is the + length of the sequence of free clusters. + If the sequence of 'size' is not available between 'used' clusters, its + first index will be append to the end of 'used'. + """ + def get_cluster_id(lst, length): + """Return the first index of the sequence of the specified length + or None if the sequence cannot be inserted in the list. + """ + if len(lst) != 0: + pairs = [] + pair = (lst[0], 1) + for i in range(1, len(lst)): + if lst[i] == lst[i-1] + 1: + pair = (lst[i], pair[1] + 1) + else: + pairs.append(pair) + pair = (lst[i], 1) + pairs.append(pair) + random.shuffle(pairs) + for x, s in pairs: + if s >= length: + return x - length + 1 + return None + + append_id = max(used) + 1 + free = list(set(range(1, append_id)) - used) + idx = get_cluster_id(free, size) + if idx is None: + return append_id + else: + return idx + + @staticmethod + def _alloc_data(img_size, cluster_size): + """Return a set of random indices of clusters allocated for guest data. + """ + num_of_cls = img_size/cluster_size + return set(random.sample(range(1, num_of_cls + 1), + random.randint(0, num_of_cls))) + + def _get_metadata(self): + """Return indices of clusters allocated for image metadata.""" + ids = set() + for x in self: + ids.add(x.offset/self.cluster_size) + return ids + + +def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None, + fields_to_fuzz=None): + """Create a fuzzed image and write it to the specified file.""" + image = Image(backing_file_name) + image.set_backing_file_format(backing_file_fmt) + image.create_feature_name_table() + image.set_end_of_extension_area() + image.create_l_structures() + image.create_refcount_structures() + image.fuzz(fields_to_fuzz) + image.write(test_img_path) + return image.image_size |