diff options
author | Gui Chen <gui.chen@intel.com> | 2012-11-28 14:07:41 +0800 |
---|---|---|
committer | Gui Chen <gui.chen@intel.com> | 2012-12-12 19:33:44 +0800 |
commit | 06fd56e63606149500dcebc72ecca1253bf16c07 (patch) | |
tree | 303507689e52d7e7c2089259ab82e0af2fc6cb8b | |
parent | 5654dbf4302621091279036e130b68bb466f16a8 (diff) | |
download | mic-06fd56e63606149500dcebc72ecca1253bf16c07.tar.gz mic-06fd56e63606149500dcebc72ecca1253bf16c07.tar.bz2 mic-06fd56e63606149500dcebc72ecca1253bf16c07.zip |
backport bmap patch from devel
Signed-off-by: Gui Chen <gui.chen@intel.com>
-rw-r--r-- | mic/imager/raw.py | 252 | ||||
-rw-r--r-- | mic/utils/misc.py | 52 | ||||
-rw-r--r-- | plugins/imager/raw_plugin.py | 6 |
3 files changed, 228 insertions, 82 deletions
diff --git a/mic/imager/raw.py b/mic/imager/raw.py index 9ac3e6e..4b8c1f7 100644 --- a/mic/imager/raw.py +++ b/mic/imager/raw.py @@ -18,8 +18,9 @@ import os import stat import shutil - -from urlgrabber import progress +from fcntl import ioctl +from struct import pack, unpack +from itertools import groupby from mic import kickstart, msger from mic.utils import fs_related, runner, misc @@ -36,7 +37,7 @@ class RawImageCreator(BaseImageCreator): subsequently be booted in a virtual machine or accessed with kpartx """ - def __init__(self, creatoropts=None, pkgmgr=None, compress_image=None): + def __init__(self, creatoropts=None, pkgmgr=None, compress_image=None, generate_bmap=None): """Initialize a ApplianceImageCreator instance. This method takes the same arguments as ImageCreator.__init__() @@ -54,6 +55,7 @@ class RawImageCreator(BaseImageCreator): self.appliance_version = None self.appliance_release = None self.compress_image = compress_image + self.bmap_needed = generate_bmap #self.getsource = False #self.listpkg = False @@ -174,6 +176,14 @@ class RawImageCreator(BaseImageCreator): return self._diskinfo + def _full_name(self, name, extention): + """ Construct full file name for a file we generate. """ + return "%s-%s.%s" % (self.name, name, extention) + + def _full_path(self, path, name, extention): + """ Construct full file path to a file we generate. """ + return os.path.join(path, self._full_name(name, extention)) + # # Actual implemention # @@ -184,15 +194,11 @@ class RawImageCreator(BaseImageCreator): #create disk for item in self.get_diskinfo(): - msger.debug("Adding disk %s as %s/%s-%s.raw with size %s bytes" % - (item['name'], self.__imgdir, self.name, item['name'], - item['size'])) - - disk = fs_related.SparseLoopbackDisk("%s/%s-%s.raw" % ( - self.__imgdir, - self.name, - item['name']), - item['size']) + full_path = self._full_path(self.__imgdir, item['name'], "raw") + msger.debug("Adding disk %s as %s with size %s bytes" \ + % (item['name'], full_path, item['size'])) + + disk = fs_related.SparseLoopbackDisk(full_path, item['size']) self.__disks[item['name']] = disk self.__instloop = PartitionedMount(self.__disks, self._instroot) @@ -240,13 +246,9 @@ class RawImageCreator(BaseImageCreator): return (bootdevnum, rootdevnum, rootdev, prefix) def _create_syslinux_config(self): - #Copy splash - splash = "%s/usr/lib/anaconda-runtime/syslinux-vesa-splash.jpg" \ - % self._instroot + splash = os.path.join(self._instroot, "boot/extlinux") if os.path.exists(splash): - shutil.copy(splash, "%s%s/splash.jpg" \ - % (self._instroot, "/boot/extlinux")) splashline = "menu background splash.jpg" else: splashline = "" @@ -426,8 +428,9 @@ class RawImageCreator(BaseImageCreator): i = 0 for name in self.__disks.keys(): - xml += " <drive disk='%s-%s.%s' target='hd%s'/>\n" \ - % (self.name,name, self.__disk_format,chr(ord('a')+i)) + full_name = self._full_name(name, self.__disk_format) + xml += " <drive disk='%s' target='hd%s'/>\n" \ + % (full_name, chr(ord('a') + i)) i = i + 1 xml += " </boot>\n" @@ -443,58 +446,27 @@ class RawImageCreator(BaseImageCreator): if self.checksum is True: for name in self.__disks.keys(): - diskpath = "%s/%s-%s.%s" \ - % (self._outdir,self.name,name, self.__disk_format) - disk_size = os.path.getsize(diskpath) - meter_ct = 0 - meter = progress.TextMeter() - meter.start(size=disk_size, - text="Generating disk signature for %s-%s.%s" \ - % (self.name, - name, - self.__disk_format)) - xml += " <disk file='%s-%s.%s' use='system' format='%s'>\n"\ - % (self.name, - name, - self.__disk_format, - self.__disk_format) - - try: - import hashlib - m1 = hashlib.sha1() - m2 = hashlib.sha256() - except: - import sha - m1 = sha.new() - m2 = None - f = open(diskpath,"r") - while 1: - chunk = f.read(65536) - if not chunk: - break - m1.update(chunk) - if m2: - m2.update(chunk) - meter.update(meter_ct) - meter_ct = meter_ct + 65536 - - sha1checksum = m1.hexdigest() - xml += " <checksum type='sha1'>%s</checksum>\n" \ - % sha1checksum + diskpath = self._full_path(self._outdir, name, \ + self.__disk_format) + full_name = self._full_name(name, self.__disk_format) + + msger.debug("Generating disk signature for %s" % full_name) - if m2: - sha256checksum = m2.hexdigest() - xml += " <checksum type='sha256'>%s</checksum>\n" \ - % sha256checksum + xml += " <disk file='%s' use='system' format='%s'>\n" \ + % (full_name, self.__disk_format) + hashes = misc.calc_hashes(diskpath, ('sha1', 'sha256')) + + xml += " <checksum type='sha1'>%s</checksum>\n" \ + % hashes[0] + xml += " <checksum type='sha256'>%s</checksum>\n" \ + % hashes[1] xml += " </disk>\n" else: for name in self.__disks.keys(): - xml += " <disk file='%s-%s.%s' use='system' format='%s'/>\n"\ - %(self.name, - name, - self.__disk_format, - self.__disk_format) + full_name = self._full_name(name, self.__disk_format) + xml += " <disk file='%s' use='system' format='%s'/>\n" \ + % (full_name, self.__disk_format) xml += " </storage>\n" xml += "</image>\n" @@ -503,3 +475,151 @@ class RawImageCreator(BaseImageCreator): cfg = open("%s/%s.xml" % (self._outdir, self.name), "w") cfg.write(xml) cfg.close() + + def _bmap_file_start(self, block_size, image_size, blocks_cnt): + """ A helper function which generates the starting contents of the + block map file: the header comment, image size, block size, etc. """ + + xml = "<?xml version=\"1.0\" ?>\n\n" + xml += "<!-- This file contains block map for an image file. The block map\n" + xml += " is basically a list of block numbers in the image file. It lists\n" + xml += " only those blocks which contain data (boot sector, partition\n" + xml += " table, file-system metadata, files, directories, extents, etc).\n" + xml += " These blocks have to be copied to the target device. The other\n" + xml += " blocks do not contain any useful data and do not have to be\n" + xml += " copied to the target device. Thus, using the block map users can\n" + xml += " flash the image fast. So the block map is just an optimization.\n" + xml += " It is OK to ignore this file and just flash the entire image to\n" + xml += " the target device if the flashing speed is not important.\n\n" + + xml += " Note, this file contains commentaries with useful information\n" + xml += " like image size in gigabytes, percentage of mapped data, etc.\n" + xml += " This data is there merely to make the XML file human-readable.\n\n" + + xml += " The 'version' attribute is the block map file format version in\n" + xml += " the 'major.minor' format. The version major number is increased\n" + xml += " whenever we make incompatible changes to the block map format,\n" + xml += " meaning that the bmap-aware flasher would have to be modified in\n" + xml += " order to support the new format. The minor version is increased\n" + xml += " in case of compatible changes. For example, if we add an attribute\n" + xml += " which is optional for the bmap-aware flasher. -->\n" + xml += "<bmap version=\"1.1\">\n" + xml += "\t<!-- Image size in bytes (%s) -->\n" \ + % misc.human_size(image_size) + xml += "\t<ImageSize> %u </ImageSize>\n\n" % image_size + + xml += "\t<!-- Size of a block in bytes -->\n" + xml += "\t<BlockSize> %u </BlockSize>\n\n" % block_size + + xml += "\t<!-- Count of blocks in the image file -->\n" + xml += "\t<BlocksCount> %u </BlocksCount>\n\n" % blocks_cnt + + xml += "\t<!-- The block map which consists of elements which may either\n" + xml += "\t be a range of blocks or a single block. The 'sha1' attribute\n" + xml += "\t is the SHA1 checksum of the this range of blocks. -->\n" + xml += "\t<BlockMap>\n" + + return xml + + def _bmap_file_end(self, mapped_cnt, block_size, blocks_cnt): + """ A helper funstion which generates the final parts of the block map + file: the ending tags and the information about the amount of mapped + blocks. """ + + xml = "\t</BlockMap>\n\n" + + size = misc.human_size(mapped_cnt * block_size) + percent = (mapped_cnt * 100.0) / blocks_cnt + xml += "\t<!-- Count of mapped blocks (%s or %.1f%% mapped) -->\n" \ + % (size, percent) + xml += "\t<MappedBlocksCount> %u </MappedBlocksCount>\n" % mapped_cnt + xml += "</bmap>" + + return xml + + def _get_ranges(self, f_image, blocks_cnt): + """ A helper for 'generate_bmap()' which generates ranges of mapped + blocks. It uses the FIBMAP ioctl to check which blocks are mapped. Of + course, the image file must have been created as a sparse file + originally, otherwise all blocks will be mapped. And it is also + essential to generate the block map before the file had been copied + anywhere or compressed, because othewise we lose the information about + unmapped blocks. """ + + def is_mapped(block): + """ Returns True if block 'block' of the image file is mapped and + False otherwise. + + Implementation details: this function uses the FIBMAP ioctl (number + 1) to get detect whether 'block' is mapped to a disk block. The ioctl + returns zero if 'block' is not mapped and non-zero disk block number + if it is mapped. """ + + return unpack('I', ioctl(f_image, 1, pack('I', block)))[0] != 0 + + for key, group in groupby(xrange(blocks_cnt), is_mapped): + if key: + # Find the first and the last elements of the group + first = group.next() + last = first + for last in group: + pass + yield first, last + + def generate_bmap(self): + """ Generate block map file for an image. The idea is that while disk + images we generate may be large (e.g., 4GiB), they may actually contain + only little real data, e.g., 512MiB. This data are files, directories, + file-system meta-data, partition table, etc. In other words, when + flashing the image to the target device, you do not have to copy all the + 4GiB of data, you can copy only 512MiB of it, which is 4 times faster. + + This function generates the block map file for an arbitrary image that + mic has generated. The block map file is basically an XML file which + contains a list of blocks which have to be copied to the target device. + The other blocks are not used and there is no need to copy them. + + This function assumes the image file was originally created as a sparse + file. To generate the block map we use the FIBMAP ioctl. """ + + if self.bmap_needed is None: + return + + msger.info("Generating the map file(s)") + + for name in self.__disks.keys(): + image = self._full_path(self.__imgdir, name, self.__disk_format) + bmap_file = self._full_path(self.__imgdir, name, "bmap") + + msger.debug("Generating block map file '%s'" % bmap_file) + + image_size = os.path.getsize(image) + + with open(bmap_file, "w") as f_bmap: + with open(image, "rb") as f_image: + # Get the block size of the host file-system for the image + # file by calling the FIGETBSZ ioctl (number 2). + block_size = unpack('I', ioctl(f_image, 2, pack('I', 0)))[0] + blocks_cnt = (image_size + block_size - 1) / block_size + + # Write general information to the block map file, without + # block map itself, which will be written next. + xml = self._bmap_file_start(block_size, image_size, + blocks_cnt) + f_bmap.write(xml) + + # Generate the block map and write it to the XML block map + # file as we go. + mapped_cnt = 0 + for first, last in self._get_ranges(f_image, blocks_cnt): + mapped_cnt += last - first + 1 + sha1 = misc.calc_hashes(image, ('sha1', ), + first * block_size, + (last + 1) * block_size) + f_bmap.write("\t\t<Range sha1=\"%s\"> %s-%s " \ + "</Range>\n" % (sha1[0], first, last)) + + # Finish the block map file + xml = self._bmap_file_end(mapped_cnt, block_size, + blocks_cnt) + f_bmap.write(xml) diff --git a/mic/utils/misc.py b/mic/utils/misc.py index 5b5a299..cb833d3 100644 --- a/mic/utils/misc.py +++ b/mic/utils/misc.py @@ -28,11 +28,6 @@ import platform import rpmmisc try: - from hashlib import md5 -except ImportError: - from md5 import md5 - -try: import sqlite3 as sqlite except ImportError: import sqlite @@ -237,17 +232,44 @@ def check_space_pre_cp(src, dst): raise CreatorError("space on %s(%s) is not enough for about %s files" % (dst, human_size(freesize), human_size(srcsize))) -def get_md5sum(fpath): - blksize = 65536 # should be optimized enough +def calc_hashes(file_path, hash_names, start = 0, end = None): + """ Calculate hashes for a file. The 'file_path' argument is the file + to calculate hash functions for, 'start' and 'end' are the starting and + ending file offset to calculate the has functions for. The 'hash_names' + argument is a list of hash names to calculate. Returns the the list + of calculated hash values in the hexadecimal form in the same order + as 'hash_names'. + """ + if end == None: + end = os.path.getsize(file_path) - md5sum = md5() - with open(fpath, 'rb') as f: - while True: - data = f.read(blksize) - if not data: - break - md5sum.update(data) - return md5sum.hexdigest() + chunk_size = 65536 + to_read = end - start; + read = 0 + + hashes = [] + for hash_name in hash_names: + hashes.append(hashlib.new(hash_name)) + + with open(file_path, "rb") as f: + f.seek(start) + + while read < to_read: + if read + chunk_size > to_read: + chunk_size = to_read - read + chunk = f.read(chunk_size) + for hash_obj in hashes: + hash_obj.update(chunk) + read += chunk_size + + result = [] + for hash_obj in hashes: + result.append(hash_obj.hexdigest()) + + return result + +def get_md5sum(fpath): + return calc_hashes(fpath, ('md5', ))[0] def normalize_ksfile(ksconf, release, arch): def _clrtempks(): diff --git a/plugins/imager/raw_plugin.py b/plugins/imager/raw_plugin.py index 4c432a6..0b80d1e 100644 --- a/plugins/imager/raw_plugin.py +++ b/plugins/imager/raw_plugin.py @@ -39,6 +39,8 @@ class RawPlugin(ImagerPlugin): @cmdln.option("--compress-image", dest="compress_image", type='choice', choices=("gz", "bz2"), default = None, help="Compress all raw images before package") + @cmdln.option("--generate-bmap", action="store_true", default = None, + help="also generate the block map file") def do_create(self, subcmd, opts, *args): """${cmd_name}: create raw image @@ -92,7 +94,8 @@ class RawPlugin(ImagerPlugin): (creatoropts['pkgmgr'], ','.join(backends.keys()))) - creator = raw.RawImageCreator(creatoropts, pkgmgr, opts.compress_image) + creator = raw.RawImageCreator(creatoropts, pkgmgr, opts.compress_image, + opts.generate_bmap) if len(recording_pkgs) > 0: creator._recording_pkgs = recording_pkgs @@ -111,6 +114,7 @@ class RawPlugin(ImagerPlugin): creator.configure(creatoropts["repomd"]) creator.copy_kernel() creator.unmount() + creator.generate_bmap() creator.package(creatoropts["outdir"]) if creatoropts['release'] is not None: creator.release_output(ksconf, creatoropts['outdir'], creatoropts['release']) |