From 06fd56e63606149500dcebc72ecca1253bf16c07 Mon Sep 17 00:00:00 2001 From: Gui Chen Date: Wed, 28 Nov 2012 14:07:41 +0800 Subject: backport bmap patch from devel Signed-off-by: Gui Chen --- mic/imager/raw.py | 252 +++++++++++++++++++++++++++++++------------ mic/utils/misc.py | 52 ++++++--- plugins/imager/raw_plugin.py | 6 +- 3 files changed, 228 insertions(+), 82 deletions(-) diff --git a/mic/imager/raw.py b/mic/imager/raw.py index 9ac3e6e..4b8c1f7 100644 --- a/mic/imager/raw.py +++ b/mic/imager/raw.py @@ -18,8 +18,9 @@ import os import stat import shutil - -from urlgrabber import progress +from fcntl import ioctl +from struct import pack, unpack +from itertools import groupby from mic import kickstart, msger from mic.utils import fs_related, runner, misc @@ -36,7 +37,7 @@ class RawImageCreator(BaseImageCreator): subsequently be booted in a virtual machine or accessed with kpartx """ - def __init__(self, creatoropts=None, pkgmgr=None, compress_image=None): + def __init__(self, creatoropts=None, pkgmgr=None, compress_image=None, generate_bmap=None): """Initialize a ApplianceImageCreator instance. This method takes the same arguments as ImageCreator.__init__() @@ -54,6 +55,7 @@ class RawImageCreator(BaseImageCreator): self.appliance_version = None self.appliance_release = None self.compress_image = compress_image + self.bmap_needed = generate_bmap #self.getsource = False #self.listpkg = False @@ -174,6 +176,14 @@ class RawImageCreator(BaseImageCreator): return self._diskinfo + def _full_name(self, name, extention): + """ Construct full file name for a file we generate. """ + return "%s-%s.%s" % (self.name, name, extention) + + def _full_path(self, path, name, extention): + """ Construct full file path to a file we generate. """ + return os.path.join(path, self._full_name(name, extention)) + # # Actual implemention # @@ -184,15 +194,11 @@ class RawImageCreator(BaseImageCreator): #create disk for item in self.get_diskinfo(): - msger.debug("Adding disk %s as %s/%s-%s.raw with size %s bytes" % - (item['name'], self.__imgdir, self.name, item['name'], - item['size'])) - - disk = fs_related.SparseLoopbackDisk("%s/%s-%s.raw" % ( - self.__imgdir, - self.name, - item['name']), - item['size']) + full_path = self._full_path(self.__imgdir, item['name'], "raw") + msger.debug("Adding disk %s as %s with size %s bytes" \ + % (item['name'], full_path, item['size'])) + + disk = fs_related.SparseLoopbackDisk(full_path, item['size']) self.__disks[item['name']] = disk self.__instloop = PartitionedMount(self.__disks, self._instroot) @@ -240,13 +246,9 @@ class RawImageCreator(BaseImageCreator): return (bootdevnum, rootdevnum, rootdev, prefix) def _create_syslinux_config(self): - #Copy splash - splash = "%s/usr/lib/anaconda-runtime/syslinux-vesa-splash.jpg" \ - % self._instroot + splash = os.path.join(self._instroot, "boot/extlinux") if os.path.exists(splash): - shutil.copy(splash, "%s%s/splash.jpg" \ - % (self._instroot, "/boot/extlinux")) splashline = "menu background splash.jpg" else: splashline = "" @@ -426,8 +428,9 @@ class RawImageCreator(BaseImageCreator): i = 0 for name in self.__disks.keys(): - xml += " \n" \ - % (self.name,name, self.__disk_format,chr(ord('a')+i)) + full_name = self._full_name(name, self.__disk_format) + xml += " \n" \ + % (full_name, chr(ord('a') + i)) i = i + 1 xml += " \n" @@ -443,58 +446,27 @@ class RawImageCreator(BaseImageCreator): if self.checksum is True: for name in self.__disks.keys(): - diskpath = "%s/%s-%s.%s" \ - % (self._outdir,self.name,name, self.__disk_format) - disk_size = os.path.getsize(diskpath) - meter_ct = 0 - meter = progress.TextMeter() - meter.start(size=disk_size, - text="Generating disk signature for %s-%s.%s" \ - % (self.name, - name, - self.__disk_format)) - xml += " \n"\ - % (self.name, - name, - self.__disk_format, - self.__disk_format) - - try: - import hashlib - m1 = hashlib.sha1() - m2 = hashlib.sha256() - except: - import sha - m1 = sha.new() - m2 = None - f = open(diskpath,"r") - while 1: - chunk = f.read(65536) - if not chunk: - break - m1.update(chunk) - if m2: - m2.update(chunk) - meter.update(meter_ct) - meter_ct = meter_ct + 65536 - - sha1checksum = m1.hexdigest() - xml += " %s\n" \ - % sha1checksum + diskpath = self._full_path(self._outdir, name, \ + self.__disk_format) + full_name = self._full_name(name, self.__disk_format) + + msger.debug("Generating disk signature for %s" % full_name) - if m2: - sha256checksum = m2.hexdigest() - xml += " %s\n" \ - % sha256checksum + xml += " \n" \ + % (full_name, self.__disk_format) + hashes = misc.calc_hashes(diskpath, ('sha1', 'sha256')) + + xml += " %s\n" \ + % hashes[0] + xml += " %s\n" \ + % hashes[1] xml += " \n" else: for name in self.__disks.keys(): - xml += " \n"\ - %(self.name, - name, - self.__disk_format, - self.__disk_format) + full_name = self._full_name(name, self.__disk_format) + xml += " \n" \ + % (full_name, self.__disk_format) xml += " \n" xml += "\n" @@ -503,3 +475,151 @@ class RawImageCreator(BaseImageCreator): cfg = open("%s/%s.xml" % (self._outdir, self.name), "w") cfg.write(xml) cfg.close() + + def _bmap_file_start(self, block_size, image_size, blocks_cnt): + """ A helper function which generates the starting contents of the + block map file: the header comment, image size, block size, etc. """ + + xml = "\n\n" + xml += "\n" + xml += "\n" + xml += "\t\n" \ + % misc.human_size(image_size) + xml += "\t %u \n\n" % image_size + + xml += "\t\n" + xml += "\t %u \n\n" % block_size + + xml += "\t\n" + xml += "\t %u \n\n" % blocks_cnt + + xml += "\t\n" + xml += "\t\n" + + return xml + + def _bmap_file_end(self, mapped_cnt, block_size, blocks_cnt): + """ A helper funstion which generates the final parts of the block map + file: the ending tags and the information about the amount of mapped + blocks. """ + + xml = "\t\n\n" + + size = misc.human_size(mapped_cnt * block_size) + percent = (mapped_cnt * 100.0) / blocks_cnt + xml += "\t\n" \ + % (size, percent) + xml += "\t %u \n" % mapped_cnt + xml += "" + + return xml + + def _get_ranges(self, f_image, blocks_cnt): + """ A helper for 'generate_bmap()' which generates ranges of mapped + blocks. It uses the FIBMAP ioctl to check which blocks are mapped. Of + course, the image file must have been created as a sparse file + originally, otherwise all blocks will be mapped. And it is also + essential to generate the block map before the file had been copied + anywhere or compressed, because othewise we lose the information about + unmapped blocks. """ + + def is_mapped(block): + """ Returns True if block 'block' of the image file is mapped and + False otherwise. + + Implementation details: this function uses the FIBMAP ioctl (number + 1) to get detect whether 'block' is mapped to a disk block. The ioctl + returns zero if 'block' is not mapped and non-zero disk block number + if it is mapped. """ + + return unpack('I', ioctl(f_image, 1, pack('I', block)))[0] != 0 + + for key, group in groupby(xrange(blocks_cnt), is_mapped): + if key: + # Find the first and the last elements of the group + first = group.next() + last = first + for last in group: + pass + yield first, last + + def generate_bmap(self): + """ Generate block map file for an image. The idea is that while disk + images we generate may be large (e.g., 4GiB), they may actually contain + only little real data, e.g., 512MiB. This data are files, directories, + file-system meta-data, partition table, etc. In other words, when + flashing the image to the target device, you do not have to copy all the + 4GiB of data, you can copy only 512MiB of it, which is 4 times faster. + + This function generates the block map file for an arbitrary image that + mic has generated. The block map file is basically an XML file which + contains a list of blocks which have to be copied to the target device. + The other blocks are not used and there is no need to copy them. + + This function assumes the image file was originally created as a sparse + file. To generate the block map we use the FIBMAP ioctl. """ + + if self.bmap_needed is None: + return + + msger.info("Generating the map file(s)") + + for name in self.__disks.keys(): + image = self._full_path(self.__imgdir, name, self.__disk_format) + bmap_file = self._full_path(self.__imgdir, name, "bmap") + + msger.debug("Generating block map file '%s'" % bmap_file) + + image_size = os.path.getsize(image) + + with open(bmap_file, "w") as f_bmap: + with open(image, "rb") as f_image: + # Get the block size of the host file-system for the image + # file by calling the FIGETBSZ ioctl (number 2). + block_size = unpack('I', ioctl(f_image, 2, pack('I', 0)))[0] + blocks_cnt = (image_size + block_size - 1) / block_size + + # Write general information to the block map file, without + # block map itself, which will be written next. + xml = self._bmap_file_start(block_size, image_size, + blocks_cnt) + f_bmap.write(xml) + + # Generate the block map and write it to the XML block map + # file as we go. + mapped_cnt = 0 + for first, last in self._get_ranges(f_image, blocks_cnt): + mapped_cnt += last - first + 1 + sha1 = misc.calc_hashes(image, ('sha1', ), + first * block_size, + (last + 1) * block_size) + f_bmap.write("\t\t %s-%s " \ + "\n" % (sha1[0], first, last)) + + # Finish the block map file + xml = self._bmap_file_end(mapped_cnt, block_size, + blocks_cnt) + f_bmap.write(xml) diff --git a/mic/utils/misc.py b/mic/utils/misc.py index 5b5a299..cb833d3 100644 --- a/mic/utils/misc.py +++ b/mic/utils/misc.py @@ -27,11 +27,6 @@ import subprocess import platform import rpmmisc -try: - from hashlib import md5 -except ImportError: - from md5 import md5 - try: import sqlite3 as sqlite except ImportError: @@ -237,17 +232,44 @@ def check_space_pre_cp(src, dst): raise CreatorError("space on %s(%s) is not enough for about %s files" % (dst, human_size(freesize), human_size(srcsize))) -def get_md5sum(fpath): - blksize = 65536 # should be optimized enough +def calc_hashes(file_path, hash_names, start = 0, end = None): + """ Calculate hashes for a file. The 'file_path' argument is the file + to calculate hash functions for, 'start' and 'end' are the starting and + ending file offset to calculate the has functions for. The 'hash_names' + argument is a list of hash names to calculate. Returns the the list + of calculated hash values in the hexadecimal form in the same order + as 'hash_names'. + """ + if end == None: + end = os.path.getsize(file_path) - md5sum = md5() - with open(fpath, 'rb') as f: - while True: - data = f.read(blksize) - if not data: - break - md5sum.update(data) - return md5sum.hexdigest() + chunk_size = 65536 + to_read = end - start; + read = 0 + + hashes = [] + for hash_name in hash_names: + hashes.append(hashlib.new(hash_name)) + + with open(file_path, "rb") as f: + f.seek(start) + + while read < to_read: + if read + chunk_size > to_read: + chunk_size = to_read - read + chunk = f.read(chunk_size) + for hash_obj in hashes: + hash_obj.update(chunk) + read += chunk_size + + result = [] + for hash_obj in hashes: + result.append(hash_obj.hexdigest()) + + return result + +def get_md5sum(fpath): + return calc_hashes(fpath, ('md5', ))[0] def normalize_ksfile(ksconf, release, arch): def _clrtempks(): diff --git a/plugins/imager/raw_plugin.py b/plugins/imager/raw_plugin.py index 4c432a6..0b80d1e 100644 --- a/plugins/imager/raw_plugin.py +++ b/plugins/imager/raw_plugin.py @@ -39,6 +39,8 @@ class RawPlugin(ImagerPlugin): @cmdln.option("--compress-image", dest="compress_image", type='choice', choices=("gz", "bz2"), default = None, help="Compress all raw images before package") + @cmdln.option("--generate-bmap", action="store_true", default = None, + help="also generate the block map file") def do_create(self, subcmd, opts, *args): """${cmd_name}: create raw image @@ -92,7 +94,8 @@ class RawPlugin(ImagerPlugin): (creatoropts['pkgmgr'], ','.join(backends.keys()))) - creator = raw.RawImageCreator(creatoropts, pkgmgr, opts.compress_image) + creator = raw.RawImageCreator(creatoropts, pkgmgr, opts.compress_image, + opts.generate_bmap) if len(recording_pkgs) > 0: creator._recording_pkgs = recording_pkgs @@ -111,6 +114,7 @@ class RawPlugin(ImagerPlugin): creator.configure(creatoropts["repomd"]) creator.copy_kernel() creator.unmount() + creator.generate_bmap() creator.package(creatoropts["outdir"]) if creatoropts['release'] is not None: creator.release_output(ksconf, creatoropts['outdir'], creatoropts['release']) -- cgit v1.2.3