backport bmap patch from devel

Signed-off-by: Gui Chen <gui.chen@intel.com>
author: Gui Chen <gui.chen@intel.com> 2012-11-28 14:07:41 +0800
committer: Gui Chen <gui.chen@intel.com> 2012-12-12 19:33:44 +0800
commit: 06fd56e63606149500dcebc72ecca1253bf16c07 (patch)
tree: 303507689e52d7e7c2089259ab82e0af2fc6cb8b
parent: 5654dbf4302621091279036e130b68bb466f16a8 (diff)
download: mic-06fd56e63606149500dcebc72ecca1253bf16c07.tar.gz
mic-06fd56e63606149500dcebc72ecca1253bf16c07.tar.bz2
mic-06fd56e63606149500dcebc72ecca1253bf16c07.zip
3 files changed, 228 insertions, 82 deletions
diff --git a/mic/imager/raw.py b/mic/imager/raw.py
index 9ac3e6e..4b8c1f7 100644
--- a/mic/imager/raw.py
+++ b/mic/imager/raw.py
@@ -18,8 +18,9 @@
 import os
 import stat
 import shutil
-
-from urlgrabber import progress
+from fcntl import ioctl
+from struct import pack, unpack
+from itertools import groupby
 
 from mic import kickstart, msger
 from mic.utils import fs_related, runner, misc
@@ -36,7 +37,7 @@ class RawImageCreator(BaseImageCreator):
     subsequently be booted in a virtual machine or accessed with kpartx
     """
 
-    def __init__(self, creatoropts=None, pkgmgr=None, compress_image=None):
+    def __init__(self, creatoropts=None, pkgmgr=None, compress_image=None, generate_bmap=None):
         """Initialize a ApplianceImageCreator instance.
 
             This method takes the same arguments as ImageCreator.__init__()
@@ -54,6 +55,7 @@ class RawImageCreator(BaseImageCreator):
         self.appliance_version = None
         self.appliance_release = None
         self.compress_image = compress_image
+        self.bmap_needed = generate_bmap
         #self.getsource = False
         #self.listpkg = False
 
@@ -174,6 +176,14 @@ class RawImageCreator(BaseImageCreator):
 
         return self._diskinfo
 
+    def _full_name(self, name, extention):
+        """ Construct full file name for a file we generate. """
+        return "%s-%s.%s" % (self.name, name, extention)
+
+    def _full_path(self, path, name, extention):
+        """ Construct full file path to a file we generate. """
+        return os.path.join(path, self._full_name(name, extention))
+
     #
     # Actual implemention
     #
@@ -184,15 +194,11 @@ class RawImageCreator(BaseImageCreator):
 
         #create disk
         for item in self.get_diskinfo():
-            msger.debug("Adding disk %s as %s/%s-%s.raw with size %s bytes" %
-                        (item['name'], self.__imgdir, self.name, item['name'],
-                         item['size']))
-
-            disk = fs_related.SparseLoopbackDisk("%s/%s-%s.raw" % (
-                                                                self.__imgdir,
-                                                                self.name,
-                                                                item['name']),
-                                                 item['size'])
+            full_path = self._full_path(self.__imgdir, item['name'], "raw")
+            msger.debug("Adding disk %s as %s with size %s bytes" \
+                        % (item['name'], full_path, item['size']))
+
+            disk = fs_related.SparseLoopbackDisk(full_path, item['size'])
             self.__disks[item['name']] = disk
 
         self.__instloop = PartitionedMount(self.__disks, self._instroot)
@@ -240,13 +246,9 @@ class RawImageCreator(BaseImageCreator):
         return (bootdevnum, rootdevnum, rootdev, prefix)
 
     def _create_syslinux_config(self):
-        #Copy splash
-        splash = "%s/usr/lib/anaconda-runtime/syslinux-vesa-splash.jpg" \
-                 % self._instroot
 
+        splash = os.path.join(self._instroot, "boot/extlinux")
         if os.path.exists(splash):
-            shutil.copy(splash, "%s%s/splash.jpg" \
-                                % (self._instroot, "/boot/extlinux"))
             splashline = "menu background splash.jpg"
         else:
             splashline = ""
@@ -426,8 +428,9 @@ class RawImageCreator(BaseImageCreator):
 
         i = 0
         for name in self.__disks.keys():
-            xml += "      <drive disk='%s-%s.%s' target='hd%s'/>\n" \
-                   % (self.name,name, self.__disk_format,chr(ord('a')+i))
+            full_name = self._full_name(name, self.__disk_format)
+            xml += "      <drive disk='%s' target='hd%s'/>\n" \
+                       % (full_name, chr(ord('a') + i))
             i = i + 1
 
         xml += "    </boot>\n"
@@ -443,58 +446,27 @@ class RawImageCreator(BaseImageCreator):
 
         if self.checksum is True:
             for name in self.__disks.keys():
-                diskpath = "%s/%s-%s.%s" \
-                           % (self._outdir,self.name,name, self.__disk_format)
-                disk_size = os.path.getsize(diskpath)
-                meter_ct = 0
-                meter = progress.TextMeter()
-                meter.start(size=disk_size,
-                            text="Generating disk signature for %s-%s.%s" \
-                                 % (self.name,
-                                    name,
-                                    self.__disk_format))
-                xml += "    <disk file='%s-%s.%s' use='system' format='%s'>\n"\
-                       % (self.name,
-                          name,
-                          self.__disk_format,
-                          self.__disk_format)
-
-                try:
-                    import hashlib
-                    m1 = hashlib.sha1()
-                    m2 = hashlib.sha256()
-                except:
-                    import sha
-                    m1 = sha.new()
-                    m2 = None
-                f = open(diskpath,"r")
-                while 1:
-                    chunk = f.read(65536)
-                    if not chunk:
-                        break
-                    m1.update(chunk)
-                    if m2:
-                       m2.update(chunk)
-                    meter.update(meter_ct)
-                    meter_ct = meter_ct + 65536
-
-                sha1checksum = m1.hexdigest()
-                xml +=  "      <checksum type='sha1'>%s</checksum>\n" \
-                        % sha1checksum
+                diskpath = self._full_path(self._outdir, name, \
+                                           self.__disk_format)
+                full_name = self._full_name(name, self.__disk_format)
+
+                msger.debug("Generating disk signature for %s" % full_name)
 
-                if m2:
-                    sha256checksum = m2.hexdigest()
-                    xml += "      <checksum type='sha256'>%s</checksum>\n" \
-                           % sha256checksum
+                xml += "    <disk file='%s' use='system' format='%s'>\n" \
+                       % (full_name, self.__disk_format)
 
+                hashes = misc.calc_hashes(diskpath, ('sha1', 'sha256'))
+
+                xml +=  "      <checksum type='sha1'>%s</checksum>\n" \
+                        % hashes[0]
+                xml += "      <checksum type='sha256'>%s</checksum>\n" \
+                       % hashes[1]
                 xml += "    </disk>\n"
         else:
             for name in self.__disks.keys():
-                xml += "    <disk file='%s-%s.%s' use='system' format='%s'/>\n"\
-                       %(self.name,
-                         name,
-                         self.__disk_format,
-                         self.__disk_format)
+                full_name = self._full_name(name, self.__disk_format)
+                xml += "    <disk file='%s' use='system' format='%s'/>\n" \
+                       % (full_name, self.__disk_format)
 
         xml += "  </storage>\n"
         xml += "</image>\n"
@@ -503,3 +475,151 @@ class RawImageCreator(BaseImageCreator):
         cfg = open("%s/%s.xml" % (self._outdir, self.name), "w")
         cfg.write(xml)
         cfg.close()
+
+    def _bmap_file_start(self, block_size, image_size, blocks_cnt):
+        """ A helper function which generates the starting contents of the
+        block map file: the header comment, image size, block size, etc. """
+
+        xml = "<?xml version=\"1.0\" ?>\n\n"
+        xml += "<!-- This file contains block map for an image file. The block map\n"
+        xml += "     is basically a list of block numbers in the image file. It lists\n"
+        xml += "     only those blocks which contain data (boot sector, partition\n"
+        xml += "     table, file-system metadata, files, directories, extents, etc).\n"
+        xml += "     These blocks have to be copied to the target device. The other\n"
+        xml += "     blocks do not contain any useful data and do not have to be\n"
+        xml += "     copied to the target device. Thus, using the block map users can\n"
+        xml += "     flash the image fast. So the block map is just an optimization.\n"
+        xml += "     It is OK to ignore this file and just flash the entire image to\n"
+        xml += "     the target device if the flashing speed is not important.\n\n"
+
+        xml += "     Note, this file contains commentaries with useful information\n"
+        xml += "     like image size in gigabytes, percentage of mapped data, etc.\n"
+        xml += "     This data is there merely to make the XML file human-readable.\n\n"
+
+        xml += "     The 'version' attribute is the block map file format version in\n"
+        xml += "     the 'major.minor' format. The version major number is increased\n"
+        xml += "     whenever we make incompatible changes to the block map format,\n"
+        xml += "     meaning that the bmap-aware flasher would have to be modified in\n"
+        xml += "     order to support the new format. The minor version is increased\n"
+        xml += "     in case of compatible changes. For example, if we add an attribute\n"
+        xml += "     which is optional for the bmap-aware flasher. -->\n"
+        xml += "<bmap version=\"1.1\">\n"
+        xml += "\t<!-- Image size in bytes (%s) -->\n" \
+                % misc.human_size(image_size)
+        xml += "\t<ImageSize> %u </ImageSize>\n\n" % image_size
+
+        xml += "\t<!-- Size of a block in bytes -->\n"
+        xml += "\t<BlockSize> %u </BlockSize>\n\n" % block_size
+
+        xml += "\t<!-- Count of blocks in the image file -->\n"
+        xml += "\t<BlocksCount> %u </BlocksCount>\n\n" % blocks_cnt
+
+        xml += "\t<!-- The block map which consists of elements which may either\n"
+        xml += "\t     be a range of blocks or a single block. The 'sha1' attribute\n"
+        xml += "\t     is the SHA1 checksum of the this range of blocks. -->\n"
+        xml += "\t<BlockMap>\n"
+
+        return xml
+
+    def _bmap_file_end(self, mapped_cnt, block_size, blocks_cnt):
+        """ A helper funstion which generates the final parts of the block map
+        file: the ending tags and the information about the amount of mapped
+        blocks. """
+
+        xml = "\t</BlockMap>\n\n"
+
+        size = misc.human_size(mapped_cnt * block_size)
+        percent = (mapped_cnt * 100.0) / blocks_cnt
+        xml += "\t<!-- Count of mapped blocks (%s or %.1f%% mapped) -->\n" \
+                % (size, percent)
+        xml += "\t<MappedBlocksCount> %u </MappedBlocksCount>\n" % mapped_cnt
+        xml += "</bmap>"
+
+        return xml
+
+    def _get_ranges(self, f_image, blocks_cnt):
+        """ A helper for 'generate_bmap()' which generates ranges of mapped
+        blocks. It uses the FIBMAP ioctl to check which blocks are mapped. Of
+        course, the image file must have been created as a sparse file
+        originally, otherwise all blocks will be mapped. And it is also
+        essential to generate the block map before the file had been copied
+        anywhere or compressed, because othewise we lose the information about
+        unmapped blocks. """
+
+        def is_mapped(block):
+            """ Returns True if block 'block' of the image file is mapped and
+            False otherwise.
+
+            Implementation details: this function uses the FIBMAP ioctl (number
+            1) to get detect whether 'block' is mapped to a disk block. The ioctl
+            returns zero if 'block' is not mapped and non-zero disk block number
+            if it is mapped. """
+
+            return unpack('I', ioctl(f_image, 1, pack('I', block)))[0] != 0
+
+        for key, group in groupby(xrange(blocks_cnt), is_mapped):
+            if key:
+                # Find the first and the last elements of the group
+                first = group.next()
+                last = first
+                for last in group:
+                    pass
+                yield first, last
+
+    def generate_bmap(self):
+        """ Generate block map file for an image. The idea is that while disk
+        images we generate may be large (e.g., 4GiB), they may actually contain
+        only little real data, e.g., 512MiB. This data are files, directories,
+        file-system meta-data, partition table, etc. In other words, when
+        flashing the image to the target device, you do not have to copy all the
+        4GiB of data, you can copy only 512MiB of it, which is 4 times faster.
+
+        This function generates the block map file for an arbitrary image that
+        mic has generated. The block map file is basically an XML file which
+        contains a list of blocks which have to be copied to the target device.
+        The other blocks are not used and there is no need to copy them.
+
+        This function assumes the image file was originally created as a sparse
+        file. To generate the block map we use the FIBMAP ioctl. """
+
+        if self.bmap_needed is None:
+            return
+
+        msger.info("Generating the map file(s)")
+
+        for name in self.__disks.keys():
+            image = self._full_path(self.__imgdir, name, self.__disk_format)
+            bmap_file = self._full_path(self.__imgdir, name, "bmap")
+
+            msger.debug("Generating block map file '%s'" % bmap_file)
+
+            image_size = os.path.getsize(image)
+
+            with open(bmap_file, "w") as f_bmap:
+                with open(image, "rb") as f_image:
+                    # Get the block size of the host file-system for the image
+                    # file by calling the FIGETBSZ ioctl (number 2).
+                    block_size = unpack('I', ioctl(f_image, 2, pack('I', 0)))[0]
+                    blocks_cnt = (image_size + block_size - 1) / block_size
+
+                    # Write general information to the block map file, without
+                    # block map itself, which will be written next.
+                    xml = self._bmap_file_start(block_size, image_size,
+                                                blocks_cnt)
+                    f_bmap.write(xml)
+
+                    # Generate the block map and write it to the XML block map
+                    # file as we go.
+                    mapped_cnt = 0
+                    for first, last in self._get_ranges(f_image, blocks_cnt):
+                        mapped_cnt += last - first + 1
+                        sha1 = misc.calc_hashes(image, ('sha1', ),
+                                                first * block_size,
+                                                (last + 1) * block_size)
+                        f_bmap.write("\t\t<Range sha1=\"%s\"> %s-%s " \
+                                     "</Range>\n" % (sha1[0], first, last))
+
+                    # Finish the block map file
+                    xml = self._bmap_file_end(mapped_cnt, block_size,
+                                              blocks_cnt)
+                    f_bmap.write(xml)
diff --git a/mic/utils/misc.py b/mic/utils/misc.py
index 5b5a299..cb833d3 100644
--- a/mic/utils/misc.py
+++ b/mic/utils/misc.py
@@ -28,11 +28,6 @@ import platform
 import rpmmisc
 
 try:
-    from hashlib import md5
-except ImportError:
-    from md5 import md5
-
-try:
     import sqlite3 as sqlite
 except ImportError:
     import sqlite
@@ -237,17 +232,44 @@ def check_space_pre_cp(src, dst):
         raise CreatorError("space on %s(%s) is not enough for about %s files"
                            % (dst, human_size(freesize), human_size(srcsize)))
 
-def get_md5sum(fpath):
-    blksize = 65536 # should be optimized enough
+def calc_hashes(file_path, hash_names, start = 0, end = None):
+    """ Calculate hashes for a file. The 'file_path' argument is the file
+    to calculate hash functions for, 'start' and 'end' are the starting and
+    ending file offset to calculate the has functions for. The 'hash_names'
+    argument is a list of hash names to calculate. Returns the the list
+    of calculated hash values in the hexadecimal form in the same order
+    as 'hash_names'.
+    """
+    if end == None:
+        end = os.path.getsize(file_path)
 
-    md5sum = md5()
-    with open(fpath, 'rb') as f:
-        while True:
-            data = f.read(blksize)
-            if not data:
-                break
-            md5sum.update(data)
-    return md5sum.hexdigest()
+    chunk_size = 65536
+    to_read = end - start;
+    read = 0
+
+    hashes = []
+    for hash_name in hash_names:
+        hashes.append(hashlib.new(hash_name))
+
+    with open(file_path, "rb") as f:
+        f.seek(start)
+
+        while read < to_read:
+            if read + chunk_size > to_read:
+                chunk_size = to_read - read
+            chunk = f.read(chunk_size)
+            for hash_obj in hashes:
+                hash_obj.update(chunk)
+            read += chunk_size
+
+    result = []
+    for hash_obj in hashes:
+        result.append(hash_obj.hexdigest())
+
+    return result
+
+def get_md5sum(fpath):
+    return calc_hashes(fpath, ('md5', ))[0]
 
 def normalize_ksfile(ksconf, release, arch):
     def _clrtempks():
diff --git a/plugins/imager/raw_plugin.py b/plugins/imager/raw_plugin.py
index 4c432a6..0b80d1e 100644
--- a/plugins/imager/raw_plugin.py
+++ b/plugins/imager/raw_plugin.py
@@ -39,6 +39,8 @@ class RawPlugin(ImagerPlugin):
     @cmdln.option("--compress-image", dest="compress_image", type='choice',
                   choices=("gz", "bz2"), default = None,
                   help="Compress all raw images before package")
+    @cmdln.option("--generate-bmap", action="store_true", default = None,
+                  help="also generate the block map file")
     def do_create(self, subcmd, opts, *args):
         """${cmd_name}: create raw image
 
@@ -92,7 +94,8 @@ class RawPlugin(ImagerPlugin):
                                       (creatoropts['pkgmgr'],
                                        ','.join(backends.keys())))
 
-        creator = raw.RawImageCreator(creatoropts, pkgmgr, opts.compress_image)
+        creator = raw.RawImageCreator(creatoropts, pkgmgr, opts.compress_image,
+                                      opts.generate_bmap)
 
         if len(recording_pkgs) > 0:
             creator._recording_pkgs = recording_pkgs
@@ -111,6 +114,7 @@ class RawPlugin(ImagerPlugin):
             creator.configure(creatoropts["repomd"])
             creator.copy_kernel()
             creator.unmount()
+            creator.generate_bmap()
             creator.package(creatoropts["outdir"])
             if creatoropts['release'] is not None:
                 creator.release_output(ksconf, creatoropts['outdir'], creatoropts['release'])
author	Gui Chen <gui.chen@intel.com>	2012-11-28 14:07:41 +0800
committer	Gui Chen <gui.chen@intel.com>	2012-12-12 19:33:44 +0800
commit	06fd56e63606149500dcebc72ecca1253bf16c07 (patch)
tree	303507689e52d7e7c2089259ab82e0af2fc6cb8b
parent	5654dbf4302621091279036e130b68bb466f16a8 (diff)
download	mic-06fd56e63606149500dcebc72ecca1253bf16c07.tar.gz mic-06fd56e63606149500dcebc72ecca1253bf16c07.tar.bz2 mic-06fd56e63606149500dcebc72ecca1253bf16c07.zip