author    Aleksander Mistewicz <a.mistewicz@samsung.com>  2016-11-04 09:30:10 (GMT)
committer Aleksander Mistewicz <a.mistewicz@samsung.com>  2017-01-16 10:07:48 (GMT)
commit    3c368184c138a3c0268aedae3f24edace66deae4 (patch)
tree      963c0ef6b6a1ceb36e5f0dcc3abecafca94df5be /tsp/scripts
parent    5211c9869327081c197a8d352cc79fd83d6093d9 (diff)
download  major-3c368184c138a3c0268aedae3f24edace66deae4.zip
          major-3c368184c138a3c0268aedae3f24edace66deae4.tar.gz
          major-3c368184c138a3c0268aedae3f24edace66deae4.tar.bz2
Add tsp/scripts/download_all.py (refs/changes/16/108716/3)

Change-Id: I7125d71ac3be8607a97a5aeecdf6bca83667f8fa
Signed-off-by: Aleksander Mistewicz <a.mistewicz@samsung.com>
Diffstat (limited to 'tsp/scripts')
 -rwxr-xr-x  tsp/scripts/download_all.py  319
1 file changed, 319 insertions(+), 0 deletions(-)
diff --git a/tsp/scripts/download_all.py b/tsp/scripts/download_all.py
new file mode 100755
index 0000000..82c06f2
--- /dev/null
+++ b/tsp/scripts/download_all.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2016 Samsung Electronics Co., Ltd All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+##
+# @author Aleksander Mistewicz <a.mistewicz@samsung.com>
+
+import os
+import subprocess
+import time
+import re
+import requests
+import argparse
+import logging
+import bs4
+import threading
+import signal
+
+__version__ = "0.0.1"
+__license__ = "APACHE-2.0"
+__author__ = "Aleksander Mistewicz"
+__author_email__ = "a.mistewicz@samsung.com"
+
+USAGE = "%prog <opts> <url>"
+
+AGENT = "%s/%s" % (__name__, __version__)
+
+
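+# ImageVersion parses the snapshot and (optional) submission identifiers
+# out of a download.tizen.org URL: one "YYYYMMDD.N" version in the URL
+# means a snapshot, three (the snapshot twice plus the submission id)
+# mean a prerelease. Illustrative example (the URL layout is assumed):
+#   ImageVersion("http://download.tizen.org/snapshots/tizen/common/tizen-common_20161104.5/")
+#       .get_version() == "20161104.5"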
+class ImageVersion(object):
+
+    def __init__(self, url):
+        names = re.findall(r'tizen\-.{0,7}\w+\d{8}\.\d+', url)
+        if len(names) >= 1:
+            self.name = names[0]
+        else:
+            raise ValueError("no image name found in URL: %s" % url)
+        versions = re.findall(r'\d{8}\.\d+', url)
+        if len(versions) == 3:
+            if versions[0] != versions[1]:
+                raise ValueError("ambiguous snapshot version in URL: %s" % url)
+            self.snapshot = versions[0]
+            self.submission = versions[2]
+        elif len(versions) == 1:
+            self.snapshot = versions[0]
+            self.submission = None
+        else:
+            raise ValueError("unexpected number of versions in URL: %s" % url)
+
+    def get_version(self):
+        if self.submission:
+            return '.'.join([self.snapshot, self.submission])
+        else:
+            return self.snapshot
+
+    def get_snapshot(self):
+        return self.snapshot
+
+    def get_submission(self):
+        return self.submission
+
+    def get_name(self):
+        if self.submission:
+            return '.'.join([self.name, self.submission])
+        else:
+            return self.name
+
+    def is_prerelease(self):
+        return self.submission is not None
+
+
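+# Crawler walks the HTML directory listing under <url>/images/: every
+# subdirectory that contains an MD5SUMS file is treated as an image
+# target, and crawl_images() collects the downloadable artifacts
+# (archives, checksums and metadata files) inside it.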
+class Crawler(object):
+
+    @classmethod
+    def get_links(cls, session, url):
+        main = session.get(url)
+        soup = bs4.BeautifulSoup(main.text, 'html.parser')
+        links = set()
+        for link in soup.find_all('a'):
+            href = link.get('href')
+            # anchors without an href attribute yield None; skip them
+            if href:
+                links.add(href)
+        return links
+
+    @classmethod
+    def get_targets(cls, url):
+        url += "images/"
+        s = requests.Session()
+        return cls.crawl_targets(s, url)
+
+    @classmethod
+    def crawl_targets(cls, session, url):
+        links = cls.get_links(session, url)
+        discovered = set()
+        for link in links:
+            if not link.startswith("/") and link.endswith("/") and "../" not in link:
+                logging.debug("Add link to discovered: %s", link)
+                discovered |= cls.crawl_targets(session, url + link)
+            elif link == "MD5SUMS":
+                discovered.add(url)
+        return discovered
+
+    @classmethod
+    def crawl_images(cls, session, url):
+        links = cls.get_links(session, url)
+        discovered = set()
+        for link in links:
+            if link == "MD5SUMS" \
+                    or link.endswith(".tar.gz") \
+                    or link.endswith(".ks") \
+                    or link.endswith(".packages") \
+                    or link.endswith(".xml") \
+                    or link.endswith(".bmap") \
+                    or link.endswith(".raw.bz2") \
+                    or link.endswith("-default") \
+                    or link.endswith(".log"):
+                discovered.add(url + link)
+        return discovered
+
+
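+# One Downloader thread per image directory: it mirrors the remote files
+# with wget into a directory named after the image, writes diff.report,
+# and retries until the md5sums verify or SIGINT clears the shared
+# "work" event.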
+class Downloader(threading.Thread):
+
+    def __init__(self, work, img_ver, session, url):
+        self.work = work
+        threading.Thread.__init__(self)
+        self.url = url
+        self.session = session
+        self.img_ver = img_ver
+        self.is_prerelease = img_ver.is_prerelease()
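+        # the last path component of the image URL becomes the local
+        # working directory for the downloaded files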
+        m = re.search(r'.*/(.*)/$', url)
+        self.name = m.group(1)
+        try:
+            os.mkdir(self.name)
+        except OSError as e:
+            logging.warn("mkdir %s: %s" % (self.name, e.strerror))
+        self.diff_report_filename = self.name + "/diff.report"
+
+    def run(self):
+        logging.info("Start downloader: %s" % self.name)
+        self.files = Crawler.crawl_images(self.session, self.url)
+        logging.debug(self.files)
+
+        pre_url = None
+        md5sums = None
+        for url in frozenset(self.files):
+            if url.endswith(".packages"):
+                pre_url = url
+                self.files.discard(url)
+            elif url.endswith("/MD5SUMS"):
+                md5sums = url
+                self.files.discard(url)
+
+        if not self.is_prerelease:
+            self.write_diff_for_snapshot()
+        else:
+            # Replace prerelease with snapshots
+            snap_url = re.sub('prerelease', 'snapshots', pre_url)
+            # Remove prerelease subdirectory
+            snap_url = re.sub("/[^/]*" + self.img_ver.get_submission() + "/", '/', snap_url)
+            # Remove SR from filename
+            snap_url = re.sub(r"\." + self.img_ver.get_submission(), '', snap_url)
+            logging.info("snap: %s" % snap_url)
+
+            snap = self.session.get(snap_url)
+            pre = self.session.get(pre_url)
+            if self.check_diff(pre.text, snap.text):
+                return
+
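+        # Re-run wget (-c resumes partial files) until the checksums
+        # verify or the shared "work" event is cleared by SIGINT.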
+        while self.work.is_set():
+            sub_dwns = set()
+            for url in self.files:
+                sub_dwns.add(subprocess.Popen(["wget", "-cq", url], cwd=self.name))
+            for sub_dwn in sub_dwns:
+                sub_dwn.wait()
+            if self.check_md5(md5sums):
+                break
+            if self.work.is_set():
+                time.sleep(10)
+
+        logging.info("Stop downloader: %s" % self.name)
+
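+    # The symmetric difference of the prerelease and snapshot .packages
+    # lists goes to <image>/diff.report; an empty diff means the images
+    # are identical and run() skips the download.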
+    def check_diff(self, pre_pkgs, snap_pkgs):
+        logging.debug("Checking diff")
+        set_snap_pkgs = set(snap_pkgs.splitlines())
+        set_pre_pkgs = set(pre_pkgs.splitlines())
+        diff = set_pre_pkgs ^ set_snap_pkgs
+        with open(self.diff_report_filename, 'w') as f:
+            ret = (len(diff) == 0)
+            if ret:
+                s = 'Images are identical'
+            else:
+                s = '\n'.join(diff)
+            logging.info(s)
+            f.write(s)
+        return ret
+
+    def write_diff_for_snapshot(self):
+        logging.debug("Write diff for snapshot image")
+        with open(self.diff_report_filename, 'w') as f:
+            f.write('Snapshot')
+
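+    # Entries for small metadata files (ks, json, log, xml, -default,
+    # packages) are removed from MD5SUMS before verification, so only
+    # the image archives themselves are checked.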
+    def check_md5(self, md5sum_url):
+        logging.debug("Checking md5sum")
+        md5_file = "md5sums"
+        md5_path = self.name + "/" + md5_file
+        subprocess.call(["wget", md5sum_url, "-qO", md5_path])
+        subprocess.call(["sed", "-e", r"/\(ks\|json\|log\|xml\|-default\|packages\)/d", "-i", md5_path])
+        p = subprocess.Popen(["md5sum", "-c", md5_file], cwd=self.name)
+        p.wait()
+        ret = p.returncode
+        if not ret:
+            logging.info("Checksum OK")
+            os.remove(md5_path)
+        else:
+            logging.warn("Checksum FAILED\nRemoving files mentioned in md5sums file")
+            with open(md5_path, 'r') as f:
+                for i in f:
+                    # each md5sums line is "<hash>  <filename>"; strip the
+                    # hash and the trailing newline before removing the file
+                    parts = i.split()
+                    if not parts:
+                        continue
+                    try:
+                        os.remove(self.name + "/" + parts[-1])
+                    except OSError as e:
+                        logging.warn("rm: %s" % e.strerror)
+            os.remove(md5_path)
+            return False
+        return True
+
+
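+# ImageDownloader ties everything together: it derives the version from
+# the URL, discovers the image targets, writes project.conf and spawns
+# one Downloader thread per target.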
+class ImageDownloader(object):
+
+    def __init__(self, url, dry):
+        self.url = url
+        self.dry = dry
+
+        self.img_ver = ImageVersion(url)
+        self.diff_report_filename = "diff.report"
+
+        logging.debug('snapshot number: %s', self.img_ver.get_snapshot())
+        logging.debug('version number: %s', self.img_ver.get_version())
+
+        self.urls = Crawler.get_targets(self.url)
+
+        # Postcondition: self.urls holds one URL per discovered image directory
+        logging.debug("Files to download: %s", self.urls)
+
+        if self.dry:
+            logging.debug("Skipping run")
+        else:
+            self.create_projectconf("N/A", "N/A")
+            self.run()
+
+    def run(self):
+        logging.debug("Dispatching downloaders...")
+        s = requests.Session()
+        downloaders = set()
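+        # SIGINT only clears the shared event: each Downloader thread
+        # finishes its current pass and then exits cleanly.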
+        def handler(signum, frame):
+            logging.info("SIGINT")
+            work.clear()
+
+        work = threading.Event()
+        work.set()
+        signal.signal(signal.SIGINT, handler)
+        for url in self.urls:
+            dwn = Downloader(work, self.img_ver, s, url)
+            dwn.start()
+            downloaders.add(dwn)
+        for dwn in downloaders:
+            dwn.join()
+
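+    # Writes the image name, architecture and target name, one per line,
+    # to project.conf; __init__ currently passes "N/A" placeholders for
+    # arch and target_name.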
+    def create_projectconf(self, arch, target_name):
+        logging.debug("Create project.conf file for: %s %s", arch, target_name)
+        if self.dry:
+            return
+        prjconf = [self.img_ver.get_name(), arch, target_name]
+        with open("project.conf", 'w') as f:
+            f.write('\n'.join(prjconf) + '\n')
+
+
+def parse_arguments():
+    parser = argparse.ArgumentParser(description="Image downloader for download.tizen.org")
+
+    parser.add_argument("url", metavar='<url>', type=str,
+                        help='URL of prerelease or snapshot to download images from.')
+
+    parser.add_argument("-d", "--dry-run",
+                        action="store_true", dest="dry",
+                        help="Dry run - do not actually download images")
+
+    parser.add_argument("-l", "--log",
+                        action="store", dest="loglevel",
+                        help="Verbosity level (debug, info, warning, error, critical)")
+
+    args = parser.parse_args()
+
+    return args
+
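+# Example invocation (hypothetical URL, shown for illustration only):
+#   ./download_all.py -l debug \
+#       http://download.tizen.org/snapshots/tizen/common/tizen-common_20161104.5/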
+def main():
+    args = parse_arguments()
+    if args.loglevel:
+        numeric_level = getattr(logging, args.loglevel.upper(), None)
+        if not isinstance(numeric_level, int):
+            raise ValueError('Invalid log level: %s' % args.loglevel)
+        logging.basicConfig(format='%(asctime)s %(message)s', level=numeric_level)
+    logging.debug("Begin")
+    ImageDownloader(args.url, args.dry)
+    logging.debug("End")
+
+if __name__ == '__main__':
+    main()