summary | refs | log | tree | commit | diff
path: root/tsp/scripts
diff options
context:
space:
mode:
author: Aleksander Mistewicz <a.mistewicz@samsung.com> 2016-11-16 12:24:52 (GMT)
committer: Aleksander Mistewicz <a.mistewicz@samsung.com> 2017-01-16 10:07:49 (GMT)
commit: 26d90eddf8bf489e6366516bb07222f70c966bb7 (patch)
tree: 99a4797a495874ce68e0009ceff266e88e192bc9 /tsp/scripts
parent: 1acd5850c9159b01eb8006053b653abbfddfa5a3 (diff)
download: major-26d90eddf8bf489e6366516bb07222f70c966bb7.zip
major-26d90eddf8bf489e6366516bb07222f70c966bb7.tar.gz
major-26d90eddf8bf489e6366516bb07222f70c966bb7.tar.bz2
Add "--tree" argument to tsp/scripts/crawler.py [refs/changes/33/108733/3]

Add tsp/tree.example

Change-Id: Ic72e4e80812a56b18d606fc36e57096a84e205bd
Signed-off-by: Aleksander Mistewicz <a.mistewicz@samsung.com>
Diffstat (limited to 'tsp/scripts')
-rwxr-xr-x tsp/scripts/crawler.py | 21
1 files changed, 17 insertions, 4 deletions
diff --git a/tsp/scripts/crawler.py b/tsp/scripts/crawler.py
index 9ea25fd..86724e3 100755
--- a/tsp/scripts/crawler.py
+++ b/tsp/scripts/crawler.py
@@ -68,9 +68,14 @@ def get_urls2check(session, md5sums, url, level=2):
if level == 0:
md5sums.add(url + "MD5SUMS")
-def get_modified_paths(discovered, timestamp):
+def get_modified_paths(discovered, timestamp, tree):
logging.info("get_modified_paths")
ret = set()
+ if tree:
+ logging.debug("Read tree file")
+ with open(tree, 'r') as f:
+ tree_urls = f.read().splitlines()
+ logging.debug(tree_urls)
str_time = time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(time.time()))
logging.info("Next timestamp: %s", str_time)
if os.path.exists(dispatched_urls):
@@ -83,8 +88,12 @@ def get_modified_paths(discovered, timestamp):
s.headers.update({"If-Modified-Since": stamp})
for url in discovered:
logging.debug("Check for MD5SUMS change: %s", url)
- md5sums_urls = set()
- get_urls2check(s, md5sums_urls, url + "images/")
+ if not tree:
+ logging.debug("Obtain files to check by crawl")
+ md5sums_urls = set()
+ get_urls2check(s, md5sums_urls, url + "images/")
+ else:
+ md5sums_urls = [url + e for e in tree_urls]
change = False
for md5sums_url in md5sums_urls:
r = s.get(md5sums_url)
@@ -124,6 +133,9 @@ def parse_arguments():
parser.add_argument("-t", "--timestamp", default="timestamp",
help="File to read a reference timestamp from")
+ parser.add_argument("-r", "--tree",
+ help="Path to a file with list of urls to check")
+
args = parser.parse_args()
return args
@@ -138,6 +150,7 @@ if '__main__' == __name__:
logging.debug("Begin")
snapshots = crawl(args.url)
timestamp_file = args.timestamp
+ tree_file = args.tree
if "snapshots" in args.url:
discovered = snapshots
@@ -151,7 +164,7 @@ if '__main__' == __name__:
dispatched = set([url.rstrip() for url in f.readlines()])
# save discovered URLs for dispatching download requests
- modified = get_modified_paths(discovered, timestamp_file)
+ modified = get_modified_paths(discovered, timestamp_file, tree_file)
with open(discovered_urls, 'w') as f:
f.write('\n'.join(modified) + '\n')