summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Aleksander Mistewicz <a.mistewicz@samsung.com> 2017-04-27 12:53:26 (GMT)
committer Aleksander Mistewicz <a.mistewicz@samsung.com> 2017-06-06 11:48:04 (GMT)
commit d21e12f9f5d89e9f718ca260832d9ee8727ff9eb (patch)
tree 48254a259b6618e363f4697985c86dfb94105f78
parent d8b9322e6d4db533d8f8e8d894ed7a6d07889411 (diff)
download major-d21e12f9f5d89e9f718ca260832d9ee8727ff9eb.zip
major-d21e12f9f5d89e9f718ca260832d9ee8727ff9eb.tar.gz
major-d21e12f9f5d89e9f718ca260832d9ee8727ff9eb.tar.bz2
Use GO version of crawler (refs/changes/19/132519/1)
Change-Id: I97bc853df9d792bcc95aa23ed8de65a03780312f
-rwxr-xr-x tsp/jobs/trigger_downloads.sh 31
1 files changed, 14 insertions, 17 deletions
diff --git a/tsp/jobs/trigger_downloads.sh b/tsp/jobs/trigger_downloads.sh
index db3e3a0..3127670 100755
--- a/tsp/jobs/trigger_downloads.sh
+++ b/tsp/jobs/trigger_downloads.sh
@@ -29,28 +29,25 @@ test -n "${TIMESTAMP}" || die "Missing argument: timestamp"
mkdir -p "${WS_WATCHER}"
cd "${WS_WATCHER}"
-touch dispatched_urls
-touch "${TIMESTAMP}"
-
-TREE="${TSP_DIR}/tree"
-test -f "${TREE}" || TREE="${TSP_DIR}/tree.example"
-
-# Crawl given url
-case "$URL" in
- *unified*)
- timeout 600 "${TSP_DIR}/scripts/crawler.py" --timestamp "${TIMESTAMP}" --tree "${TSP_DIR}/tree_unified" --log DEBUG "${URL}" || die "Crawl failed" "$?"
- ;;
- *)
- timeout 600 "${TSP_DIR}/scripts/crawler.py" --timestamp "${TIMESTAMP}" --tree "${TREE}" --log DEBUG "${URL}" || die "Crawl failed" "$?"
- ;;
-esac
-
# Read next buid_nr
touch next_dwn
next=$(cat next_dwn)
test -n "$next" || next=1
initial="$next"
+# Crawl given url
+DB="timestamp.db3"
+TEMP_DB="$DB.$$"
+cp "$DB" "$TEMP_DB" # Save database in the current state
+test -d json || mkdir json
+test -d log || mkdir log
+timeout 1200 gorawler -res "json/$TIMESTAMP.$next.$$.json" -db "$TEMP_DB" -log "log/$TIMESTAMP.$next.$$.log" \
+ -url "$URL" > modified_urls || {
+ rm -v "$TEMP_DB"
+ die "Crawl failed!"
+}
+mv "$TEMP_DB" "$DB" # Update database if it succeeded
+
# Allocate 300 numbers in case this script terminates during dispatch
echo $((next+300)) > "${WS_WATCHER}/next_dwn" || die "Write failed: ${WS_WATCHER}/next_dwn"
# Append to unique_dispatch in case previous job failed
@@ -58,7 +55,7 @@ if [ -f manual_urls ];
then
sort -u modified_urls manual_urls >> unique_dispatch
else
- cat modified_urls >> unique_dispatch
+ sort -u modified_urls >> unique_dispatch
fi
# Choose image_map