#!/bin/sh

# Copyright (c) 2016 Samsung Electronics Co., Ltd All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Author: Aleksander Mistewicz

# Crawl a URL for modified image locations and dispatch download/test jobs.
#
# Usage: <this script> URL TIMESTAMP
#   URL        address handed to `watcher -url`
#   TIMESTAMP  label used in the crawl log file name and in the reported metric
#
# For every URL listed in modified_urls (plus manual_urls, when present) one
# download job is queued through tsmaster, followed by one test job per entry
# of TARGET_LIST. Job/directory numbers are taken from the counter file
# "${WS_WATCHER}/next_dwn".
#
# NOTE(review): die, update_db, url2sr, url2profile, post_prom and the
# variables WS_WATCHER, WS_DOWNLOAD, TARGET_LIST and SNAPSHOT_WS_DOWNLOAD_NEW
# are not defined here; they are presumably provided by common.sh - confirm.

export TSP_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd -P)/.."
. "${TSP_DIR}/common.sh"

URL="$1"
TIMESTAMP="$2"

test -n "${URL}" || die "Missing argument: url"
test -n "${TIMESTAMP}" || die "Missing argument: timestamp"

mkdir -p "${WS_WATCHER}"
# Everything below uses paths relative to WS_WATCHER; never continue elsewhere.
cd "${WS_WATCHER}" || die "Cannot enter directory: ${WS_WATCHER}"

# Read next build_nr
touch next_dwn
next=$(cat next_dwn)
test -n "$next" || next=1
initial="$next"

# Crawl given url
DB="timestamp.db3"
TEMP_DB="$DB.$$"
cp "$DB" "$TEMP_DB" # Save database in the current state
test -d json || mkdir json
test -d log || mkdir log
timeout 1200 watcher -db "$TEMP_DB" -log "log/$TIMESTAMP.$next.$$.log" -url "$URL" > modified_urls || {
    rm -v "$TEMP_DB"
    die "Crawl failed!"
}
mv "$TEMP_DB" "$DB" # Update database if it succeeded

# Allocate 300 numbers in case this script terminates during dispatch
echo $((next+300)) > "${WS_WATCHER}/next_dwn" || die "Write failed: ${WS_WATCHER}/next_dwn"

# Append to unique_dispatch in case previous job failed
if [ -f manual_urls ]; then
    sort -u modified_urls manual_urls >> unique_dispatch
else
    sort -u modified_urls >> unique_dispatch
fi

# Choose image_map
IMAGE_MAP="${TSP_DIR}/image_map"
test -f "${IMAGE_MAP}" || IMAGE_MAP="${TSP_DIR}/image_map.example"

while read -r url
do
    test -n "${url}" || continue
    echo "Processing: ${url}"

    # kill currently running jobs for this url
    # index() compares the URL as a literal substring; using it as a regex
    # would let '.', '?' or '+' in the URL select unrelated jobs.
    for i in $(tsmaster | awk -v URL="$url" '$2 ~ "running" && index($0, URL) {print $1}'); do
        kill "$(tsmaster -p "$i")"
    done

    next=$((next+1))
    dwn_next="${next}"

    # prepare download directory
    # ${WS_DOWNLOAD:?} aborts instead of running rm -rf on "/<number>" when
    # the variable is empty or unset.
    rm -rf "${WS_DOWNLOAD:?}/$dwn_next"
    mkdir -p "${WS_DOWNLOAD}/$dwn_next"
    cd "${WS_DOWNLOAD}/$dwn_next" || die "Cannot enter directory: ${WS_DOWNLOAD}/$dwn_next"

    # download images
    dwn_nr=$(tsmaster -L "$dwn_next" python "${TSP_DIR}/scripts/download_all.py" --log=INFO -m "${IMAGE_MAP}" "$url")

    case "${url}" in
        *snapshots*)
            echo "Url is a snapshot, create/update symlink"
            ln -Tsf "${WS_DOWNLOAD}/${dwn_next}" "${SNAPSHOT_WS_DOWNLOAD_NEW}-$(url2profile "${url}")"
            ;;
    esac

    # TARGET_LIST is intentionally unquoted: it is split into one word per target.
    for target in ${TARGET_LIST}
    do
        next=$((next+1))

        # prepare download directory
        # Each target gets its own number that is a symlink to the download.
        rm -rf "${WS_DOWNLOAD:?}/${next}"
        ln -sf "${WS_DOWNLOAD}/${dwn_next}" "${WS_DOWNLOAD}/${next}"

        # -D "$dwn_nr" is given the value printed by the download tsmaster call.
        tsmaster -D "$dwn_nr" -L "IMAGE_TEST" sh "${TSP_DIR}/jobs/img_test_common.sh" "$next" "$target"

        # Update status in database
        update_db "$(url2sr "$url")" "Downloading" "$next" "$target"
    done
done < unique_dispatch

# Return to the watcher workspace so the relative cleanup below hits the
# right files.
cd "${WS_WATCHER}" || die "Cannot enter directory: ${WS_WATCHER}"
echo "$next" > "${WS_WATCHER}/next_dwn" || die "Write failed: ${WS_WATCHER}/next_dwn"

# Report amount of new images
post_prom "image_dispatch$(echo "$TIMESTAMP" | tr '.' '_')" "$((next-initial))" "Amount of images dispatched for $TIMESTAMP"

# Cleanup
rm -f unique_dispatch modified_urls manual_urls