From f2f8964a72f13657af301029c98bfd4b1732d9a6 Mon Sep 17 00:00:00 2001
From: Alexander Sulfrian
Date: Fri, 27 Jul 2012 14:41:21 +0200
Subject: bin/comics/xkcd: get comic for specific date via the archive page

It is possible to get an xkcd comic for a specific date via the
archive page. This eliminates the need for the state directory; xkcd
was the only script that used it.
---
 bin/comics/xkcd | 63 ++++++++++++++++++++++++++++-------------------------------
 etc/settings.sh | 15 ++++++--------
 2 files changed, 37 insertions(+), 41 deletions(-)

diff --git a/bin/comics/xkcd b/bin/comics/xkcd
index 7079294..78c81b3 100755
--- a/bin/comics/xkcd
+++ b/bin/comics/xkcd
@@ -15,38 +15,37 @@ cd "${tmp}"
 browser="Mozilla/4.76 [de] (X11; U; Linux 2.2.18 i586)"
 newn="${day}.png"
 
-wget ${wget_args} -U "$browser" http://www.xkcd.com -O index.html
-
-url=$(grep -A 1 "Image URL" index.html | sed 'N;s/.*\(http.*png\)<\?.*/\1/')
-imgname=$(echo "$url" | tr '/' '\n' | tail -1)
-
-# get additional information
-img_tag=$(grep -A2 "src=\"${url}" index.html | sed 'N;N;s/.*\(<img[^>]\+>\).*/\1/')
-title=$(sed 's/.*title="\([^"]*\)".*/\1/'<<< $img_tag)
-alt=$(sed 's/.*alt="\([^"]*\)".*/\1/'<<< $img_tag)
-
-if [ ! -s "${image_dir}/$newn" ]; then
-    # no image availalable for current date
-
-    if ! grep -q "${url}" "${state_file}" >/dev/null; then
-        # new image
-        echo $url > "${state_file}"
-
-        wget ${wget_args} -U "$browser" -O "${tmp}/${newn}" \
-            --header="Referer: http://www.xkcd.com/" "$url"
-
-        if [ -s "${tmp}/${newn}" ]; then
-            # save
-            mv "${tmp}/${newn}" "${image_dir}/$newn"
-            echo "$title" > "${image_dir}/${day}.title"
-            echo "$alt" > "${image_dir}/${day}.alt"
-
-            # update symlinks
-            rm -f "${comic_dir}/latest.png" "${comic_dir}/latest.title" "${comic_dir}/latest.alt"
-            ln -s "${image_offset}/$newn" "${comic_dir}/latest.png"
-            ln -s "${image_offset}/${day}.title" "${comic_dir}/latest.title"
-            ln -s "${image_offset}/${day}.alt" "${comic_dir}/latest.alt"
-        fi
+# get url
+echo $(date -d"$daysago days ago" +%Y-%-m-%-d)
+url="$(wget ${wget_args} -U "$browser" http://xkcd.com/archive/ -O - | \
+    grep -o "href=\".*\" .*\"$(date -d"$daysago days ago" +%Y-%-m-%-d)\"" | \
+    sed 's/^href="\(.*\)" title.*$/\1/')"
+
+if [ -n "$url" ]; then
+    # fetch comic
+    wget ${wget_args} -U "$browser" "http://www.xkcd.com${url}" -O index.html
+
+    url=$(grep -A 1 "Image URL" index.html | sed 'N;s/.*\(http.*png\)<\?.*/\1/')
+    imgname=$(echo "$url" | tr '/' '\n' | tail -1)
+
+    # get additional information
+    img_tag=$(grep -A2 "src=\"${url}" index.html | sed 'N;N;s/.*\(<img[^>]\+>\).*/\1/')
+    alt=$(sed 's/.*title="\([^"]*\)".*/\1/'<<< $img_tag)
+    title=$(sed 's/.*alt="\([^"]*\)".*/\1/'<<< $img_tag)
+
+    wget ${wget_args} -U "$browser" -O "${tmp}/${newn}" --header="Referer: http://www.xkcd.com/" "$url"
+
+    if [ -s "${tmp}/${newn}" ]; then
+        # save
+        mv "${tmp}/${newn}" "${image_dir}/$newn"
+        echo "$title" > "${image_dir}/${day}.title"
+        echo "$alt" > "${image_dir}/${day}.alt"
+
+        # update symlinks
+        rm -f "${comic_dir}/latest.png" "${comic_dir}/latest.title" "${comic_dir}/latest.alt"
+        ln -s "${image_offset}/$newn" "${comic_dir}/latest.png"
+        ln -s "${image_offset}/${day}.title" "${comic_dir}/latest.title"
+        ln -s "${image_offset}/${day}.alt" "${comic_dir}/latest.alt"
     fi
 fi
 
diff --git a/etc/settings.sh b/etc/settings.sh
index d07293f..00947ac 100644
--- a/etc/settings.sh
+++ b/etc/settings.sh
@@ -1,7 +1,11 @@
 # this file is sourced by the get scripts
 
-if [ "$1" = "" ]
-then
+if [ -z "$top" ]; then
+    echo "\$top needs to be set before sourcing settings.sh" 1>&2
+    exit 1
+fi
+
+if [ -z "$1" ]; then
     daysago=0
 else
     daysago=$1
@@ -12,9 +16,6 @@ year=$(date -d"$daysago days ago" +%Y)
 month=$(date -d"$daysago days ago" +%Y-%m)
 day=$(date -d"$daysago days ago" +%d)
 
-top="$(pwd)/$(dirname $0)/../../"
-state_dir=${top}/var/state/
-state_file=${state_dir}/${comic}.state
 comic_dir=${top}/htdocs/imgs/${comic}
 image_offset=${month}/
 image_dir=${comic_dir}/${image_offset}
@@ -23,8 +24,4 @@ if [ ! -d "${image_dir}" ]; then
     mkdir -p "${image_dir}"
 fi
 
-if [ ! -d "${state_dir}" ]; then
-    mkdir -p "${state_dir}"
-fi
-
 wget_args="--timeout 100 --wait=1000 --random-wait --no-cache"
-- 
cgit v1.2.3-1-g7c22
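
Note: the archive lookup this patch adds can also be exercised on its own. The
snippet below is a rough standalone sketch, not code from the repository: the
script name, output messages and plain wget options are made up, while the
unpadded %Y-%-m-%-d date format (GNU date) and the wget | grep -o | sed
pipeline are copied from the patch, which relies on the archive page carrying
each comic's date in the link's title attribute.

  #!/bin/sh
  # Standalone sketch of the archive lookup (illustrative only).
  # Usage: ./sketch.sh [daysago]
  daysago=${1:-0}

  # Same unpadded date format the patch greps for, e.g. 2012-7-27.
  wanted=$(date -d"$daysago days ago" +%Y-%-m-%-d)

  # Pull the relative comic URL for that date out of the archive page,
  # mirroring the pipeline from bin/comics/xkcd.
  url=$(wget -q -O - http://xkcd.com/archive/ | \
      grep -o "href=\".*\" .*\"${wanted}\"" | \
      sed 's/^href="\(.*\)" title.*$/\1/')

  if [ -n "$url" ]; then
      echo "comic page for ${wanted}: http://www.xkcd.com${url}"
  else
      echo "no comic found for ${wanted}" 1>&2
  fi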
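The settings.sh hunks also move responsibility for $top to the callers: the
default top="$(pwd)/$(dirname $0)/../../" assignment is removed, and sourcing
the file without $top set now aborts with an error. A hypothetical get-script
preamble under the new scheme might look as follows; $top and $comic are the
variables settings.sh itself expects, everything else is assumed.

  #!/bin/sh
  # Hypothetical bin/comics/* preamble (illustrative only): define $comic
  # and $top before sourcing settings.sh, since the default $top is gone.
  comic=xkcd
  top="$(pwd)/$(dirname "$0")/../../"

  # settings.sh reads this script's $1 as the "days ago" offset and derives
  # $day, $image_dir, $comic_dir, $wget_args, etc. from it.
  . "${top}/etc/settings.sh"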