From The Compiler, 5 Years ago, written in Bash.
Embed
  1. #!/bin/bash
  2.  
  3. # Usage:
  4. #
  5. # create ~/.webdiff/pages with one page per line, in the format
  6. #   <char changed threshold> <url>
  7. # e.g:
  8. #   1 http://www.google.com/
  9. #
  10. # Then just periodically call webdiff, e.g. in a cronjob
  11.  
  12. if [[ $1 == -v ]]; then
  13.     shift 1
  14.     verbose=1
  15. else
  16.     verbose=0
  17. fi
  18.  
  19. tmp=$(mktemp)
  20.  
  21. trap 'rm -f "$tmp"' EXIT
  22.  
  23. while read thresh page; do
  24.     file=~/.webdiff/down/"${page//\//@}"
  25.     ((verbose)) && echo "=== $page ==="
  26.  
  27.     elinks -dump -no-references "$page" > "$tmp"
  28.  
  29.     if [[ $? != 0 ]]; then
  30.         echo "$page: elinks failed" >&2
  31.         continue
  32.     fi
  33.  
  34.     if [[ ! -r "$file" ]]; then
  35.         ((verbose)) && echo "First download"
  36.         mv "$tmp" "$file"
  37.         continue
  38.     fi
  39.  
  40.     n_old=$(wc -c "$file" | awk '{ print $1 }')
  41.     n_diff=$(diff -wy --suppress-common-lines <(sed 's/./&\n/g' "$file") <(sed 's/./&\n/g' "$tmp") | wc -l)
  42.  
  43.     if (( n_diff >= thresh || verbose)); then
  44.         echo "=== $page ==="
  45.         echo "$n_diff/$n_old chars changed"
  46.         echo
  47.         diff -wu "$file" "$tmp"
  48.     fi
  49.  
  50.     mv "$tmp" "$file"
  51. done < ~/.webdiff/pages