#!/usr/bin/perl
# WEBWATCHER -- Watch for changes to web files

=pod

=head1 NAME

Webwatcher - watch for changed web pages

=head1 USAGE

    ./webwatcher.pl

=head1 DESCRIPTION

Webwatcher reads a file containing a list of URLs, one per line. It
gets the web page and computes a checksum. It compares the checksum to
the previous run to see if the page has changed. If any page has
changed it sends a mail message with the changed pages. This script is
meant to be run as a cron job.

The script overwrites the input file with a new version of the file.
Lines in the file starting with the sharp character (#) are ignored and
passed through unchanged to the output. So are lines containing URLs
that have not changed. If a URL has changed, the script adds the
checksum and current time to the line. To modify the list of URLs to
check add or delete lines from the file.

A page that cannot be fetched is not treated as changed: a warning is
printed and its line is passed through unchanged. The input file is only
rewritten after every page has been checked and the mail message has
been handed to sendmail, so a failed run leaves the file as it was.

There are several configuration variables that need to be set before
running the script:

=over 4

=item MODFILE

The full path to the modification file. This file contains the URLs of
web pages to watch, one per line.

=item EMAIL

The email address to notify when the web page changes

=item SUBJECT

The subject line for the email message

=item SENDMAIL

The full path to the sendmail executable

=back

=head1 INSTALLATION

This script should be placed on your ISP. Login to your ISP and make the
script executable:

    chmod +x webwatcher.pl

Then add the script to your crontab with the command

    crontab -e

See the man page for crontab for more information. The script probably
should run once a day before the time you usually check your mail.

=head1 LICENSE

Copyright 2005 by Bernard Simon. You may use this script as you wish as
long as this license is not removed.

=cut

use strict;
use warnings;
use FileHandle;
use LWP::Simple;
use Digest::MD5 qw(md5_base64);

#----------------------------------------------------------------------
# Configuration variables

# File containing pages to watch, one per line
use constant MODFILE => '/PATH/TO/MODFILE';

# Email address to notify when page changes
use constant EMAIL => 'YOU@YOURS.COM';

# Subject line for mail message
use constant SUBJECT => 'Changed web pages';

# Path to sendmail command
use constant SENDMAIL => '/usr/sbin/sendmail';

#----------------------------------------------------------------------
# Main routine

# Read the whole watch list up front; the file is rewritten at the end.
my $mod = FileHandle->new(MODFILE, 'r')
    or die "Couldn't open modfile: $!";
my @lines = <$mod>;
$mod->close;
chomp(@lines);

# Fail before doing any network work if the file cannot be rewritten.
# Append mode proves write access without truncating the file.
my $probe = FileHandle->new(MODFILE, 'a')
    or die "Can't write to modfile: $!";
$probe->close;

my $newdate = time;
my @notices;

foreach my $line (@lines) {
    # Blank lines and comment lines pass through untouched.
    next unless $line =~ /^[^\#]/;

    my ($url, $oldsum, $olddate) = split(' ', $line);

    # A whitespace-only line has no URL to check.
    next unless defined $url;

    my $newsum = page_checksum($url);
    unless (defined $newsum) {
        # A failed download is not a change; keep the old checksum.
        warn "Couldn't fetch $url\n";
        next;
    }

    # A line without a stored checksum is always recorded as changed.
    next if defined $oldsum && $newsum eq $oldsum;

    my $notice = "$url has been modified ";
    $notice .= "since " . scalar(localtime($olddate)) if $olddate;
    push @notices, "$notice\n";

    # $line aliases the element of @lines, so this updates the output.
    $line = "$url $newsum $newdate";
}

# Send the notification before recording the new checksums. If the mail
# cannot be handed over we die here, the file is left alone, and the
# next run detects and reports the same changes again.
if (@notices) {
    my $mail = sendmail(to => EMAIL, from => EMAIL, subject => SUBJECT)
        or die "Couldn't run sendmail: $!";
    print {$mail} @notices;
    $mail->close
        or die "sendmail failed: ", ($! ? "$!" : "exit status " . ($? >> 8));
}

# Only now overwrite the watch list with the updated lines.
$mod = FileHandle->new(MODFILE, 'w')
    or die "Can't write to modfile: $!";
print {$mod} "$_\n" foreach @lines;
$mod->close
    or die "Can't write to modfile: $!";

#----------------------------------------------------------------------
# PAGE_CHECKSUM -- Fetch a page and return its MD5 checksum
#
# Takes the URL to fetch. Returns the base64 MD5 digest of the page
# content, or undef if the page could not be fetched.

sub page_checksum {
    my ($url) = @_;

    my $content = get($url);
    return unless defined $content;

    # md5_base64 croaks on characters above 255. Content that fits in
    # single bytes is hashed as those bytes; anything wider is encoded
    # as UTF-8 first.
    utf8::encode($content) unless utf8::downgrade($content, 1);

    return md5_base64($content);
}

#----------------------------------------------------------------------
# SENDMAIL -- Open the mailer as a file
#
# Takes header name/value pairs (to, from, subject, ...) which override
# the defaults. Writes the headers and the blank separator line, then
# returns the open handle so the caller can print the message body and
# close it. Returns undef if the mailer could not be started.

sub sendmail {
    my %header = (
        to      => EMAIL,
        from    => EMAIL,
        subject => '[No Subject]',
        @_
    );

    # List-form pipe open runs sendmail directly, without a shell.
    my $mail = FileHandle->new;
    open($mail, '|-', SENDMAIL, '-oi', '-t') or return;

    foreach my $name (sort keys %header) {
        print {$mail} "\u$name: $header{$name}\n";
    }
    print {$mail} "\n";

    return $mail;
}