#!/usr/bin/perl -w

######################################################################
###
###   IMPORTANT!
###   Please read the warranty and legal notice 
###   at the end of this file!
###
######################################################################

require 5.000;
use lib '/usr/local/bin/',"$ENV{HOME}/bin/",'/usr/stud/loescher/bin/';
use lib "$ENV{HOME}/lib/perl5/site_perl";
use slutil;
use English;
use Getopt::Std;
use LWP::Simple; # libwww-perl-5.08.tar.gz
# libwww braucht seinerseits:
# - IO-1.15.tar.gz
# - Nicht unbedingt nötig:
#   -  libnet-#.##
#   -  MD5-#.##

# Installation dieser Module ist ganz einfach:
#    perl Makefile.PL
#    make
#    make test
#    make install

######################################################################
### Voreinstellungen
######################################################################

# Program identification, used for the banner printed by Kopf().
$appname = 'WWW-Diff';
$version = '1.01';

# Account name that receives the update mails: the login name if available,
# otherwise the passwd entry name of the real user id.
$login = getlogin() || scalar getpwuid($<);

# Command-line switches. getopts() fills these package variables in later;
# they are reset here so that every switch starts out undefined.
(
  $opt_h,  # fetch only the header
  $opt_m,  # send mail
  $opt_t,  # timeout
  $opt_u,  # URL
  $opt_f,  # file
  $opt_p,  # "process URL file" (file that contains all URLs)
  $opt_d,  # directory in which the files for "-p" are stored
  $opt_c,  # command to execute
) = ();

######################################################################
### Hauptprogramm
######################################################################

# No arguments at all, or switches that getopts() rejects: show the usage
# text. Hilfe() prints the help and exits, so nothing below runs then.
Hilfe() if @ARGV == 0;
Hilfe() unless getopts("hmt:u:f:p:d:c:");

# -t VALUE: a number of seconds, optionally followed by a unit letter
# (s = seconds, m = minutes, h = hours).
if (defined $opt_t) {
    # The unit group is written "([smh]?)" instead of "([smh])?" so that $2
    # is the empty string rather than undef when no unit was given; this
    # avoids "uninitialized value" warnings under -w. The match result is
    # tested directly instead of relying on a possibly stale $1.
    $opt_t =~ /^(\d+)([smh]?)/
        or die "Ungültiger Timeout-Wert!\n";
    my ($timeout, $unit) = ($1, $2);
    $timeout *= 60   if $unit eq "m";
    $timeout *= 3600 if $unit eq "h";
    $LWP::Simple::ua->timeout($timeout);
}

# Invalid switch combinations:
# "-p" and "-d" must be given together, and so must "-u" and "-f".
Hilfe() if ( ($opt_p && !$opt_d) || (!$opt_p && $opt_d) ||
             ($opt_u && !$opt_f) || (!$opt_u && $opt_f) );
# "-p" together with "-d" additionally requires "-c".
Hilfe() if ($opt_p && $opt_d && !$opt_c);
# "-u" together with "-f" must not be combined with "-c".
Hilfe() if ($opt_u && $opt_f && $opt_c);
# Neither "-u" nor "-p" was given.
Hilfe() if (!$opt_u && !$opt_p);

# Variant 2: process a whole file of URLs.
if ($opt_p && $opt_d)
{
  ProcessURLFile($opt_p,$opt_d,$opt_c);
  exit;
}

# Variant 1: compare a single URL with a single file. The return value of
# the worker (0 = no update, 1 = update, 2 = error) becomes the exit code.
exit( $opt_h ? HoleHeader($opt_u,$opt_f) : HoleURL($opt_u,$opt_f) );


######################################################################
### Unterprogramme
######################################################################


# ProcessURLFile($urlfile, $verzeichnis, $kommando)
#
# Reads $urlfile line by line, treating every non-blank line as one URL.
# Each URL is checked with HoleHeader() (when -h is set) or HoleURL()
# against a file inside $verzeichnis whose name is the URL with every "/"
# replaced by "_" (plus ".HEAD" in header mode). Whenever the check reports
# an update (return value 1), $kommando is run through system() with every
# "{}" replaced by the URL.
#
# Dies if $urlfile cannot be opened.
#
# NOTE(review): KillSlash() and $slash are not defined in this file; they
# presumably come from the slutil module (strip a trailing path separator
# in place / hold the path separator) -- confirm there.
sub ProcessURLFile
{
  my ($urlfile,$verzeichnis,$kommando) = @_;
  KillSlash($verzeichnis);

  open(my $urls, '<', $urlfile) || die "Kann '$urlfile' nicht öffnen!\n";
  while (defined(my $url = <$urls>))
  {
    chomp $url;
    # Blank lines are not URLs; fetching them would only yield an error.
    next if $url =~ /^\s*$/;

    (my $filename = $url) =~ s/\//_/g; # replace slashes with "_"
    my $ret = $opt_h
      ? HoleHeader($url,"$verzeichnis$slash$filename.HEAD")
      : HoleURL   ($url,"$verzeichnis$slash$filename");
    next unless $ret == 1;

    # SECURITY NOTE: the URL is inserted verbatim and the result is handed
    # to the shell. The command template is expected to quote "{}" itself
    # (as the example in the help text does); URLs from an untrusted file
    # can therefore inject shell syntax.
    (my $kommandoURL = $kommando) =~ s/\{\}/$url/g;
    system($kommandoURL);
  }
  close $urls;
}


# HoleURL($url, $file)
#
# Mirrors $url into $file using LWP::Simple::mirror(), which only downloads
# the document when it is newer than the local copy.
#
# Returns:
#   0  no update (status 304, or any other successful status except 200)
#   1  the document was fetched (status 200); UpdateNachricht() was called
#   2  error; the status code and message were printed to STDERR
sub HoleURL
{
  my ($url,$file) = @_;

  # Lexical on purpose: the status code is of no interest outside this sub.
  my $rc = mirror($url, $file);

  # 200: the file was downloaded.
  if ($rc == 200)
  {
    UpdateNachricht($url);
    return 1;
  }

  # 304: the local file is still up to date. Other success codes are
  # likewise treated as "nothing new".
  return 0 if $rc == 304 || is_success($rc);

  print STDERR "$0: $rc ", status_message($rc), "   ($url)\n";
  return 2;
}


# HoleHeader($url, $file)
#
# Fetches the HTTP header of $url and compares its first three fields
# (content type, document length, modification time) with the copy stored
# in $file, one field per line. If they differ, or $file does not exist,
# UpdateNachricht() is called and $file is rewritten with the new values.
#
# Returns:
#   0  header unchanged
#   1  header changed (or no local copy yet); $file was updated
#   2  the header could not be retrieved completely
#
# Dies if $file cannot be read or written.
sub HoleHeader
{
  my ($url,$file) = @_;

  # head() returns these values:
  # ($content_type, $document_length, $modified_time, $expires, $server)

  # Fetch the remote header; all three compared fields must be present.
  my @RemoteHeader = (head($url))[0..2];
  unless (defined $RemoteHeader[0] &&
          defined $RemoteHeader[1] && defined $RemoteHeader[2] )
  {
    warn "Fehler beim Empfangen des Headers von: $url\n";
    return 2;
  }

  # Read the local header, if there is one. Three-argument open keeps
  # special characters in $file from being interpreted as an open mode.
  my @LocalHeader = ();
  if (-e $file)
  {
    open(my $in, '<', $file) || die "Kann '$file' nicht lesen!\n";
    @LocalHeader = <$in>;
    close $in;
    chomp(@LocalHeader);
  }

  # Compare both. Joining with "\n" keeps the field boundaries, so that
  # e.g. ("12","34") and ("123","4") are not mistaken for equal.
  my $remote = join("\n",@RemoteHeader);
  my $local  = join("\n",@LocalHeader);
  return 0 if $remote eq $local;

  # Different, hence an update: notify, then store the new header.
  UpdateNachricht($url);
  open(my $out, '>', $file) || die "Kann '$file' nicht schreiben!\n";
  print {$out} $remote;
  # Buffered write errors only show up at close time.
  close($out) || die "Kann '$file' nicht schreiben!\n";
  return 1;
}


# UpdateNachricht($url)
#
# Reports that $url has been updated. Without -m the notice is printed to
# STDOUT; with -m it is mailed to $login through "mailx".
#
# Dies if mailx cannot be started; warns if mailx reports a failure
# afterwards (the notification is best effort and must not abort the run).
sub UpdateNachricht
{
  my $url = shift;

  unless ($opt_m)
  {
    print "Update: $url\n";
    return;
  }

  # List-form pipe open: mailx is executed directly, without a shell, so
  # the recipient name cannot be misread as shell syntax.
  open(my $mail, '|-', 'mailx', '-s', 'WWW-Diff', $login)
    || die "Fehler: Mailx!\n";
  print {$mail} "Update:\n$url\n";
  # close() waits for mailx and returns false if it exited with an error.
  close($mail) || warn "Fehler: Mailx!\n";
  return;
}


# Kopf()
#
# Builds the program banner: an empty line, the title line made of
# application name, version and author, and a row of "~" characters of
# the same length as the title, each terminated by a newline.
# Returns the banner as one string; nothing is printed here.
sub Kopf
{
  my $title     = join(' ', $appname, $version) . "   -   von Stephan Löscher";
  my $underline = '~' x length $title;
  return join("\n", '', $title, $underline, '');
}


# Hilfe()
#
# Prints the banner from Kopf() followed by the complete usage text
# (syntax, exit codes, options, proxy hints, examples) and then ends the
# program via a plain exit.
#
# NOTE(review): printumlautepaged() is not defined in this file; it
# presumably comes from the slutil module and pages the text while taking
# care of the umlauts -- confirm there.
sub Hilfe
{
  printumlautepaged
  Kopf().
"Syntax: 1.) wwwdiff OPTIONEN -u URL -f FILE
oder:   2.) wwwdiff OPTIONEN -p URLFILE -d VERZ -c KOMMANDO

In der ersten Variante wird genau ein URL mit einem FILE verglichen und URL
dann geholt, wenn der Inhalt neuer ist als FILE.

In der zweiten Variante wird das URLFILE eingelesen, das mehrere URLs
zeilenweise enthält. Dann wird jeder URL mit den evtl. schon vorhandenen Files
im Verzeichnis VERZ verglichen und dann der Befehl KOMMANDO ausgeführt.
In dem KOMMANDO wird '{}' durch den aktuellen URL ersetzt.

Exitcodes bei 1.): (identisch mit denen des normalen diff.)
0: kein Update (no-diff)
1: Update      (diff)
2: Fehler

Optionen:
-t : Timeout in Sekunden einstellen (auch Xm und Xh für Minuten und Stunden.)
-m : Mail schicken, wenn ein Update stattgefunden hat.
     (Dazu wird 'cat Nachricht | mailx -s Subject username' verwendet.)
-h : Holt nur den Header und legt ihn in 'FILE' ab.

Wenn man einen Proxy verwenden will/muß, dann muß man eine Environment-Variable
setzen: http_proxy=http://host:port/ bzw. ftp_proxy=http://host:port/
Beispiel für einen lokalen Cache: export http_proxy=http://localhost:3128/

Beispiele:
~~~~~~~~~~

1.)
wwwdiff    -u http://www.leo.org/ -f LEO.html
wwwdiff -h -u http://www.leo.org/ -f LEO.header

2.)
Man erstellt beispielsweise in ~/w3/ ein File 'urls' mit dem
Inhalt:
http://www.gimp.org/
http://www.gimp.org/download.html
http://www.linux.org/
...
und startet dann:
wwwdiff -h -p ~/w3/urls -d ~/w3 -c \"netscape -remote 'openURL({},new-window)'&\"

WWW-Freaks, die alle Bookmarks auf Veränderungen überprüfen wollen, können
sich ein URL-File aus den Bookmarks damit generieren:
perl -e 'while(<>){\$x{\$1}++if(/HREF=\"(http.+?)\"/i);}print join(\"\\n\",keys %x)' bookmarks.html > urls.www

";
# Plain exit without an argument; the help text is the program's only
# output on this path.
exit;
}


######################################################################
#
# Warranty and legal notice
# ~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Copyright (c) 1997 by Stephan Löscher  -  all rights reserved
# My Address: Stephan Löscher, Dr.Troll-str. 3, 82194 Gröbenzell, Germany
# Email: loescher@gmx.de
# WWW: http://www.loescher-online.de/
#
# This program is freeware.
# It is NOT Public-Domain-Software!
# The author (Stephan Löscher) does NOT give up his copyright, but he 
# reserves his copyright. Usage and copying is free of charge for private
# use, but NOT for commercial use!
# 
# You may and should copy this program free of charge, use it,
# give it to your friends, upload it to a BBS or something similar, under
# the following conditions:
# * Don't charge any money for it. If you upload it to a BBS, make sure that
#    it can be downloaded free (without paying for downloading it, except
#    for usage fees that have to be paid anyway). Small copying fees (up to
#    5 DM or 3 $US) may be charged.
#  * Only distribute the whole original package, with all the files included.
#  * This program may not be part of any commercial product or service without
#    the written permission by the author.
#  * If you want to include this program on a CD-ROM and/or book, please send
#    me a free copy of the CD/book (this is not a must, but I would appreciate
#    it very much).
# 
# Distribution of the program is explicitly desired, provided that the above
# conditions are accepted.
# 
# YOU ARE USING THIS PROGRAM AT YOUR OWN RISK! THE AUTHOR (STEPHAN LÖSCHER)
# IS NOT LIABLE FOR ANY DAMAGE OR DATA-LOSS CAUSED BY THE USE OF THIS PROGRAM
# OR BY THE INABILITY TO USE THIS PROGRAM. IF YOU ARE NOT SURE ABOUT THIS, OR
# IF YOU DON'T ACCEPT THIS, THEN DO NOT USE THIS PROGRAM!
# BECAUSE OF THE VARIOUS HARDWARE AND SOFTWARE ENVIRONMENTS INTO WHICH THIS
# PROGRAM MAY BE PUT, NO WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE IS
# OFFERED.
# GOOD DATA PROCESSING PROCEDURE DICTATES THAT ANY PROGRAM BE THOROUGHLY
# TESTED WITH NON-CRITICAL DATA BEFORE RELYING ON IT.
# 
# No part of the documentation may be reproduced, transmitted, transcribed,
# stored in any retrieval system, or translated into any other language in
# whole or in part, in any form or by any means, whether it be electronic,
# mechanical, magnetic, optical, manual or otherwise, without prior written
# consent of the author, Stephan Löscher.
# 
# You may not make any changes or modifications to this software or this
# manual. You may not decompile, disassemble, or otherwise reverse-engineer
# the software in any way.
# If you got the source, then you are permitted to modify it if you
# contact me and tell me your enhancements.
# You also may include the source as a whole or parts of it into other
# programs, as long as you don't make profit directly out of selling
# the result. If you re-use code of this program then do not remove my name!
# If you include this source-code in your projects, mark it clearly as such
# "... derived from code XXX by Stephan Löscher".
# But don't distribute modified code!
# 
# If you believe your copy of this software has been tampered or altered in
# anyway, shape or form, please contact me immediately! Do not hesitate a
# moment to inform me. Remember, this software should be available to all, in
# the original form, so please do not accept modified or damaged versions of
# my software.
# 
# The author reserves his right for taking legal steps if the copyright or the
# license agreement is violated.
# 
# All product names mentioned in this software are trademarks or registered
# trademarks of their respective owners.
# 
# If you have any questions, ideas, suggestions for improvements or if you find
# bugs (I don't hope so.) then feel free to contact me. (Email is appreciated.)
# 
# I'm not a native english speaker. If you are one and discover some strange
# sounding parts in this documentation or in the program, please, feel free
# to point it out to me and give me suggestions for alteration!
# 
# If the program works for you, and you want to honour my efforts, you are
# invited to donate as much as you want... :)
#
# In any case, if you don't like the restrictions in this license, contact
# me, and we can work something out.
#
######################################################################
