#!/usr/local/bin/perl

# Update, Copyright 1994, Weizmann Institute of Science, Leo Novik
#   (levn@wisdom.weizmann.ac.il)
#
# version 2, patch level 0
#
# 
#
# This script is intended to make mirroring ftp sites much simpler.
# I am sure that something like this (and superior) already exists, but
# this will do, I guess, and is fairly simple.
#
########################################################################
#
# Basics:
#=======================================================================
#
# update.pl has two basic usages: 
# 1) list all the files that have changed (or been created) on the master site 
#    since a given date (list directive)
# 2) get all the files that have changed and update our mirror site. (get)
#
# The first usage is for the administrator's interest only and is not to be
# used in the automatic update scripts.
#
# There are two ways to specify the data for the update: 
# 1) On the command line (-data option)
# 2) In a timestamp file (-stamp option)
#
# After the program finishes, it leaves the date of the most recently modified
# file on the server and the search and replace parameters (see below) in the 
# timestamp file (or, if run with -data, in the file
# named 'timestamp'). Thus, an administrator may be sure that all files with
# the modification date (in the server's timezone) earlier than the one stored
# in the timestamp file are up-to-date. 
# 
##############################################################################
#
# Usage:
#=============================================================================
#
# update.pl <ftp site> <main directory>
#		[-data <year> <month> <date> <time> <replace> <with> |
#		 -stamp <timestamp file>]
#		[list | get <receive directory>]
#
# <ftp site>: 			the full internet address to the ftp site.
#					i.e. 'ftp.combinatorics.org'
#
# <main directory>: 	the main directory *on the server* to be mirrored.
#						E.g. '/pub/ejc/Journal'
#
# 	-data: you are manually specifying the date of your last update and the
#		search and replace parameters.
#	Only the files with the modification date (in the server's timezone)
#		later than this will be listed/copied.
#
# <year>:				the year of the last update. E.g. '1994'
# <month>:				the month of the last update. E.g. 'Dec'
# <date>:				the date of the last update. E.g. '25'
# <time>:		the time of the last update (no seconds). E.g. '16:23'
# <replace>:			the text to replace in all the html files. 
#						(E.g. "http:/Journal")
# <with>:				what to replace <replace> with.
#					(E.g. "http://your.host/My/Journal")
#
#-stamp: this file contains 6 lines, for year, month, date, time, string to
#		replace, and string to replace it with (see above). This
#	file will usually be the one created by the previous execution of 
#		update.pl.
#
#At the end of its execution, update.pl will put into this file (or the
#	file named 'timestamp' in case -data was used) the date of the youngest
#	file on the server and the search and replace parameters. Thus, next 
#	time you run it with the same timestamp
#	file, it will only get the files modified since the last run. This 
#	relieves the administrator from tinkering with the dates manually
#	and having to specify the search and replace parameters every time.
#
# <timestamp file>:		the file containing the timestamp (see above).
#						E.g. -stamp timestamp
#
# list:			this directive results in update.pl simply printing
#						out all the modified files.
#
# get <receive directory>:	this directive will 'get' all the modified files
#	into the directory specified by the <receive
#	directory>. It maintains the directory structure
#	and creates all the necessary new directories if
#	the server has done so.
#
#	The program will also replace every occurrence of
#	<replace> with <with> in every .html file it
#	received.
#
#	Note: while new files and directories will be
#	created, old ones (no longer on the server), will
#	*not* be deleted.
#
#	E.g. get ~/public_html/Journal
#
#
# Note: the program adds a line to your .netrc file to allow you to log onto
# the ftp site without a password.
#
##############################################################################
#
#	Strategy
#=============================================================================
#
# The first time you run update.pl with some manual date, for instance:
#
#update.pl ftp.combinatorics.org /pub/ejc/Journal -data 1994 Dec 15 17:35 /
#		"http:/Journal" "http://your.host/~login/Journal" /
#					get ~/public_html/Journal
#
# This will create a 'timestamp' file. Then, every now and then (by cron, or
# by hand), run something like this:
#
#update.pl ftp.combinatorics.org /pub/ejc/Journal -stamp timestamp /
#					get ~/public_html/Journal
#
# That will keep your site up to date.
#
###############################################################################

$TMP = "/tmp";			# directory for the scratch files ($TMP/ftp<pid>, ...)
$DATE = "date";			# the command for getting the date
$FTP = "ftp";			# the command for ftp
$NETRC = ".netrc";		# name of your .netrc file, relative to your home directory

## System commands, in case you need to specify explicit paths, or your setup
## is otherwise non-standard
$GREP = "grep";
$TOUCH = "touch";
$ECHO = "echo";
$SED = "sed";
$RENAME = "mv";			# used to move the edited copy over the original


# Lookup table (associative array) that maps each three-letter English month
# abbreviation to its position in the year, so that months can be ordered.

%Months = (
	Jan => "1",   Feb => "2",   Mar => "3",
	Apr => "4",   May => "5",   Jun => "6",
	Jul => "7",   Aug => "8",   Sep => "9",
	Oct => "10",  Nov => "11",  Dec => "12",
);

##############################################################################
# CompareDates(year1, month1, day1, time1, year2, month2, day2, time2)
#
# Orders two timestamps, each given as four fields in the form
# 1994, "Sep", 12, "12:23".  The fields are examined from the most to the
# least significant one (year, month, day, time) and the first field that
# differs decides the outcome.
#
# Returns  1 if the first timestamp is the later one,
#         -1 if the second timestamp is the later one,
#          0 if all four fields agree.
#
# Years and days are compared as numbers, month names through their rank in
# the global %Months table, and times as strings.

sub CompareDates {
	my($YearA, $MonthA, $DayA, $TimeA, $YearB, $MonthB, $DayB, $TimeB) = @_;

	# Each comparison yields 1, -1 or 0; '||' moves on to the next, less
	# significant field only while the fields compared so far are equal.
	my $Order = ($YearA <=> $YearB)
		|| ($Months{$MonthA} <=> $Months{$MonthB})
		|| ($DayA <=> $DayB)
		|| ($TimeA cmp $TimeB);

	return $Order;
}

##############################################################################
#
# Mkdir(dir)
#
# 'Makes' the directory dir together with every intermediate directory that
# is still missing.  Thus, Mkdir("a/b/c") will create a, b under a, and
# c under b.  Both relative and absolute paths are accepted; repeated
# slashes are ignored.
#
# Directories are created with mode 0777 (subject to the umask).  Components
# that already exist as directories are left alone.
#
# Returns 1 if the whole path exists as directories afterwards, 0 otherwise.
# A component that cannot be created is reported on STDERR and ends the walk,
# since nothing below it could be created either.

sub Mkdir {
	my($Dir) = @_;
	my $Prefix = "";

	if(!defined($Dir) || $Dir eq "") {
		return 0;
	}

	# An absolute path starts from the root directory, which always exists.
	if($Dir =~ /^\//) {
		$Prefix = "/";
	}

	foreach my $Part (split(/\/+/, $Dir)) {
		# split() yields an empty leading field for absolute paths.
		next if $Part eq "";

		if($Prefix ne "" && substr($Prefix, -1) ne "/") {
			$Prefix .= "/";
		}
		$Prefix .= $Part;

		next if -d $Prefix;

		if(!mkdir($Prefix, 0777)) {
			warn "Cannot create directory '$Prefix': $!\n";
			return 0;
		}
	}

	return 1;
}
		
##############################################################################
# Usage()
#
# Prints the "usage" information on STDERR and terminates the program with
# exit status 1, so that calling scripts (e.g. cron jobs) can tell that the
# command line was rejected.  Does not return.
#
sub Usage {
	print STDERR "usage:
	update.pl <ftp site> <dir on site>
		[-data <year> <month> <date> <time> <replace> <with> |
		 -stamp <timestamp file>]
		[list | get <localdir>]
";

	exit(1);
}
	
# The shortest valid command line has five words:
# <ftp site> <dir on site> -stamp <timestamp file> list
&Usage() if @ARGV < 5;

######################## Determine today's date #####################
#
# Sets $ThisMonth to the three-letter English abbreviation of the current
# month (the form used for the keys of %Months) and $ThisYear to the current
# four-digit year.  The listing scan further down uses them to work out the
# year of entries that show a time of day instead of a year.
#
# The values come from localtime() instead of the output of the external
# 'date' command: the layout and language of that output depend on the
# locale and the platform, and a misparsed month makes every such entry look
# like last year's.

{
	my @Now = localtime(time);

	# localtime() counts months from 0 and years from 1900.
	$ThisMonth = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
				  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[$Now[4]];
	$ThisYear = $Now[5] + 1900;
}



####################### Parse the arguments #########################
#
# Layout of the command line:
#   $ARGV[0]           ftp site                          -> $Machine
#   $ARGV[1]           directory on the site             -> $SubDir
#   $ARGV[2]           "-data" or "-stamp"
#     -data:   $ARGV[3..8] = year, month, date, time, <replace>, <with>
#     -stamp:  $ARGV[3]    = timestamp file holding those six values,
#                            one per line
#   $ARGV[$RestArgs]   "list" or "get"
#   $ARGV[$RestArgs+1] local directory (only for "get")  -> $LocalDir
#
# The date of the last update ends up in $OldYear, $OldMonth, $OldDay and
# $OldTime, the substitution parameters in $Replace and $With.

$Machine = $ARGV[0];
$SubDir = $ARGV[1];
if($SubDir !~ /^\//) {
	die "Subdirectory must start with a /. '$SubDir' is not a valid subdirectory.\n";
}

if($ARGV[2] eq "-data") {

	# -data takes six values, so the command word is the tenth argument.
	if($#ARGV < 9) {
		&Usage();
	}

	### Read the date
	$OldYear = $ARGV[3];
	if($OldYear < 1900 || $OldYear > 2100) {
		die "Year must be between 1900 and 2100. '$OldYear' is not a valid year\n";
	}
	$OldMonth = $ARGV[4];
	if(!$Months{$OldMonth}) {
		die "'$OldMonth' is not a valid month.\n";
	}
	$OldDay = $ARGV[5];
	if($OldDay < 1 || $OldDay > 31) {
		die "'$OldDay' is not a valid day\n";
	}
	
	$OldTime = $ARGV[6];
	if($OldTime !~ /^\d\d:\d\d/) {
		die "Time must be of the form Hours:Minutes. '$OldTime' is not a valid time\n";
	}
	# Remove the seconds, if supplied: times are compared as strings against
	# the HH:MM form found in the listing.
	$OldTime = substr($OldTime, 0, 5);

	### Read the substitution pattern

	$Replace = $ARGV[7];
	$With = $ARGV[8];
	$RestArgs = 9;
}
elsif ($ARGV[2] eq "-stamp") {
	$Timestamp = $ARGV[3];
	
	# Read the date from the timestamp file

	my $StampFile;
	if(!open($StampFile, "<", $Timestamp)) {
		die "Cannot open timestamp file '$Timestamp'.\n";
	}
	# chomp() removes only a trailing newline, so a hand-edited file whose
	# last line is not newline-terminated does not lose a character.
	chomp($OldYear = <$StampFile>);
	chomp($OldMonth = <$StampFile>);
	chomp($OldDay = <$StampFile>);
	chomp($OldTime = <$StampFile>);
	chomp($Replace = <$StampFile>);
	chomp($With = <$StampFile>);
	close($StampFile);

	$RestArgs = 4;
}
else {
	&Usage();
}
	
# Read the command (list/get)

$Command = $ARGV[$RestArgs];
if($Command eq "list") {
	# Nothing more to read.
}
elsif($Command eq "get") {
	if($#ARGV < $RestArgs + 1) {
		die "You must supply a local directory to receive the files\n";
	}
	$LocalDir = $ARGV[$RestArgs + 1];
}
else {
	die "Command must be either list or get. '$Command' is not a valid command\n";
}

####################### Update .netrc ##########################
#
# Makes sure that $HomeDir/$NETRC has an entry for $Machine, so that ftp can
# log in anonymously without asking for a password.  If no
# 'machine <site>' entry is found, the line
#     machine <site> login anonymous password <login>@
# is appended.  Failure to update the file is reported but is not fatal.

# getlogin() returns nothing when there is no controlling terminal (for
# instance when run by cron), so fall back to the name of the real user id.
$Login = getlogin() || (getpwuid($<))[0];
if(!$Login) {
	die "Cannot determine your login name\n";
}
$HomeDir = (getpwnam($Login))[7];
if(!$HomeDir) {
	$HomeDir = $ENV{"HOME"};
}

{
	my $NetrcFile = "$HomeDir/$NETRC";
	my $IsNew = ! -e $NetrcFile;
	my $Known = 0;
	my($In, $Out, $Line);

	# Look for the site ourselves: the host name is matched literally and
	# as a whole word, and never passes through the shell.
	if(open($In, "<", $NetrcFile)) {
		while(defined($Line = <$In>)) {
			if($Line =~ /(?:^|\s)machine\s+\Q$Machine\E(?=\s|\z)/) {
				$Known = 1;
				last;
			}
		}
		close($In);
	}

	if(!$Known) {
		if(open($Out, ">>", $NetrcFile)) {
			print $Out "machine $Machine login anonymous password $Login\@\n";
			close($Out);
			# A .netrc that holds passwords is commonly rejected by ftp
			# when other users can read it.  Only a file created here is
			# touched; an existing file keeps the mode its owner chose.
			if($IsNew) {
				chmod(0600, $NetrcFile);
			}
		}
		else {
			warn "Cannot update '$NetrcFile': $!\n";
		}
	}
}

	
######################### Get the listing of the modified files ############
#
# Runs ftp once with the commands 'cd $SubDir' and 'dir -R' and scans the
# recursive listing it prints.  For every plain file that is younger than
# the $OldYear/$OldMonth/$OldDay/$OldTime date:
#   - "list" prints its path on the server;
#   - "get" appends the cd/lcd/get commands needed to fetch it to the ftp
#     script $TMP/ftpls<pid> (left open on the handle fhOut), creates the
#     local directory and counts the file in $Count.
# The date of the youngest file seen is kept in $MaxYear, $MaxMonth,
# $MaxDay and $MaxTime.

print "Obtaining the listing of files at the main site...\n";
$Count = 0;

my $FtpCommands = "$TMP/ftp$$";
my $FtpGetScript = "$TMP/ftpls$$";

## Create the file with the commands for the ftp

if(!open(fh, ">$FtpCommands")) {
	die "Cannot create '$FtpCommands': $!\n";
}
print fh "cd $SubDir\n";
print fh "dir -R\n";
close(fh);

## Run ftp

if(!open(fh, "$FTP $Machine < $FtpCommands|")) {
	unlink($FtpCommands);
	die "Cannot run '$FTP': $!\n";
}

if($Command eq "get") {
	if(!open(fhOut, ">$FtpGetScript")) {
		close(fh);
		unlink($FtpCommands);
		die "Cannot create '$FtpGetScript': $!\n";
	}
}

#### Read the file listing

$OldDir = "-1";			# matches no real directory: forces the first cd/lcd
$MaxYear = 1900;
my $Line;
while (defined($Line = <fh>)) {
	chomp($Line);
	next if $Line =~ /^total/;		# extraneous
	next if $Line =~ /^ *$/;		# extraneous
	if($Line =~ /^([^ ]*):/) {		# New directory starting: remember.
		$Dir = $1;
		next;
	}
	next if $Line !~ /^\-/;			# not a plain file

	## This is a file

	# Parse the listing line: permissions, four skipped columns, then
	# month, day, year-or-time and the name.  A line that does not have
	# this shape (for instance a name containing blanks) cannot be
	# handled; say so instead of silently dropping the file.

	if(!(($Permission, $Month, $Day, $Rest, $Name) =
		($Line =~ /^([^ ]*) *[^ ]* *[^ ]* *[^ ]* *[^ ]* *([^ ]*) *([^ ]*) *([^ ]*) *([^ ]*)$/))) {
		warn "Skipping unrecognized listing line: $Line\n";
		next;
	}

	# There are 2 types of dates: 6 months ago they put the year, less than
	# 6 months ago they put the time.
	if($Rest =~ /:/) {
		# Time
		if($Months{$Month} <= $Months{$ThisMonth}) {
			# This year
			$Year = $ThisYear;
		}
		else {
			# Last year
			$Year = $ThisYear - 1;
		}

		$Time = $Rest;
	}
	else {
		$Year = $Rest;
		$Time = "00:00";
	}

	## Compare the date of the file to the specified date

	if(&CompareDates($Year, $Month, $Day, $Time,
					 $OldYear, $OldMonth, $OldDay, $OldTime) == 1) {

		if($Command eq "list") {
			# print out the path
			print "$SubDir";
			if($Dir) {print "/$Dir";}
			print "/$Name\n";
		}
		else {
			## Produce the instructions for getting it.
			if($Dir ne $OldDir) {
				# New directory.
				# Change it on the server
				print fhOut "cd $SubDir/$Dir\n";
				# Change it locally
				print fhOut "lcd $LocalDir/$Dir\n";
				# Make it locally (now)
				&Mkdir("$LocalDir/$Dir");
				$OldDir = $Dir;
			}
			## Get the file
			print fhOut "get $Name\n";
			$Count++;
		}
	}

	## Compare to the current youngest file

	if(&CompareDates($Year, $Month, $Day, $Time,
					 $MaxYear, $MaxMonth, $MaxDay, $MaxTime) == 1) {

		$MaxYear = $Year;
		$MaxMonth = $Month;
		$MaxDay = $Day;
		$MaxTime = $Time;
	}
}

close(fh);

## Output the new version of the timestamp
#
# The timestamp file receives six lines: year, month, day and time of the
# youngest file seen in the listing, then the <replace> and <with> strings.
# NOTE(review): the stamp is written before the files are actually received,
# so a failed transfer is not retried on the next run -- confirm whether
# that is intended.

if($Timestamp eq "") {
	$Timestamp = "timestamp";
}

# $MaxMonth is only set when the listing contained at least one file.  If it
# did not (empty directory, ftp failure), keep the date we started from so
# the stamp is not replaced by an empty date.
if($MaxMonth eq "") {
	($MaxYear, $MaxMonth, $MaxDay, $MaxTime) =
		($OldYear, $OldMonth, $OldDay, $OldTime);
}

if(open(fh, ">$Timestamp")) {
	print  fh "$MaxYear\n";
	print  fh "$MaxMonth\n";
	print  fh "$MaxDay\n";
	print  fh "$MaxTime\n";
	print  fh "$Replace\n";
	print  fh "$With\n";
	if(!close(fh)) {
		warn "Cannot finish writing timestamp file '$Timestamp': $!\n";
	}
}
else {
	# Not fatal: the run carries on, but the next run will start from the
	# old date again.
	warn "Cannot write timestamp file '$Timestamp': $!\n";
}

## Finish a "list" run, or receive the files and adapt them for a "get" run.
#
# For "get": closes the ftp script written during the listing scan, runs ftp
# on it, and then substitutes $With for $Replace (via grep and sed) in every
# received file whose name ends in .html.

# Returns its argument wrapped in single quotes, with embedded single quotes
# escaped, so that the shell passes it on as one literal word.  File names
# come from the server's listing and must not be interpreted by the shell.
sub ShellQuote {
	my($Text) = @_;
	$Text =~ s/'/'\\''/g;
	return "'" . $Text . "'";
}

# Returns its argument with every '&' that is not already preceded by a
# backslash escaped.  '&' is the delimiter of the sed command built below,
# so an unescaped one (e.g. in a URL with a query string) would end the
# pattern or the replacement early.
sub SedAmpEscape {
	my($Text) = @_;
	$Text =~ s/(?<!\\)&/\\&/g;
	return $Text;
}

if($Command eq "list") {
	# The ftp command file is not needed any more; remove it here as well,
	# so that "list" runs do not leave it behind in $TMP.
	unlink("$TMP/ftp$$");
	exit(0);
}

close(fhOut);
unlink("$TMP/ftp$$");

print "$Count modified files. Receiving...\n";

## Run ftp again to receive the files
system("$FTP $Machine < $TMP/ftpls$$");

## Run replace on all the files to set them up for the local server
$| = 1;			# show the progress dots as they are printed
print "Setting up for local server...";

my $GetScript = "$TMP/ftpls$$";
my $Scratch = "$TMP/repl$$";
my $Status;

if(!open(fh, $GetScript)) {
	die "Cannot read '$GetScript': $!\n";
}

# The ftp script doubles as the list of received files: 'lcd' lines name the
# local directory, 'get' lines the files placed in it.
while($File = <fh>) {
	chomp($File);
	if($File =~ /^lcd (.*)$/) {
		$Dir = $1;
		next;
	}
	if($File =~ /^get (.*)$/) {
		$File = "$Dir/$1";
		# Check if it is a .html file
		if($File !~ /\.html$/) { next;}

		# Nothing to substitute: sed rejects an empty pattern.
		if($Replace eq "") { next;}

		# A file that ftp failed to deliver must not stop the others from
		# being processed.
		if(! -f $File) {
			warn "\n'$File' was not received; skipping\n";
			next;
		}

		# Check if it contains the string.  grep exits with 1 when it
		# does not, and with a larger value on errors.
		$Status = system("$GREP " . &ShellQuote($Replace) . " "
						 . &ShellQuote($File) . " >/dev/null") >> 8;
		if($Status != 0) {
			if($Status != 1) {
				warn "\nCannot search '$File'; skipping\n";
			}
			next;
		}

		# Replace it via sed
		if(system("$SED -e "
				  . &ShellQuote("s&" . &SedAmpEscape($Replace) . "&"
								. &SedAmpEscape($With) . "&g")
				  . " " . &ShellQuote($File)
				  . " > " . &ShellQuote($Scratch)) != 0) {
			close(fh);
			unlink($Scratch, $GetScript);
			die "Can't sed\n";
		}

		# Replace the original file with the new one
		if(system("$RENAME " . &ShellQuote($Scratch) . " "
				  . &ShellQuote($File)) != 0) {
			close(fh);
			unlink($Scratch, $GetScript);
			die "Can't move\n";
		}
		print ".";
	}
}

close(fh);

unlink($GetScript);
print "Done\n";

# Stop here explicitly, so that nothing that follows this script in the same
# file is executed.
exit(0);




#------------------------------- replace.pl ----------------------------------


#!/usr/local/bin/perl

# Replace, Copyright 1994, Weizmann Institute of Science, Leo Novik
# (levn@wisdom.weizmann.ac.il)
#
# version 1, patch level 1
#
# usage: replace.pl <root> <replace> [<with>]
#
# Substitutes <with> for every match of <replace> in <root> if <root> is a
# plain file, or in every *.html file below <root> if it is a directory.
# <replace> is handed to grep and sed, i.e. it is treated as a regular
# expression by those commands.  If <with> is left out, matches are deleted.

# TMP points to the temp directory on your system, usually at /tmp
$TMP = "/tmp";

# ECHO is the 'echo' command (in case your path is weird)
$ECHO = "echo";

# FIND is the 'find' command (in case your path is weird)
$FIND = "find";

# SED is the 'sed' command (in case your path is weird)
$SED = "sed";

# GREP is the 'grep' command (in case your path is weird)
$GREP = "grep";

# RENAME is the 'mv' command (in case your path is weird)
$RENAME = "mv";

# Returns its argument wrapped in single quotes, with embedded single quotes
# escaped, so that the shell passes it on as one literal word.
sub QuoteArg {
	my($Text) = @_;
	$Text =~ s/'/'\\''/g;
	return "'" . $Text . "'";
}

# Returns its argument with every '&' that is not already preceded by a
# backslash escaped; '&' is the delimiter of the sed command built below.
sub EscapeAmp {
	my($Text) = @_;
	$Text =~ s/(?<!\\)&/\\&/g;
	return $Text;
}

# ARG[0] = Root of the directory tree to search and replace in.
# ARG[1] = String to replace (e.g. 'http://ejc.mat.edu/Journal')
# ARG[2] = Replace string with (e.g. 'http://your.host/My/Journal/Path')

if($#ARGV < 1) {
	die "usage: replace.pl <root> <replace> [<with>]\n";
}

$Root = $ARGV[0];
$Replace = $ARGV[1];
$With = defined($ARGV[2]) ? $ARGV[2] : "";

# sed rejects an empty pattern; refuse it before touching any file.
if($Replace eq "") {
	die "The string to replace must not be empty\n";
}

# This is the file where the listing will be temporarily put (removed after
# execution)
$LISTING = "$TMP/wwwrpl$$";

# This is where the result of the substitution is temporarily put (removed
# after execution)
$TEMPFILE = "$TMP/wwwsed$$";

# Check permissions
if(! -e $Root) {die "$Root doesn't exist\n";}
if(! -r $Root) {die "$Root is not readable by you\n";}
if(! -w $Root) {die "$Root is not writable by you\n";}

if(-f $Root) {
#	Plain file: the listing holds just this one name
	if(system("$ECHO " . &QuoteArg($Root) . " > " . &QuoteArg($LISTING)) != 0) {
		unlink($LISTING);
		die "Can't echo\n";
	}
}
elsif(-d $Root) {
#	Directory: the listing holds every *.html file below it
	if(system("$FIND " . &QuoteArg($Root)
			  . " -type f -name '*.html' -print > " . &QuoteArg($LISTING)) != 0) {
		unlink($LISTING);
		die "Can't find\n";
	}
}
else {
	die "$Root is neither a plain file nor a directory\n";
}

my($ListFile, $Status);
if(!open($ListFile, "<", $LISTING)) {
	die "Can't read $LISTING: $!\n";
}

$| = 1;			# show the progress dots as they are printed

# For each HTML file...
while($File = <$ListFile>) {
	chomp($File);
	print "$File.";

#	Check if it contains the string.  grep exits with 1 when it does not,
#	and with a larger value on errors.
	$Status = system("$GREP " . &QuoteArg($Replace) . " "
					 . &QuoteArg($File) . " >/dev/null") >> 8;
	if($Status != 0) {
		if($Status != 1) {
			warn "\nCannot search '$File'; skipping\n";
		}
		next;
	}
	print ".";

#	Replace it via sed
	if(system("$SED -e "
			  . &QuoteArg("s&" . &EscapeAmp($Replace) . "&"
						  . &EscapeAmp($With) . "&g")
			  . " " . &QuoteArg($File)
			  . " > " . &QuoteArg($TEMPFILE)) != 0) {
		close($ListFile);
		unlink($TEMPFILE, $LISTING);
		die "Can't sed\n";
	}
	print ".";

#	Replace the original file with the new one
	if(system("$RENAME " . &QuoteArg($TEMPFILE) . " " . &QuoteArg($File)) != 0) {
		close($ListFile);
		unlink($TEMPFILE, $LISTING);
		die "Can't move\n";
	}
} continue {
	print "\n";
}

close($ListFile);

unlink($LISTING);





