#!/usr/local/bin/perl
#
# $Id: psync,v 0.60 2002/01/27 16:43:20 dankogai Exp $
#

use strict;
use Getopt::Long;
use Fcntl;
use DB_File;
use File::Basename;
use MacOSX::File;
use MacOSX::File::Copy;
use MacOSX::File::Info;
use Fcntl qq(:mode);

my $Debug = 0;
my $Psync_DB = '.psync.db';

my (
    $opt_a,
    $opt_d,
    $opt_f,
    $opt_n,
    $opt_r,
    $opt_q, 
    $opt_v, 
    );

Getopt::Long::Configure("bundling");
GetOptions(
	   "D"   => \$Debug,
	   "a"   => \$opt_a,
           "d:n" => \$opt_d,
	   "f"   => \$opt_f,
           "n"   => \$opt_n,
	   "r"   => \$opt_r,
	   "q"   => \$opt_q,
           "v:n" => \$opt_v,
           );

defined $opt_d and $opt_d ||= 1;
$opt_v ||= 1;
$opt_q and $opt_v = 0;

$Debug and print <<"EOT";
\$opt_a = $opt_a,
\$opt_d = $opt_d,
\$opt_f = $opt_f,
\$opt_n = $opt_n,
\$opt_r = $opt_r,
\$opt_q = $opt_q,
\$opt_v = $opt_v,
EOT

my $IgnorePat =
    qr[
       ^/(?:
            tmp/.*
          | dev/.*
          | private/var/tmp/.*
          | private/var/vm/.* 
          | private/var/run/.* 
	  | Temporary\ Items/.*
          )
       ]xo;

my %IgnoreFiles = map { $_ => 1 }
(
 $Psync_DB,
 '.DS_Store',
 '.FBCIndex',
 '.FBCLockFolder',
 '.Trashes',
 'AppleShare PDS',
 'Desktop DB',
 'Desktop DF',
 'TheFindByContentFolder',
 'TheVolumeSettingsFolder',
 );

my $Del_Ignored = $opt_d > 1 ? -1 : 0;

my $Topdev;
# Maybe we should tie them to DB_File to save memory
my (%Signature, %Attribs, %Action, %Root) = ();

select(STDOUT);
$|=1; #autoflush

my $Dst = pop @ARGV;
-d $Dst or help();
scalar @ARGV >= 1 or help();
if ($opt_r){
    scalar @ARGV != 1 and help();
    if (-f "$ARGV[0]/$Psync_DB"){ # remote restore mode
	$opt_v and 
	    do_log("Using $ARGV[0]/$Psync_DB to retrieve extra attributes.");
	tie (%Attribs, 'DB_File', "$ARGV[0]/$Psync_DB",
	     O_RDONLY, 0440, $DB_HASH) or die "$Dst/$Psync_DB : $!";
	$opt_r = 1;
    }else{
	$opt_r = 2;               # remote backup mode
    }
}

my $ScanCount = 0;

$opt_v and do_log("Scanning Destination Directory $Dst ...");
$Topdev = (lstat($Dst))[0];
scantree($Dst, '', -1);
$opt_v and do_log("\n$ScanCount items found.");

for my $src (@ARGV){
    $ScanCount = 0;
    $opt_v and do_log("Scanning Source Item $src ...");
    $Topdev = (lstat($src))[0];
    scantree($src, '' , +1);
    $opt_v and do_log("\n$ScanCount items found.");
}

if ($opt_v){
    my ($n_del, $n_unchg, $n_copy) = (0,0,0);
    while(my ($k, $v) = each %Action){
	$k or next;
        $v <  0 and $n_del++;
        $v == 0 and $n_unchg++;
        $v >  0 and $n_copy++;
    }
    do_log(sprintf "%8d items to delete,", $n_del);
    do_log(sprintf "%8d items unchanged,", $n_unchg);
    do_log(sprintf "%8d items to copy.", $n_copy);
}
if ($opt_d){
    $opt_v and do_log("deleting items ...");
    # sort must be this order for depth-first traversal
    for my $k (sort {$b cmp $a} keys %Action){
	$k or next; $Action{$k} <  0 or next;
	my $dpath = $Dst . $k;
	$opt_v and do_log("- $dpath");
	unless ($opt_n){
	    -e $dpath or next;
	    unlink $dpath or rmdir $dpath or warn "$dpath : $!";
	    my $atticf = dirname($dpath) . '/._' . basename($dpath);
	    if (-f $atticf){
		unlink $atticf or warn "$atticf : $!";
	    }
	}
    }
}
$opt_v and do_log("copying items ...");
# sort must be this order for depth-last traversal
for my $k (sort keys %Action){
    my $spath = $Root{$k} . $k; $spath =~ s,^/+,/,o;
    my $dpath = $Dst . $k;
    $Action{$k} == 0 and $opt_v > 1 and do_log("== $spath");
    $Action{$k} >  0 or next;
    unless ($opt_n){
	my ($size, $mtime)                = unpack("N2", $Signature{$k});
	my ($mode, $uid,  $gid,  $atime)  = unpack("N4", $Attribs{$k});
	if     (S_ISDIR($mode)){ # -d
	    unless (-d $dpath){
		$opt_v and do_log("+d $spath");
		mkdir $dpath, 0755 or warn "$dpath : $!";
	    }else{
		$opt_v > 1 and do_log("=d $spath");
	    }
	}elsif (S_ISREG($mode)){ # -f
	    $opt_v and do_log("+f $spath");
	    $opt_a or copy ($spath, $dpath)
		or $Debug ? 
		warn "$spath -> $dpath : ", &MacOSX::File::strerr :
		warn "$spath -> $dpath : $MacOSX::File::CopyErr" ;
	    copyattrib($spath, $dpath, $mode, $uid, $gid, $atime, $mtime);
	}elsif (S_ISLNK($mode)){ # -l
	    $opt_v and do_log("+l $spath");
	    my $slink = readlink($spath);
	    if ($slink ne readlink($dpath)){
		unlink $dpath && symlink(readlink($spath), $dpath);
	    }
	}
    }
}

$opt_v and do_log("fixing directory attributes ...");
# sort must be this order for depth-first traversal
for my $k (sort {$b cmp $a} keys %Action){
    $Action{$k} >  0 or next;
    my ($size, $mtime)                = unpack("N2", $Signature{$k});
    my ($mode, $uid,  $gid,  $atime)  = unpack("N4", $Attribs{$k});
    S_ISDIR($mode) or next;  # -d
    my $spath = $Root{$k} . $k; $spath =~ s,^/+,/,o;
    my $dpath = $Dst . $k;

    unless ($opt_n){
	copyattrib($spath, $dpath, $mode, $uid, $gid, $atime, $mtime);
	$opt_v and do_log(sprintf "0%04o,%s,%s $dpath", ($mode & 07777), 
			  (getpwuid($uid))[0],(getgrgid($gid))[0] );
    }
}

if ($opt_r >= 2){
    # these are to make DB operation fast enough
    my $hashinfo = DB_File::HASHINFO->new;
    $hashinfo->{nelem} = scalar keys %Action;
    $hashinfo->{bsize} = 1024; # MAXPATHLEN
    $hashinfo->{cachesize} = 4 * 1024 * 1024;
    tie (my %db, 'DB_File', $Psync_DB,  O_CREAT|O_RDWR, 0640, $hashinfo)
	or die "$Psync_DB : $!";
    $opt_v and do_log("Using $Dst/$Psync_DB to store extra attributes.");
    my $count;
    while ( my ($k, $v) = each %Action){
	if ($v >= 0){
	    $db{$k} = $Attribs{$k};
	    $count++ % 10000 == 0 and do_log("$count items stored.");
	}
    }
    untie %db;
    move $Psync_DB, "$Dst/$Psync_DB" or die "Can't move $Psync_DB";
}

sub copyattrib{
    my ($spath, $dpath, $mode, $uid, $gid, $atime, $mtime) = @_;
    my $finfo = getfinfo($spath);
    $finfo and $finfo->set($dpath);
    unless ($opt_r > 1){
	chmod $mode & 07777, $dpath;
	chown $uid,   $gid,  $dpath;
    }
    utime $atime, $mtime,  $dpath;
}

exit;
sub do_log{
    print shift, "\n";
}

sub sig2txt{
    return sprintf("0x%08x,0x%08x",unpack("N2",shift));
}

sub addsig{
    my ($path, $mode,$uid,$gid,$size,$atime,$mtime, $action) = @_;
    my $sig  = pack("N2", (S_ISREG($mode) ? $size : 0), $mtime);
    tied %Attribs or $Attribs{$path} = pack("N4", $mode, $uid, $gid, $atime);
    if ($opt_v > 3 and $action > 0){
	do_log qq(was: ) . sig2txt($Signature{$path});
	do_log qq(now: ) . sig2txt($sig);
    }
    if ($Signature{$path} eq $sig){
        $opt_f or $action = 0;     # same file
    }else{
        $Signature{$path} =  $sig; # different
    }
    $Action{$path} = $action;
    $opt_v > 2 and 
	do_log(join("," => $Action{$path},
		    sprintf("0x%08x,0x%08x,0x%08x", 
			    unpack("N2",$Signature{$path})),
		    $path));
}

# File::Find is too general purpose thus slow.
# we implement our own traversal routine

sub scantree {
    my ($root, $path, $action) = @_;
    if ($opt_v){
	$ScanCount % 8192 == 0 and  printf "\n%10d:", $ScanCount;
	$ScanCount % 128  == 0 and  print  ".";
	$ScanCount++;
    }
    if ($path =~ $IgnorePat){
        addsig($path, 0, 0, 0, 0, 0, 0, $Del_Ignored);
        return;
    }
    $action > 0 and $Root{$path} = $root;
    my $fpath = $root . $path;
    my ($dev, $mode, $nlink, $uid, $gid, $size, $atime, $mtime) = 
	(lstat($fpath))[0,2,3,4,5,7,8,9] or warn "can't stat $fpath";

    addsig($path, $mode, $uid, $gid, $size, $atime, $mtime, $action);

    if (-d _){
        $dev != $Topdev and return;
        opendir my $d, $fpath or warn "$fpath:$!";
        # see ._* is avoided
        my @f = grep !/^\.(?:\.?$|_)/o, readdir $d;
        closedir $d;
        for my $f (@f){
            my $spath = "$path/$f";
            if ($IgnoreFiles{$f}){
                addsig($spath, $mode, $uid, $gid, $size, $mtime, $atime, 
		       $Del_Ignored) unless $f eq $Psync_DB;
            }else{
                scantree($root, $spath, $action);
            }
        }
    }
}

sub help{
    print <<"EOT";
psync   [-d][-n][-q|-v] source_items ... target_directory
psync -r[-d][-n][-q|-v] source_directory target_directory
EOT
exit;
}
1;
__END__
=head1 NAME

psync -- update copy

=head1 SYNOPSIS

 psync [-n][-q|-vI<n>] source_items ... target_directory

=head1 DESCRIPTION

psync does an update copy.  It compares source directory and target
directory at first, then erases items that are nonexistent on source
directory if specified and finally copies everything on source directory.
Items with the same modification date and (data fork) size remain
untouched, saving time on operation.

Currently psync supports options below

=over 4

=item -r

Remote backup/restore mode.  Ownership and permissions are
stored/retrieved via C<.psync.db>

If the source directory contains a file C<.psync.db>, psync
turns into remote restore mode.  It uses .psync.db on source
directory to restore ownership and permissions.

If not, psync turns into remote backup mode.  After the backup
it stores ownership and permissions to C<.psync.db>

As the name suggests, this option is imperative when the backup
directory is on remote volume such as AFP, NFS, and Samba.

=item -dI<n>

Delete nonexistent files before starting copy.  If the number larger
than 2 is specified, it also deletes ignored files.

CAVEAT:  Prior to 0.50 this option was default.

=item -f

Force copy.  Copy files even when the file remains unchanged.

=item -n

"Simulation mode".  It prints what it would do on standard output but
does nothing thus far.

=item -vI<n>

Sets verbose level.  Default verbose level is 1;  It prints only items
that are changed.  Level 2 prints unchanged files also.  Level 3 and
above are practically debugging mode.

=item -q

Quiet mode.  Sets verbose level to 0.

=back

=head1 EXAMPLE

To backup everything in startup volume, all you have to say is

  sudo psync -d / /Volumes/I<backup>

And the resulting I<backup> volume is fully-bootable copy thereof.
Note C<sudo> or root privilege is necessary to restore file
ownership. 

=cut

HFS+ on Pismo's Expansion Bay

     2539.48 real       121.97 user       290.78 sys
      452.98 real        47.29 user        39.38 sys

UFS on Pismo's Expansion Bay
     9278.25 real       775.60 user       667.82 sys
     1086.35 real        69.19 user        53.68 sys

HFS+ Disk Image on AFP Volume

     3127.60 real       217.51 user       445.04 sys
     1059.37 real        69.80 user        52.00 sys

DVD-RAM formatted as  HFS+
    12258.39 real       210.52 user       441.67 sys
      564.49 real        62.47 user        46.65 sys


NFS
    13227.76 real       429.44 user       583.40 sys
     2348.72 real        83.87 user        88.10 sys


=head1 PERFORMANCE

On PowerBook G3 (pismo) with G3/400, 384MB Memory,  I tested with
C</usr/bin/time -l sudo psync -d / /Volumes/backup>.  The boot volume
contains no more than vanilla OS X 10.1.2 and Developer kit.  It
had a little over 10000 items and 1.8 GB of used space.  Here is
the result;

     1st run
     2539.48 real       121.97 user       290.78 sys
     run that immedeately follows
      452.98 real        47.29 user        39.38 sys

Note screensaver was on with some other background programs.  I used
this program happily with my PowerBook G4 (Ti) while I am surfing the web
and listening to iTunes at the same time letting SETI@Home search for
cosmic programmers :)  With MacOS X, background backup is no problem

=head1 FILES

=over 4

=item .psync.db

Berkeley DB Hash file used to store ownership and permission
information when -r option is set.

=head1 BUGS

Backing up to AFP volume may lose some files with Unicode names other
than the language you specified when you mount the volume.  That is,
When you mount the volume with "Japanese" support,  You may fail to 
backup files with Korean and Chinese names.  AFP is also vulnerable to
file names that are longer than 31 bytes.

Most NFS servers do not have this problem so if you backup MacOS X
boot volume onto remote servers, use NFS or disk image.

In theory the backup also works on WebDAV and SMB but they remain
untested.

=head1 DISCLAIMER

The author of this utility will be held no responsibility for any
damages and losses of data and/or files that may be caused by the use
thereof.

B<Use me at your own risk!>

=head1 AUTHOR

Dan Kogai <dankogai@dan.co.jp>

=head1 SEE ALSO

L<pcpmac/1>

hfstar F<http://www.geocities.com/paulotex/tar/>

hfspax F<http://homepage.mac.com/howardoakley/>

C<The Finder and File Operations> F<http://developer.apple.com/techpubs/macosx/Essentials/SystemOverview/Finder/The_Finder___Operations.html>

=head1 COPYRIGHT

Copyright 2002 Dan Kogai <dankogai@dan.co.jp>

This library is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.
