#!/usr/bin/perl -w

# $Id: scan_for_new.pl,v 1.3 2001/11/26 13:44:45 lachoy Exp $

use strict;
use Class::Date qw( now );
use File::Find;
use OpenInteract::Startup;

use constant DEBUG => 0;

my $NOW                 = now;
my $DATE_PATTERN        = '%Y-%m-%d';
my $DEFAULT_ACTIVE_DAYS = 365;
my @ALWAYS_SKIP         = ( "~\$", 'pod2', 'tmp', 'bak', 'old', '^/images', '^/oi_docs' );
my ( %EXISTING, %NEW );
my ( @OPT_skip );
my ( $OPT_root, $OPT_debug, $OPT_active_days );

{
    my %opts = ( 'skip=s'   => \@OPT_skip,
                 'root=s'   => \$OPT_root,
                 'active=s' => \$OPT_active_days,
                 'debug'    => \$OPT_debug );
    my $R = OpenInteract::Startup->setup_static_environment_options( '', \%opts );

    $OPT_root ||= $R->CONFIG->get_dir( 'html' );
    $OPT_root =~ s|/$||;

    $OPT_active_days ||= $DEFAULT_ACTIVE_DAYS;
    $OPT_debug ||= DEBUG;

    push @OPT_skip, @ALWAYS_SKIP;

    %EXISTING = map { $_ => 1 }
                    @{ $R->page->db_select({ select => [ 'location' ],
                                             from   => [ $R->page->table_name ],
                                             return => 'single-list' }) };
    find( \&descend, $OPT_root );

    foreach my $location ( sort keys %NEW ) {
        my %page_info = get_page_info( $R, $location );
        add_new_location( $R, $location, \%page_info );
        $OPT_debug && warn "--Added new location $location\n";
    }

}


sub descend {
    my $filename = $_;
    return if ( $filename =~ /^\.+$/ );
    return unless ( -f $filename );
    my $full_dir = $File::Find::dir;
    $full_dir =~ s/^$OPT_root//;
    $full_dir =~ s|/$||;

    foreach my $pattern ( @OPT_skip ) {
        return if ( $full_dir =~ /$pattern/ );
    }
    my $location = join( '/', $full_dir, $filename );
    $location = "/$location" unless ( $location =~ m|^/| );
    return if ( $EXISTING{ $location } );
    $OPT_debug && warn "--Adding ($location) as new file\n";
    $NEW{ $location }++;
}


sub get_page_info {
    my ( $R, $location ) = @_;
    my $full_filename = join( '', $OPT_root, $location );
    my $mime_type = $R->page->mime_type_file( $full_filename ) ||
                    'text/html';
    my %info = ( mime_type => $mime_type,
                 size      => (stat $full_filename)[7] );
    $OPT_debug && warn "--MIME type found for ($location): $mime_type\n";
    return %info unless ( $mime_type eq 'text/html' );

    open( FILE, $full_filename ) || die "Cannot open ($full_filename): $!";
    while ( <FILE> ) {
        $info{title} = $1 if ( m|<title>(.*?)</title>| );
    }
    close( FILE );
    return %info;
}


sub add_new_location {
    my ( $R, $location, $page_info ) = @_;
    my $expire_date = $NOW + "${OPT_active_days}D";
    $R->page->new({ location   => $location,
                    mime_type  => $page_info->{mime_type},
                    title      => $page_info->{title},
                    is_active  => 'yes',
                    active_on  => $NOW->strftime( $DATE_PATTERN ),
                    expires_on => $expire_date->strftime( $DATE_PATTERN ),
                    size       => $page_info->{size},
                    storage    => 'file' })
            ->save();
}

__END__

=pod

=head1 NAME

scan_for_new.pl - Scan for new pages in a tree and add new ones to the database

=head1 SYNOPSIS

 $ export OIWEBSITE=/home/httpd/mysite
 $ perl scan_for_new --skip=^images

=head1 DESCRIPTION

This script scans a directory tree (by default the tree with 'html/'
as the root in your website directory) and adds any new files to the
database.

=head1 BUGS

None known.

=head1 TO DO

Nothing known.

=head1 SEE ALSO

=head1 COPYRIGHT

Copyright (c) 2001 intes.net, inc.. All rights reserved.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=head1 AUTHORS

Chris Winters <chris@cwinters.com>

=cut
