#
# $Id: Split.pm,v 0.31 2003/12/31 22:26:09 st.schubiger Exp $

package Dir::Split;

use 5.6.1;
use strict;
use warnings;

use Carp;
use File::Copy 'cp';
use File::Path;
use File::Spec;

our (# external refs
        $source_dir_ref, # scl
        $target_dir_ref, # scl

     # opt refs
        $mode_ref,     # scl
        $override_ref, # scl
        $warn_ref,     # scl
        $verbose_ref,  # scl
        $ident_ref,    # scl
        $f_limit_ref,  # scl
        $f_sort_ref,   # scl
        $cont_num_ref, # scl
        $sep_ref,      # scl
        $l_req_ref,    # scl
        $case_ref,     # scl

     # data refs
        $files_ref,        # arr
        $f_names_case_ref, # hsh
        $f_names_char_ref, # hsh
        $path_ref,         # scl
        $ret_f_moved_ref,  # scl
        $suffix_ref,       # scl

     # data
        @files,
        %f_names_case,
        %f_names_char,
        $path,
        $ret_f_moved,
        $suffix,
    );

our $VERSION = 0.31;

=head1 NAME

Dir::Split - Split the files of a directory to subdirectories.

=head1 SYNOPSIS

 use Dir::Split;

 # numeric options
 %behavior = (  mode    =>    'num',

                options => {  verbose        =>           1,
                              warn           =>       'all',
                              override       =>      'none',
                },

                sub_dir => {  identifier     =>    'system',
                              file_limit     =>           2,
                              file_sort      =>         '+',
                },

                suffix  => {  continue_num   =>         'y',
                              separator      =>         '-',
                              length         =>           5,
                },
 );

 # create object
 $dir = Dir::Split->new (\%behavior);

 # set source & target dir
 $source_dir = '/var/tmp/src';
 $target_dir = '/var/tmp/target';

 # split files to subdirs
 $files_moved = $dir->split (\$source_dir, \$target_dir);

 # change the subdir identifier
 $dir->{'sub_dir'}{'identifier'} = 'test';

 # split files to subdirs
 $files_moved = $dir->split (\$source_dir, \$target_dir);

=head1 DESCRIPTION

C<Dir::Split> moves files from a source directory to numbered subdirectories within
a target directory.

=head1 METHODS

=head2 new ( \%behavior )

Object constructor.

 $dir = Dir::Split->new (\%behavior);

C<%behavior> contains the options that will influence the splitting process.

=over 4

=item numeric splitting

Split files to subdirectories with a numeric suffix. Numbering may
be continued or started at 1 one each time. Options are explained below.
See EXAMPLES to gain an understanding how numeric splitting works.

 %behavior = (  mode    =>    'num',

                options => {  verbose        =>           1,
                              warn           =>       'all',
                              override       =>      'none',
                },

                sub_dir => {  identifier     =>    'system',
                              file_limit     =>           2,
                              file_sort      =>         '+',
                },

                suffix  => {  continue_num   =>         'y',
                              separator      =>         '-',
                              length         =>           5,
                },
 );

=item characteristic splitting

Split files to subdirectories with a characteristic suffix. Files
are assigned to subdirectories which suffixes correspond with the leading character
of the filenames. Options are explained below. See EXAMPLES to gain an understanding
how characteristic splitting works.

 %behavior = (  mode    =>    'char',

                options => {  verbose     =>           1,
                              warn        =>       'all',
                              override    =>      'none',
                },

                sub_dir => {  identifier  =>    'system',
                },

                suffix  => {  separator   =>         '-',
                              case        =>     'lower',
                },

 );

=item generic behavior

=over 4

=item C<mode>

string - either I<num> for numeric or I<char> for characteristic.

=item C<options/verbose>

integer - verbosity; if enabled, mkpath will output the pathes
on creating subdirectories.

 MODES
   0  disabled
   1  enabled

=item C<options/warn>

string - warn upon the encounter of existing files/dirs.

 LEVELS
   none
   files
   dirs
   all

=item C<options/override>

string - override of existing files/dirs.

 LEVELS
   none
   files
   dirs
   all

=item C<sub_dir/identifier>

string - prefix of each subdirectory created.

=item C<suffix/separator>

string - separates the identifier from the suffix.

=back

=item numeric behavior

=over 4

=item C<sub_dir/file_limit>

integer - limit of files per subdirectory.

=item C<sub_dir/file_sort>

string - sorting order of files.

 MODES
   +  ascending sort order
   -  descending sort order

=item C<suffix/continue_num>

string - numbering continuation (will start at 1 if no).

 MODES
   y  yes
   n  no

=item C<suffix/length>

integer - amount of zeros to be added to the suffix.

=back

Differing identifiers or separators do affect the numbering e.g. I<systm-> does
not equal I<system->, I<system_> does not equal I<system->. C<file_limit>, C<file_sort>
and C<length> options have no influence on decisions whether the numbering shall
be continued, whereas C<identifier>, C<separator> and C<continue_num> do.

=item characteristic behavior

=over 4

=item C<suffix/case>

string - lower/upper case of the suffix.

 MODES
   lower
   upper

=back

=back

=cut

sub new {
    my ($pkg, $opt_ref) = @_;
    croak q~Invalid arguments: new (\%behavior)~
      unless ref $opt_ref eq 'HASH';

    my $class = ref ($pkg) || $pkg;
    my $obj = Dir::Split::_tie_var ($opt_ref);

    bless $obj, $class;
}

#
# _tie_var (\%hash)
#
# Dereferences a two-dimensional hash. A reference
# to a private hash will be returned.
#

sub _tie_var {
    my $opt_ref = $_[0];

    my %my_hash;
    foreach my $key (keys %$opt_ref) {
        if (ref $$opt_ref{$key} eq 'HASH') {
            foreach ( keys %{$$opt_ref{$key}} ) {
                $my_hash{$key}{$_} = $$opt_ref{$key}{$_};
            }
        }
        else { $my_hash{$key} = $$opt_ref{$key} }
    }

    return \%my_hash;
};

#
# DESTROY() - object destructor
#

sub DESTROY { }

=head2 split ( \$source_dir, \$target_dir )

Split files to subdirectories.

 $files_moved = $dir->split (\$source_dir, \$target_dir);

C<$source_dir> specifies the source directory.

C<$target_dir> specifies the target directory.

Returns the amount of files that have been successfully moved;
if none, it will return undef.

=cut

sub split {
    my $self = shift;
    ($source_dir_ref, $target_dir_ref) = @_;
    croak q~Invalid arguments: split (\$source_dir, \$target_dir)~
      unless ref $source_dir_ref eq 'SCALAR' && ref $target_dir_ref eq 'SCALAR';

    # opts refs
    $mode_ref = \$self->{'mode'};
    $verbose_ref = \$self->{'options'}{'verbose'};
    $override_ref = \$self->{'options'}{'override'};
    $warn_ref = \$self->{'options'}{'warn'};
    $ident_ref = \$self->{'sub_dir'}{'identifier'};
    $f_limit_ref = \$self->{'sub_dir'}{'file_limit'};
    $f_sort_ref = \$self->{'sub_dir'}{'file_sort'};
    $cont_num_ref = \$self->{'suffix'}{'continue_num'};
    $sep_ref = \$self->{'suffix'}{'separator'};
    $l_req_ref = \$self->{'suffix'}{'length'};
    $case_ref = \$self->{'suffix'}{'case'};

    # data refs
    $files_ref = \@files;
    $f_names_case_ref = \%f_names_case;
    $f_names_char_ref = \%f_names_char;
    $path_ref = \$path;
    $suffix_ref = \$suffix;

    $self->_sanity_input;
    $self->_gather_files;
    my $sub_suffix = "_suffix_$$mode_ref";
    $self->$sub_suffix;
    $self->_move;
}

#
# _sanity_input()
#
# Ensure that the interface input passes sanity;
# if not, croak.
#

sub _sanity_input {
    no warnings;
    my $err_str;
    {
        unless ($$mode_ref eq 'char' || $$mode_ref eq 'num') {
            $err_str = 'No mode specified.'; last;
        }
        unless ($$verbose_ref =~ /^0|1$/) {
            $err_str = 'No verbosity specified.'; last;
        }
        unless ($$warn_ref =~ /^none|files|dirs|all$/) {
            $err_str = 'No warn mode specified.'; last;
        }
        unless ($$override_ref =~ /^none|files|dirs|all$/) {
            $err_str = 'No override mode specified.'; last;
        }
        unless ($$ident_ref =~ /\w/) {
            $err_str = 'No subdir identifier specified.'; last;
        }
        unless ($$sep_ref) {
            $err_str = 'No suffix separator specified.'; last;
        }
        if ($$mode_ref eq 'char') {
            unless ($$case_ref eq 'lower' || $$case_ref eq 'upper') {
                $err_str = 'No suffix case mode specified.'; last;
            }
        }
        elsif ($$mode_ref eq 'num') {
            unless ($$f_limit_ref =~ /\d/) {
                $err_str = 'No file limit specified.'; last;
            }
            unless ($$f_sort_ref eq '+' || $$f_sort_ref eq '-') {
                $err_str = 'No file sort mode specified.'; last;
            }
            unless ($$cont_num_ref eq 'y' || $$cont_num_ref eq 'n') {
                $err_str = 'No continuation mode specified.'; last;
            }
            unless ($$l_req_ref =~ /\d/) {
                $err_str = 'No suffix length specified.'; last;
            }

        }
    }
    croak $err_str if $err_str;
    use warnings;
}

#
# _gather_files()
#
# Gathers the files the source directory
# consists of and sorts them if required.
#

sub _gather_files {
    my $self = $_[0];

    $self->_dir_read ($source_dir_ref, $files_ref);
    @$files_ref = grep !-d File::Spec->catfile($$source_dir_ref, $_), @$files_ref;

    if ($$mode_ref eq 'num' && ($$f_sort_ref eq '+' || $$f_sort_ref eq '-') ) {
        # preserve case-sensitive filenames in hash.
        foreach (@$files_ref) { $$f_names_case_ref{lc($_)} = $_ }
        @$files_ref = map { lc } @$files_ref;

        if ($$f_sort_ref eq '+') { @$files_ref = sort @$files_ref }
        elsif ($$f_sort_ref eq '-') { @$files_ref = reverse @$files_ref }
    }
}

#
# _suffix_char()
#
# Evaluates all filenames in the source directory
# and stores them in a hash associated with their
# leading character.
#

sub _suffix_char {
    foreach my $file (@$files_ref) {
        ($_) = $file =~ /^(.)/;
        if ($_ =~ /\w/) {
            if ($$case_ref eq 'upper') { $_ = uc ($_) }
            elsif ($$case_ref eq 'lower') { $_ = lc ($_) }
        }
        push (@{$$f_names_char_ref{$_}}, $file);
    }
    undef @$files_ref;
}

#
# _suffix_num()
#
# Evaluates the highest existing subdir suffix number
# from the target directory in order to continue numbering
# where it stopped previously. The suffix will be summed up
# with zeros if required.
#

sub _suffix_num {
    my $self = $_[0];

    # surpress warnings
    $$suffix_ref = 0;

    if ($$cont_num_ref eq 'y') {
        my @dirs;
        $self->_dir_read ($target_dir_ref, \@dirs);
        @dirs = grep -d File::Spec->catfile($$target_dir_ref, $_), @dirs;

        my $sep = quotemeta $$sep_ref;
        foreach (@dirs) {
            # extract exist. identifier
            ($_) = /(.+?)$sep(.*)/;
            next unless $$ident_ref eq $_;
            # increase suffix to highest number
            $$suffix_ref = $2 if $2 > $$suffix_ref;
        }
        # avoid collisions with exist. subdirs
        $$suffix_ref++;
    }

    if (length $$suffix_ref != $$l_req_ref) {
        my $format = "%0.$$l_req_ref" . 'd';
        $$suffix_ref = sprintf $format, $$suffix_ref;
    }
}

#
# _move()
#
# Invokes either _move_num() or _move_char() by
# relying upon the opts.
#

sub _move {
    my $self = $_[0];

    $ret_f_moved_ref = \$ret_f_moved;

    my $sub_move = "_move_$$mode_ref";
    $self->$sub_move;

    return $$ret_f_moved_ref ? $$ret_f_moved_ref : undef;
}

#
# _move_num()
#
# Moves the files to numeric subdirs.
#

sub _move_num {
    my $self = $_[0];

    for (; @$files_ref; $$suffix_ref++) {
       $self->_mkpath ($$suffix_ref);

        for (my $i = 0; $i < $$f_limit_ref; $i++) {
            last unless my $file = shift @$files_ref;
            $self->_cp_unlink ($$f_names_case_ref{$file});
        }
    }
}

#
# _move_char()
#
# Basically the same operation as _move_num(),
# except that characteristic suffixes are attached
# to the subdirs.
#

sub _move_char {
    my $self = $_[0];

    foreach (sort keys %$f_names_char_ref) {
        $self->_mkpath ($_);

        while (@{$$f_names_char_ref{$_}}) {
            $self->_cp_unlink (shift @{$$f_names_char_ref{$_}});
        }
    }
}

#
# _dir_read (\$dir, \@items)
#
# Stores the items a directory consists of
# in a referenced array.
#

sub _dir_read {
    shift; my ($dir_ref, $items_ref) = @_;

    opendir D, $$dir_ref or
      croak qq~Could not open dir $$dir_ref for read-access: $!~;
    @$items_ref = readdir D; splice (@$items_ref, 0, 2);
    closedir D or croak qq~Could not close dir $$dir_ref: $!~;
}

#
# _mkpath ($suffix)
#
# Creates subdirs.
# Optional warning and overriding avoidance
# upon dir collision.
#

sub _mkpath {
    shift; my $suffix = $_[0];

    $$path_ref = File::Spec->catfile($$target_dir_ref, "$$ident_ref$$sep_ref$suffix");

    if (-e $$path_ref) {
        warn "$$path_ref exists.\n" if $$warn_ref eq 'all' || $$warn_ref eq 'dirs';
        return unless $$override_ref eq 'all' || $$override_ref eq 'dirs';
    }
    mkpath $$path_ref, $$verbose_ref or croak qq~Could not create subdir $$path_ref: $!~;
}

#
# _cp_unlink ($file)
#
# Copies files and unlinks them after they've been copied.
# Optional warning and overriding avoidance upon file collision.
#

sub _cp_unlink {
    shift; my $file = $_[0];

    my $path_full = File::Spec->catfile($$path_ref, $file);
    $file = File::Spec->catfile($$source_dir_ref, $file);

    if (-e $path_full) {
        warn "$path_full exists.\n" if $$warn_ref eq 'all' || $$warn_ref eq 'files';
        return unless $$override_ref eq 'all' || $$override_ref eq 'files';
    }
    cp ($file, $path_full) or
      croak qq~Could not copy file $file to subdir $path_full: $!~;
    unlink ($file) or croak qq~Could not remove file $file: $!~;

    $$ret_f_moved_ref++;
}

1;

__END__

=head1 EXAMPLES

Assuming the source directory I</var/tmp/src> contains 5 files:

    +- _12.tmp
    +- abc.tmp
    +- def.tmp
    +- ghi.tmp
    +- jkl.tmp

After splitting the directory tree in the target directory I</var/tmp/target> 
will look as following:

=head2 numeric splitting

    +- system-00001
    +-- _12.tmp
    +-- abc.tmp
    +- system-00002
    +-- def.tmp
    +-- ghi.tmp
    +- system-00003
    +-- jkl.tmp

=head2 characteristic splitting

    +- system-_
    +-- _12.tmp
    +- system-a
    +-- abc.tmp
    +- system-d
    +-- def.tmp
    +- system-g
    +-- ghi.tmp
    +- system-j
    +-- jkl.tmp

=head1 DEPENDENCIES

C<Perl 5.6.1>; C<File::Copy>, C<File::Path>, C<File::Spec>.

=head1 CAVEATS & BUGS

Recursive source directory processing is not supported;
existing directories within the source directory will be ignored.

Minus integer values in C<file_limit> and C<length> options
cause an odd behavior at runtime.

=head1 SEE ALSO

perl(1)

=head1 LICENSE

This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.

=head1 AUTHOR

Steven Schubiger

=cut