#
# $Id: Split.pm,v 0.36 2004/01/02 23:43:01 st.schubiger Exp $

package Dir::Split;

use 5.6.1;
use strict;
use warnings;

use Carp;
use File::Copy 'cp';
use File::Path;
use File::Spec;

our $VERSION = '0.36';

our (# external refs
        $Source_dir_ref, # scl
        $Target_dir_ref, # scl

     # opt refs
        $Mode_ref,       # scl
        $Override_ref,   # scl
        $Warn_ref,       # scl
        $Verbose_ref,    # scl
        $Ident_ref,      # scl
        $F_limit_ref,    # scl
        $F_sort_ref,     # scl
        $Num_contin_ref, # scl
        $Sep_ref,        # scl
        $L_req_ref,      # scl
        $Case_ref,       # scl

     # data refs
        $Files_ref,        # arr
        $F_names_case_ref, # hsh
        $F_names_char_ref, # hsh
        $Path_ref,         # scl
        $Ret_f_moved_ref,  # scl
        $Suffix_ref,       # scl

     # data
        @Files,
        %F_names_case,
        %F_names_char,
        $Path,
        $Ret_f_moved,
        $Suffix,

     # constants
        $PACKAGE,
);

=head1 NAME

Dir::Split - Split the files of a directory to subdirectories.

=head1 SYNOPSIS

 use Dir::Split;

 # numeric options
 %behavior = (  mode    =>    'num',

                options => {  verbose        =>           1,
                              warn           =>       'all',
                              override       =>      'none',
                },

                sub_dir => {  identifier     =>    'system',
                              file_limit     =>           2,
                              file_sort      =>         '+',
                },

                suffix  => {  separator      =>         '-',
                              continue       =>         'y',
                              length         =>           5,
                },
 );

 # create object
 $dir = Dir::Split->new (\%behavior);

 # set source & target dir
 $source_dir = '/tmp/src';
 $target_dir = '/tmp/target';

 # split files to subdirs
 $files_moved = $dir->split (\$source_dir, \$target_dir);

=head1 DESCRIPTION

C<Dir::Split> moves files from a source directory to numbered subdirectories within
a target directory.

=head1 METHODS

=head2 new ( \%behavior )

Object constructor.

 $dir = Dir::Split->new (\%behavior);

C<%behavior> contains the options that will influence the splitting process.

=cut

sub new {
    my ($pkg, $Opt_ref) = @_;
    croak q~usage: Dir::Split->new (\%behavior)~
      unless ref $Opt_ref eq 'HASH';

    my $class = ref ($pkg) || $pkg;
    my $obj = Dir::Split::_tie_var($Opt_ref);

    return bless $obj, $class;
}

#
# _tie_var (\%hash)
#
# Dereferences a two-dimensional hash.
#

sub _tie_var {
    my $Opt_ref = $_[0];

    my %my_hash;
    foreach my $key (keys %$Opt_ref) {
        if (ref $$Opt_ref{$key} eq 'HASH') {
            foreach ( keys %{$$Opt_ref{$key}} ) {
                $my_hash{$key}{$_} = $$Opt_ref{$key}{$_};
            }
        }
        else { $my_hash{$key} = $$Opt_ref{$key} }
    }

    return \%my_hash;
};

=head2 split ( \$source_dir, \$target_dir )

Split files to subdirectories.

 $files_moved = $dir->split (\$source_dir, \$target_dir);

C<$source_dir> specifies the source directory.

C<$target_dir> specifies the target directory.

Returns the amount of files that have been successfully moved;
if none, it will return undef.

=cut

sub split {
    my $self = shift;
    ($Source_dir_ref, $Target_dir_ref) = @_;
    croak q~usage: $obj->split (\$source_dir, \$target_dir)~
      unless ref $Source_dir_ref eq 'SCALAR' && ref $Target_dir_ref eq 'SCALAR';

    $self->_init_refs();
    $self->_sanity_input();
    $self->_init_presets();

    $self->_gather_files();
    my $sub_suffix = "_suffix_$$Mode_ref";
    $self->$sub_suffix();
    $self->_move();

    # amount of files that have been moved
    return $$Ret_f_moved_ref ? $$Ret_f_moved_ref : undef;
}

#
# _init_refs()
#
# Initializes references.
#

sub _init_refs {
    my $self = $_[0];

    # opts refs
    $Mode_ref = \$self->{'mode'};
    $Verbose_ref = \$self->{'options'}{'verbose'};
    $Override_ref = \$self->{'options'}{'override'};
    $Warn_ref = \$self->{'options'}{'warn'};
    $Ident_ref = \$self->{'sub_dir'}{'identifier'};
    $F_limit_ref = \$self->{'sub_dir'}{'file_limit'};
    $F_sort_ref = \$self->{'sub_dir'}{'file_sort'};
    $Num_contin_ref = \$self->{'suffix'}{'continue'};
    $Sep_ref = \$self->{'suffix'}{'separator'};
    $L_req_ref = \$self->{'suffix'}{'length'};
    $Case_ref = \$self->{'suffix'}{'case'};

    # data refs
    $Files_ref = \@Files;
    $F_names_case_ref = \%F_names_case;
    $F_names_char_ref = \%F_names_char;
    $Path_ref = \$Path;
    $Suffix_ref = \$Suffix;
}

#
# _sanity_input()
#
# Ensures that options input passes sanity.
#

sub _sanity_input {
    my %err_msg = ( source_dir  =>    'No source dir specified.',
                    target_dir  =>    'No target dir specified.',
                    mode        =>    'No mode specified.',
                    verbosity   =>    'No verbosity specified.',
                    warn        =>    'No warn mode specified.',
                    override    =>    'No override mode specified.',
                    ident       =>    'No subdir identifier specified.',
                    sep         =>    'No suffix separator specified.',
                    case        =>    'No suffix case mode specified.',
                    file_limit  =>    'No file limit specified.',
                    file_sort   =>    'No file sort mode specified.',
                    continue    =>    'No continuation mode specified.',
                    length      =>    'No suffix length specified.'
    );

    no warnings;
    my $err_input;
    {
        unless ($$Source_dir_ref) {
            $err_input = $err_msg{'source_dir'}; last;
        }
        unless ($$Target_dir_ref) {
            $err_input = $err_msg{'target_dir'}; last;
        }
        unless ($$Mode_ref eq 'char' || $$Mode_ref eq 'num') {
            $err_input = $err_msg{'mode'}; last;
        }
        unless ($$Verbose_ref =~ /^0|1$/) {
            $err_input = $err_msg{'verbosity'}; last;
        }
        unless ($$Warn_ref =~ /^none|file|dir|all$/) {
            $err_input = $err_msg{'warn'}; last;
        }
        unless ($$Override_ref =~ /^none|file|dir|all$/) {
            $err_input = $err_msg{'override'}; last;
        }
        unless ($$Ident_ref =~ /\w/) {
            $err_input = $err_msg{'ident'}; last;
        }
        unless ($$Sep_ref) {
            $err_input = $err_msg{'sep'}; last;
        }
        if ($$Mode_ref eq 'char') {
            unless ($$Case_ref eq 'lower' || $$Case_ref eq 'upper') {
                $err_input = $err_msg{'case'}; last;
            }
        }
        elsif ($$Mode_ref eq 'num') {
            unless ($$F_limit_ref =~ /\d/) {
                $err_input = $err_msg{'file_limit'}; last;
            }
            unless ($$F_sort_ref eq '+' || $$F_sort_ref eq '-') {
                $err_input = $err_msg{'file_sort'}; last;
            }
            unless ($$Num_contin_ref eq 'y' || $$Num_contin_ref eq 'n') {
                $err_input = $err_msg{'continue'}; last;
            }
            unless ($$L_req_ref =~ /\d/) {
                $err_input = $err_msg{'length'}; last;
            }

        }
    }
    croak $err_input if $err_input;
}

#
# _init_presets()
#
# Initializes var presets.
#

sub _init_presets {
    my %Presets = (
                    warn =>  {  dir  =>    "exists (d)\t",
                                file =>    "exists (f)\t",
                    },
    );

    no strict 'refs';
    $PACKAGE = __PACKAGE__.'::';

    ${$PACKAGE.'warn'}{'dir'} ||= $Presets{'warn'}{'dir'};
    ${$PACKAGE.'warn'}{'file'} ||= $Presets{'warn'}{'file'};
}

#
# _gather_files()
#
# Gathers the files from the source directory.
#

sub _gather_files {
    my $self = $_[0];

    $self->_dir_read($Source_dir_ref, $Files_ref);
    @$Files_ref = grep !-d File::Spec->catfile($$Source_dir_ref, $_), @$Files_ref;

    # if numeric splitting, sort the files.
    if ($$Mode_ref eq 'num' && ($$F_sort_ref eq '+' || $$F_sort_ref eq '-')) {
        $self->_sort_files();
    }
}

#
# _sort_files()
#
# Sorts the files if numeric splitting.
#

sub _sort_files {
    # preserve case-sensitive filenames.
    foreach (@$Files_ref) {
        $$F_names_case_ref{lc($_)} = $_;
    }
    @$Files_ref = map { lc } @$Files_ref;

    if ($$F_sort_ref eq '+') { @$Files_ref = sort @$Files_ref }
    elsif ($$F_sort_ref eq '-') { @$Files_ref = reverse @$Files_ref }
}

#
# _suffix_num()
#
# Sub handler for numeric suffixes.
#

sub _suffix_num {
    my $self = $_[0];

    if ($$Num_contin_ref eq 'y') { $self->_suffix_num_contin() }

    $self->_suffix_num_sum_up();
}

#
# _suffix_num_contin()
#
# Evaluates the highest existing subdir suffix number
# from the target directory.
#

sub _suffix_num_contin {
    my $self = $_[0];

    my @dirs;
    $self->_dir_read($Target_dir_ref, \@dirs);
    @dirs = grep -d File::Spec->catfile($$Target_dir_ref, $_), @dirs;

    # surpress warnings
    $$Suffix_ref = 0;

    my $sep = quotemeta $$Sep_ref;
    foreach (@dirs) {
        # extract exist. identifier
        ($_) = /(.+?)$sep(.*)/;
        # increase suffix to highest number
        if ($$Ident_ref eq $_ && $2 !~ /[a-z]/i) {
            $$Suffix_ref = $2 if $2 > $$Suffix_ref;
        }
    }
}

#
# _suffix_num_sum_up()
#
# Sums the numeric suffix with zeros up if required.
#

sub _suffix_num_sum_up {
    # initialize suffix or avoid suffix collision if != 0
    $$Suffix_ref++;

    if (length $$Suffix_ref != $$L_req_ref) {
        my $format = "%0.$$L_req_ref" . 'd';
        $$Suffix_ref = sprintf $format, $$Suffix_ref;
    }
}

#
# _suffix_char()
#
# Evaluates all filenames in the source directory
# and stores them in a hash associated with their
# leading character.
#

sub _suffix_char {
    foreach my $file (@$Files_ref) {
        ($_) = $file =~ /^(.)/;
        if ($_ =~ /\w/) {
            if ($$Case_ref eq 'upper') { $_ = uc ($_) }
            elsif ($$Case_ref eq 'lower') { $_ = lc ($_) }
        }
        push @{$$F_names_char_ref{$_}}, $file;
    }
    undef @$Files_ref;
}

#
# _move()
#
# Sub handler for moving files.
#

sub _move {
    my $self = $_[0];

    $Ret_f_moved_ref = \$Ret_f_moved;

    # sub handler
    my $sub_move = "_move_$$Mode_ref";
    $self->$sub_move;
}

#
# _move_num()
#
# Moves the files to numeric subdirs.
#

sub _move_num {
    my $self = $_[0];

    for (; @$Files_ref; $$Suffix_ref++) {
       $self->_mkpath($$Suffix_ref);

        for (my $i = 0; $i < $$F_limit_ref; $i++) {
            last unless my $file = shift @$Files_ref;
            $self->_cp_unlink($$F_names_case_ref{$file});
        }
    }
}

#
# _move_char()
#
# Moves the files to characteristic subdirs.
#

sub _move_char {
    my $self = $_[0];

    foreach (sort keys %$F_names_char_ref) {
        $self->_mkpath($_);

        while (@{$$F_names_char_ref{$_}}) {
            $self->_cp_unlink(shift @{$$F_names_char_ref{$_}});
        }
    }
}

#
# _dir_read (\$dir, \@items)
#
# Reads the files of a directory.
#

sub _dir_read {
    shift; my ($dir_ref, $items_ref) = @_;

    opendir D, $$dir_ref
      or croak qq~Could not open dir $$dir_ref for read-access: $!~;
    @$items_ref = readdir D; splice(@$items_ref, 0, 2);
    closedir D or croak qq~Could not close dir $$dir_ref: $!~;
}

#
# _mkpath ($suffix)
#
# Creates subdirs.
# Upon dir collision optional warning
# and overriding avoidance.
#

sub _mkpath {
    my ($self, $suffix) = @_;

    $$Path_ref = File::Spec->catfile($$Target_dir_ref, "$$Ident_ref$$Sep_ref$suffix");

    return if $self->_exists_and_not_override('dir', $$Path_ref);

    mkpath $$Path_ref, $$Verbose_ref
      or croak qq~Could not create subdir $$Path_ref: $!~;
}

#
# _cp_unlink ($file)
#
# Copies files and unlinks them.
# Upon file collision optional warning
# and overriding avoidance.
#

sub _cp_unlink {
    my ($self, $file) = @_;

    my $path_full = File::Spec->catfile($$Path_ref, $file);
    $file = File::Spec->catfile($$Source_dir_ref, $file);

    return if $self->_exists_and_not_override('file', $path_full);

    cp $file, $path_full or
      croak qq~Could not copy file $file to subdir $path_full: $!~;
    unlink $file or croak qq~Could not remove file $file: $!~;

    $$Ret_f_moved_ref++;
}

#
# _exists_and_not_override ($item, $path)
#
# Check for existing files/paths and warn accordingly.
#

sub _exists_and_not_override {
    shift; my ($item, $path) = @_;

    if (-e $path) {
        if ($$Warn_ref eq 'all' || $$Warn_ref eq $item) {
            no strict 'refs';
            warn "${$PACKAGE.'warn'}{$item} $path\n";
        }

        return 1 unless $$Override_ref eq 'all' || $$Override_ref eq $item;
    }

    return 0;
}

1;
__END__

=head1 OPTIONS

=head2 numeric

Split files to subdirectories with a numeric suffix. Numbering may
be continued or started at 1 one each time. Options are explained below.
See EXAMPLES to gain an understanding how numeric splitting works.

    %behavior = (  mode    =>    'num',

                   options => {  verbose        =>           1,
                                 warn           =>       'all',
                                 override       =>      'none',
                   },

                   sub_dir => {  identifier     =>    'system',
                                 file_limit     =>           2,
                                 file_sort      =>         '+',
                   },

                   suffix  => {  separator      =>         '-',
                                 continue       =>         'y',
                                 length         =>           5,
                   },
    );

=over 4

=item B<mode>

string - either I<num> for numeric or I<char> for characteristic.

=item B<options/verbose>

integer - verbosity; if enabled, mkpath will output the pathes
on creating subdirectories.

 MODES
   0  disabled
   1  enabled

=item B<options/warn>

string - warn upon the encounter of existing files/dirs.

 LEVELS
   none
   file
   dir
   all

=item B<options/override>

string - override of existing files/dirs.

 LEVELS
   none
   file
   dir
   all

=item B<sub_dir/identifier>

string - prefix of each subdirectory created.

=item B<sub_dir/file_limit>

integer - limit of files per subdirectory.

=item B<sub_dir/file_sort>

string - sort order of files.

 MODES
   +  ascending
   -  descending

=item B<suffix/separator>

string - separates the identifier from the suffix.

=item B<suffix/continue>

string - numbering continuation (will start at 1 if no).

 MODES
   y  yes
   n  no

=item B<suffix/length>

integer - amount of zeros to be added to the suffix.

Differing identifiers or separators do affect the numbering e.g. I<systm-> does
not equal I<system->, I<system_> does not equal I<system->. B<file_limit>, B<file_sort>
and B<length> options have no influence on decisions whether the numbering shall
be continued, whereas B<identifier>, B<separator> and B<continue> do.

=back

=head2 characteristic

Split files to subdirectories with a characteristic suffix. Files
are assigned to subdirectories which suffixes correspond with the leading character
of the filenames. Options are explained below. See EXAMPLES to gain an understanding
how characteristic splitting works.

    %behavior = (  mode    =>    'char',

                   options => {  verbose     =>           1,
                                 warn        =>       'all',
                                 override    =>      'none',
                   },

                   sub_dir => {  identifier  =>    'system',
                   },

                   suffix  => {  separator   =>         '-',
                                 case        =>     'lower',
                   },

    );

=over 4

=item B<mode>

string - either I<num> for numeric or I<char> for characteristic.

=item B<options/verbose>

integer - verbosity; if enabled, mkpath will output the pathes
on creating subdirectories.

 MODES
   0  disabled
   1  enabled

=item B<options/warn>

string - warn upon the encounter of existing files/dirs.

 LEVELS
   none
   file
   dir
   all

=item B<options/override>

string - override of existing files/dirs.

 LEVELS
   none
   file
   dir
   all

=item B<sub_dir/identifier>

string - prefix of each subdirectory created.

=item B<suffix/separator>

string - separates the identifier from the suffix.

=item B<suffix/case>

string - lower/upper case of the suffix.

 MODES
   lower
   upper

=back

=head2 presets

The warning messages upon the encounter of existing files/dirs.

    %Dir::Split::warn = (  dir  =>    "exists (d)\t",
                           file =>    "exists (f)\t",
    );

=head1 EXAMPLES

Assuming the source directory I</tmp/src> contains 5 files:

    +- _12.tmp
    +- abc.tmp
    +- def.tmp
    +- ghi.tmp
    +- jkl.tmp

After splitting the directory tree in the target directory I</tmp/target>
will look as following:

=head2 numeric splitting

    +- system-00001
    +-- _12.tmp
    +-- abc.tmp
    +- system-00002
    +-- def.tmp
    +-- ghi.tmp
    +- system-00003
    +-- jkl.tmp

=head2 characteristic splitting

    +- system-_
    +-- _12.tmp
    +- system-a
    +-- abc.tmp
    +- system-d
    +-- def.tmp
    +- system-g
    +-- ghi.tmp
    +- system-j
    +-- jkl.tmp

=head1 DEPENDENCIES

C<Perl 5.6.1>; C<File::Copy>, C<File::Path>, C<File::Spec>.

=head1 CAVEATS & BUGS

Recursive source directory processing is not supported;
existing directories within the source directory will be ignored.

Minus integer values in B<file_limit> and B<length> options
cause an odd behavior at runtime.

=head1 SEE ALSO

perl(1)

=head1 LICENSE

This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.

=head1 AUTHOR

Steven Schubiger

=cut
