#
# $Id: Split.pm,v 0.35 2004/01/02 02:27:13 st.schubiger Exp $

package Dir::Split;

use 5.6.1;
use strict;
use warnings;

use Carp;
use File::Copy 'cp';
use File::Path;
use File::Spec;

our $VERSION = '0.35';

our (# external refs
        $Source_dir_ref, # scl
        $Target_dir_ref, # scl

     # opt refs
        $Mode_ref,     # scl
        $Override_ref, # scl
        $Warn_ref,     # scl
        $Verbose_ref,  # scl
        $Ident_ref,    # scl
        $F_limit_ref,  # scl
        $F_sort_ref,   # scl
        $Cont_num_ref, # scl
        $Sep_ref,      # scl
        $L_req_ref,    # scl
        $Case_ref,     # scl

     # data refs
        $Files_ref,        # arr
        $F_names_case_ref, # hsh
        $F_names_char_ref, # hsh
        $Path_ref,         # scl
        $Ret_f_moved_ref,  # scl
        $Suffix_ref,       # scl

     # data
        @Files,
        %F_names_case,
        %F_names_char,
        $Path,
        $Ret_f_moved,
        $Suffix,
    );

{
    my %Presets = (
                    warn =>  {  dir  =>    "exists (d)\t",
                                file =>    "exists (f)\t",
                    },
    );

    {
    no strict 'refs';

    ${__PACKAGE__.'::Warn'}{'dir'} ||= $Presets{'warn'}{'dir'};
    ${__PACKAGE__.'::Warn'}{'file'} ||= $Presets{'warn'}{'file'};
    }
}

=head1 NAME

Dir::Split - Split the files of a directory to subdirectories.

=head1 SYNOPSIS

 use Dir::Split;

 # numeric options
 %behavior = (  mode    =>    'num',

                options => {  verbose        =>           1,
                              warn           =>       'all',
                              override       =>      'none',
                },

                sub_dir => {  identifier     =>    'system',
                              file_limit     =>           2,
                              file_sort      =>         '+',
                },

                suffix  => {  continue_num   =>         'y',
                              separator      =>         '-',
                              length         =>           5,
                },
 );

 # create object
 $dir = Dir::Split->new (\%behavior);

 # set source & target dir
 $source_dir = '/tmp/src';
 $target_dir = '/tmp/target';

 # split files to subdirs
 $files_moved = $dir->split (\$source_dir, \$target_dir);

 # change the subdir identifier
 $dir->{'sub_dir'}{'identifier'} = 'test';

 # split files to subdirs
 $files_moved = $dir->split (\$source_dir, \$target_dir);

=head1 DESCRIPTION

C<Dir::Split> moves files from a source directory to numbered subdirectories within
a target directory.

=head1 METHODS

=head2 new ( \%behavior )

Object constructor.

 $dir = Dir::Split->new (\%behavior);

C<%behavior> contains the options that will influence the splitting process.

=cut

sub new {
    my ($pkg, $Opt_ref) = @_;
    croak q~usage: Dir::Split->new (\%behavior)~
      unless ref $Opt_ref eq 'HASH';

    my $class = ref ($pkg) || $pkg;
    my $obj = Dir::Split::_tie_var($Opt_ref);

    bless $obj, $class;
}

#
# _tie_var (\%hash)
#
# Dereferences a two-dimensional hash. A reference
# to a private hash will be returned.
#

sub _tie_var {
    my $Opt_ref = $_[0];

    my %my_hash;
    foreach my $key (keys %$Opt_ref) {
        if (ref $$Opt_ref{$key} eq 'HASH') {
            foreach ( keys %{$$Opt_ref{$key}} ) {
                $my_hash{$key}{$_} = $$Opt_ref{$key}{$_};
            }
        }
        else { $my_hash{$key} = $$Opt_ref{$key} }
    }

    \%my_hash;
};

=head2 split ( \$source_dir, \$target_dir )

Split files to subdirectories.

 $files_moved = $dir->split (\$source_dir, \$target_dir);

C<$source_dir> specifies the source directory.

C<$target_dir> specifies the target directory.

Returns the amount of files that have been successfully moved;
if none, it will return undef.

=cut

sub split {
    my $self = shift;
    ($Source_dir_ref, $Target_dir_ref) = @_;
    croak q~usage: $obj->split (\$source_dir, \$target_dir)~
      unless ref $Source_dir_ref eq 'SCALAR' && ref $Target_dir_ref eq 'SCALAR';

    # opts refs
    $Mode_ref = \$self->{'mode'};
    $Verbose_ref = \$self->{'options'}{'verbose'};
    $Override_ref = \$self->{'options'}{'override'};
    $Warn_ref = \$self->{'options'}{'warn'};
    $Ident_ref = \$self->{'sub_dir'}{'identifier'};
    $F_limit_ref = \$self->{'sub_dir'}{'file_limit'};
    $F_sort_ref = \$self->{'sub_dir'}{'file_sort'};
    $Cont_num_ref = \$self->{'suffix'}{'continue_num'};
    $Sep_ref = \$self->{'suffix'}{'separator'};
    $L_req_ref = \$self->{'suffix'}{'length'};
    $Case_ref = \$self->{'suffix'}{'case'};

    # data refs
    $Files_ref = \@Files;
    $F_names_case_ref = \%F_names_case;
    $F_names_char_ref = \%F_names_char;
    $Path_ref = \$Path;
    $Suffix_ref = \$Suffix;

    $self->_sanity_input;
    $self->_gather_files;
    my $sub_suffix = "_suffix_$$Mode_ref";
    $self->$sub_suffix;
    $self->_move;
}

#
# _sanity_input()
#
# Ensures that the interface input passes sanity;
# if not, croak.
#

sub _sanity_input {
    my %err_msg = ( source_dir  =>    'No source dir specified.',
                    target_dir  =>    'No target dir specified.',
                    mode        =>    'No mode specified.',
                    verbosity   =>    'No verbosity specified.',
                    warn        =>    'No warn mode specified.',
                    override    =>    'No override mode specified.',
                    ident       =>    'No subdir identifier specified.',
                    sep         =>    'No suffix separator specified.',
                    case        =>    'No suffix case mode specified.',
                    file_limit  =>    'No file limit specified.',
                    file_sort   =>    'No file sort mode specified.',
                    cont_num    =>    'No continuation mode specified.',
                    length      =>    'No suffix length specified.'
    );

    no warnings;
    my $err_input;
    {
        unless ($$Source_dir_ref) {
            $err_input = $err_msg{'source_dir'}; last;
        }
        unless ($$Target_dir_ref) {
            $err_input = $err_msg{'target_dir'}; last;
        }
        unless ($$Mode_ref eq 'char' || $$Mode_ref eq 'num') {
            $err_input = $err_msg{'mode'}; last;
        }
        unless ($$Verbose_ref =~ /^0|1$/) {
            $err_input = $err_msg{'verbosity'}; last;
        }
        unless ($$Warn_ref =~ /^none|file|dir|all$/) {
            $err_input = $err_msg{'warn'}; last;
        }
        unless ($$Override_ref =~ /^none|file|dir|all$/) {
            $err_input = $err_msg{'override'}; last;
        }
        unless ($$Ident_ref =~ /\w/) {
            $err_input = $err_msg{'ident'}; last;
        }
        unless ($$Sep_ref) {
            $err_input = $err_msg{'sep'}; last;
        }
        if ($$Mode_ref eq 'char') {
            unless ($$Case_ref eq 'lower' || $$Case_ref eq 'upper') {
                $err_input = $err_msg{'case'}; last;
            }
        }
        elsif ($$Mode_ref eq 'num') {
            unless ($$F_limit_ref =~ /\d/) {
                $err_input = $err_msg{'file_limit'}; last;
            }
            unless ($$F_sort_ref eq '+' || $$F_sort_ref eq '-') {
                $err_input = $err_msg{'file_sort'}; last;
            }
            unless ($$Cont_num_ref eq 'y' || $$Cont_num_ref eq 'n') {
                $err_input = $err_msg{'cont_num'}; last;
            }
            unless ($$L_req_ref =~ /\d/) {
                $err_input = $err_msg{'length'}; last;
            }

        }
    }
    croak $err_input if $err_input;
}

#
# _gather_files()
#
# Gathers the files the source directory
# consists of and sorts them if required.
#

sub _gather_files {
    my $self = $_[0];

    $self->_dir_read($Source_dir_ref, $Files_ref);
    @$Files_ref = grep !-d File::Spec->catfile($$Source_dir_ref, $_), @$Files_ref;

    if ($$Mode_ref eq 'num' && ($$F_sort_ref eq '+' || $$F_sort_ref eq '-') ) {
        # preserve case-sensitive filenames in hash.
        foreach (@$Files_ref) { $$F_names_case_ref{lc($_)} = $_ }
        @$Files_ref = map { lc } @$Files_ref;

        if ($$F_sort_ref eq '+') { @$Files_ref = sort @$Files_ref }
        elsif ($$F_sort_ref eq '-') { @$Files_ref = reverse @$Files_ref }
    }
}

#
# _suffix_char()
#
# Evaluates all filenames in the source directory
# and stores them in a hash associated with their
# leading character.
#

sub _suffix_char {
    foreach my $file (@$Files_ref) {
        ($_) = $file =~ /^(.)/;
        if ($_ =~ /\w/) {
            if ($$Case_ref eq 'upper') { $_ = uc ($_) }
            elsif ($$Case_ref eq 'lower') { $_ = lc ($_) }
        }
        push @{$$F_names_char_ref{$_}}, $file;
    }
    undef @$Files_ref;
}

#
# _suffix_num()
#
# Evaluates the highest existing subdir suffix number
# from the target directory in order to continue numbering
# where it stopped previously. The suffix will be summed up
# with zeros if required.
#

sub _suffix_num {
    my $self = $_[0];

    # surpress warnings
    $$Suffix_ref = 0;

    if ($$Cont_num_ref eq 'y') {
        my @dirs;
        $self->_dir_read($Target_dir_ref, \@dirs);
        @dirs = grep -d File::Spec->catfile($$Target_dir_ref, $_), @dirs;

        my $sep = quotemeta $$Sep_ref;
        foreach (@dirs) {
            # extract exist. identifier
            ($_) = /(.+?)$sep(.*)/;
            # increase suffix to highest number
            if ($$Ident_ref eq $_ && $2 !~ /[a-z]/i) {
               $$Suffix_ref = $2 if $2 > $$Suffix_ref;
            }
        }
    }

    # Initialize suffix or avoid suffix collision if != 0
    $$Suffix_ref++;

    if (length $$Suffix_ref != $$L_req_ref) {
        my $format = "%0.$$L_req_ref" . 'd';
        $$Suffix_ref = sprintf $format, $$Suffix_ref;
    }
}

#
# _move()
#
# Invokes either _move_num() or _move_char() by
# relying upon the opts.
#

sub _move {
    my $self = $_[0];

    $Ret_f_moved_ref = \$Ret_f_moved;

    # sub handler
    my $sub_move = "_move_$$Mode_ref";
    $self->$sub_move;

    $$Ret_f_moved_ref ? $$Ret_f_moved_ref : undef;
}

#
# _move_num()
#
# Moves the files to numeric subdirs.
#

sub _move_num {
    my $self = $_[0];

    for (; @$Files_ref; $$Suffix_ref++) {
       $self->_mkpath($$Suffix_ref);

        for (my $i = 0; $i < $$F_limit_ref; $i++) {
            last unless my $file = shift @$Files_ref;
            $self->_cp_unlink($$F_names_case_ref{$file});
        }
    }
}

#
# _move_char()
#
# Basically the same operation as _move_num(),
# except that characteristic suffixes are attached
# to the subdirs.
#

sub _move_char {
    my $self = $_[0];

    foreach (sort keys %$F_names_char_ref) {
        $self->_mkpath($_);

        while (@{$$F_names_char_ref{$_}}) {
            $self->_cp_unlink(shift @{$$F_names_char_ref{$_}});
        }
    }
}

#
# _dir_read (\$dir, \@items)
#
# Stores the items a directory consists of
# in a referenced array.
#

sub _dir_read {
    shift; my ($dir_ref, $items_ref) = @_;

    opendir D, $$dir_ref
      or croak qq~Could not open dir $$dir_ref for read-access: $!~;
    @$items_ref = readdir D; splice(@$items_ref, 0, 2);
    closedir D or croak qq~Could not close dir $$dir_ref: $!~;
}

#
# _mkpath ($Suffix)
#
# Creates subdirs.
# Upon dir collision optional warning
# and overriding avoidance.
#

sub _mkpath {
    shift; my $suffix = $_[0];

    $$Path_ref = File::Spec->catfile($$Target_dir_ref, "$$Ident_ref$$Sep_ref$suffix");

    if (-e $$Path_ref) {
        warn "$Dir::Split::Warn{'dir'} $$Path_ref\n"
          if $$Warn_ref eq 'all' || $$Warn_ref eq 'dir';
        return unless $$Override_ref eq 'all' || $$Override_ref eq 'dir';
    }
    mkpath $$Path_ref, $$Verbose_ref
      or croak qq~Could not create subdir $$Path_ref: $!~;
}

#
# _cp_unlink ($file)
#
# Copies files and unlinks them after they've been copied.
# Upon file collision optional warning and overriding avoidance.
#

sub _cp_unlink {
    shift; my $file = $_[0];

    my $path_full = File::Spec->catfile($$Path_ref, $file);
    $file = File::Spec->catfile($$Source_dir_ref, $file);

    if (-e $path_full) {
        warn "$Dir::Split::Warn{'file'} $path_full \n"
          if $$Warn_ref eq 'all' || $$Warn_ref eq 'file';
        return unless $$Override_ref eq 'all' || $$Override_ref eq 'file';
    }
    cp $file, $path_full or
      croak qq~Could not copy file $file to subdir $path_full: $!~;
    unlink $file or croak qq~Could not remove file $file: $!~;

    $$Ret_f_moved_ref++;
}

1;
__END__

=head1 OPTIONS

=head2 numeric splitting

Split files to subdirectories with a numeric suffix. Numbering may
be continued or started at 1 one each time. Options are explained below.
See EXAMPLES to gain an understanding how numeric splitting works.

    %behavior = (  mode    =>    'num',

                   options => {  verbose        =>           1,
                                 warn           =>       'all',
                                 override       =>      'none',
                   },

                   sub_dir => {  identifier     =>    'system',
                                 file_limit     =>           2,
                                 file_sort      =>         '+',
                   },

                   suffix  => {  separator      =>         '-',
                                 continue_num   =>         'y',
                                 length         =>           5,
                   },
    );

=head2 characteristic splitting

Split files to subdirectories with a characteristic suffix. Files
are assigned to subdirectories which suffixes correspond with the leading character
of the filenames. Options are explained below. See EXAMPLES to gain an understanding
how characteristic splitting works.

    %behavior = (  mode    =>    'char',

                   options => {  verbose     =>           1,
                                 warn        =>       'all',
                                 override    =>      'none',
                   },

                   sub_dir => {  identifier  =>    'system',
                   },

                   suffix  => {  separator   =>         '-',
                                 case        =>     'lower',
                   },

    );

=over 4

=item generic behavior

=over 4

=item C<mode>

string - either I<num> for numeric or I<char> for characteristic.

=item C<options/verbose>

integer - verbosity; if enabled, mkpath will output the pathes
on creating subdirectories.

 MODES
   0  disabled
   1  enabled

=item C<options/warn>

string - warn upon the encounter of existing files/dirs.

 LEVELS
   none
   file
   dir
   all

=item C<options/override>

string - override of existing files/dirs.

 LEVELS
   none
   file
   dir
   all

=item C<sub_dir/identifier>

string - prefix of each subdirectory created.

=item C<suffix/separator>

string - separates the identifier from the suffix.

=back

=item numeric behavior

=over 4

=item C<sub_dir/file_limit>

integer - limit of files per subdirectory.

=item C<sub_dir/file_sort>

string - sort order of files.

 MODES
   +  ascending
   -  descending

=item C<suffix/continue_num>

string - numbering continuation (will start at 1 if no).

 MODES
   y  yes
   n  no

=item C<suffix/length>

integer - amount of zeros to be added to the suffix.

=back

Differing identifiers or separators do affect the numbering e.g. I<systm-> does
not equal I<system->, I<system_> does not equal I<system->. C<file_limit>, C<file_sort>
and C<length> options have no influence on decisions whether the numbering shall
be continued, whereas C<identifier>, C<separator> and C<continue_num> do.

=item characteristic behavior

=over 4

=item C<suffix/case>

string - lower/upper case of the suffix.

 MODES
   lower
   upper

=back

=back

=head2 presets

The warning messages upon the encounter of existing files/dirs.

    %Dir::Split::Warn = (  dir  =>    "exists (d)\t",
                           file =>    "exists (f)\t",
    );

=head1 EXAMPLES

Assuming the source directory I</tmp/src> contains 5 files:

    +- _12.tmp
    +- abc.tmp
    +- def.tmp
    +- ghi.tmp
    +- jkl.tmp

After splitting the directory tree in the target directory I</tmp/target>
will look as following:

=head2 numeric splitting

    +- system-00001
    +-- _12.tmp
    +-- abc.tmp
    +- system-00002
    +-- def.tmp
    +-- ghi.tmp
    +- system-00003
    +-- jkl.tmp

=head2 characteristic splitting

    +- system-_
    +-- _12.tmp
    +- system-a
    +-- abc.tmp
    +- system-d
    +-- def.tmp
    +- system-g
    +-- ghi.tmp
    +- system-j
    +-- jkl.tmp

=head1 DEPENDENCIES

C<Perl 5.6.1>; C<File::Copy>, C<File::Path>, C<File::Spec>.

=head1 CAVEATS & BUGS

Recursive source directory processing is not supported;
existing directories within the source directory will be ignored.

Minus integer values in C<file_limit> and C<length> options
cause an odd behavior at runtime.

=head1 SEE ALSO

perl(1)

=head1 LICENSE

This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.

=head1 AUTHOR

Steven Schubiger

=cut
