#
# $Id: Split.pm,v 0.38 2004/01/04 22:03:23 sts Exp $

package Dir::Split;

our $VERSION = '0.38';

use 5.6.1;
use strict;
use warnings;

use File::Copy 'cp';
use File::Path;
use File::Spec;

our (# external data
        @exists,
        %failure,
        %track,

     # data
        @Files,
        %F_names_case,
        %F_names_char,
        $Path,
        $Ret_Status,
        $Suffix,
);

sub ACTION { 1 }
sub NO_ACTION { 0 }
sub EXISTS { -1 }
sub FAILURE { -2 }

sub croak {
    require Carp;
    &Carp::croak;
}

=head1 NAME

Dir::Split - Split files of a directory to subdirectories.

=head1 SYNOPSIS

 use Dir::Split;

 %options = (   mode    =>    'num',

                source  =>    '/source',
                target  =>    '/target',

                options => {  verbose      =>         1,
                              override     =>         0,
                },
                sub_dir => {  identifier   =>     'sub',
                              file_limit   =>         2,
                              file_sort    =>       '+',
                },
                suffix  => {  separator    =>       '-',
                              continue     =>         1,
                              length       =>         5,
                },
 );


 $dir = Dir::Split->new (\%options);

 $return = $dir->split;

=head1 DESCRIPTION

C<Dir::Split> moves files to either numbered or characteristic subdirectories.

=head1 CONSTRUCTOR

=head2 new

Creates an object. C<%options> contains the key / value pairs which will influence the splitting process.

 $dir = Dir::Split->new (\%options);

=cut

sub new {
    my ($pkg, $Opt_ref) = @_;
    croak q~usage: Dir::Split->new (\%options)~
      unless ref $Opt_ref eq 'HASH';

    my $class = ref ($pkg) || $pkg;
    my $obj = Dir::Split::_tie_var($Opt_ref);

    return bless $obj, $class;
}

#
# _tie_var (\%hash)
#
# Assigns the interface options to the object.
#

sub _tie_var {
    my $Opt_ref = $_[0];

    my %assign = (  mode    =>    'mode',

                    source  =>    'source',
                    target  =>    'target',

                    options => {  verbose     =>      'verbose',
                                  override    =>     'override',
                    },
                    sub_dir => {  identifier  =>        'ident',
                                  file_limit  =>      'f_limit',
                                  file_sort   =>       'f_sort',
                    },
                    suffix  => {  separator   =>          'sep',
                                  continue    =>   'num_contin',
                                  length      =>       'length',
                                  case        =>         'case',
                    },
    );
    my %assigned;
    foreach my $key (keys %$Opt_ref) {
        if (ref $$Opt_ref{$key} eq 'HASH') {
            foreach ( keys %{$$Opt_ref{$key}} ) {
                $assigned{$assign{$key}{$_}} = $$Opt_ref{$key}{$_};
            }
        }
        else { $assigned{$assign{$key}} = $$Opt_ref{$key} }
    }
    return \%assigned;
};

=head1 METHODS

=head2 split

Split files to subdirectories.

 $return = $dir->split;

It is of tremendous importance to notice that checking the return code is a B<must>.
Leaving the return code untouched will not allow appropriate gathering of harmless
debug data (such as existing files) and system operations that failed. C<Dir::Split>
does only report verbose output of mkpath to STDOUT. See I<OPTIONS / debug> on how to
become aware of existing files and failed system operations (I<copy> & I<unlink>).

B<RETURN CODES>

=over 4

=item (1)

Files moved successfully.

=item (0)

No action.

=item (-1)

Exists.

I<(see OPTIONS / debug / existing)>.

=item (-2)

Failure.

I<(see OPTIONS / debug / failures)>.

=back

=cut

sub split {
    my $o = $_[0];

    $o->_sanity_input();
    $o->_gather_files();

    # files found, split.
    if (@Files) {
        $Ret_Status = ACTION;

        #$o->_init_presets(); # unused atm.

        # engine
        $o->_sort_files() if $o->{'mode'} eq 'num';
        my $sub_suffix = "_suffix_$o->{'mode'}";
        $o->$sub_suffix();
        $o->_move();
    }
    # no files? exit.
    else { $Ret_Status = NO_ACTION }

    return $Ret_Status;
}

#
# _sanity_input()
#
# Ensures that interface input passes sanity.
#

sub _sanity_input {
    my $o = $_[0];

    my %err_msg = (  mode        =>    'No mode specified.',
                     source      =>    'No source dir specified.',
                     target      =>    'No target dir specified.',
                     verbose     =>    'No verbosity specified.',
                     override    =>    'No override mode specified.',
                     ident       =>    'No subdir identifier specified.',
                     sep         =>    'No suffix separator specified.',
                     f_limit     =>    'No file limit specified.',
                     f_sort      =>    'No file sort mode specified.',
                     num_contin  =>    'No continuation mode specified.',
                     length      =>    'No suffix length specified.',
                     case        =>    'No suffix case mode specified.',
    );

    my $err_input;
    {   # generic opts
        unless ($o->{'mode'} eq 'num' || $o->{'mode'} eq 'char') {
            $err_input = $err_msg{'mode'}; last;
        }
        unless ($o->{'source'}) {
            $err_input = $err_msg{'source'}; last;
        }
        unless ($o->{'target'}) {
            $err_input = $err_msg{'target'}; last;
        }
        unless ($o->{'verbose'} =~ /^0|1$/) {
            $err_input = $err_msg{'verbose'}; last;
        }
        unless ($o->{'override'} =~ /^0|1$/) {
            $err_input = $err_msg{'override'}; last;
        }
        unless ($o->{'ident'} =~ /\w/) {
            $err_input = $err_msg{'ident'}; last;
        }
        unless ($o->{'sep'}) {
            $err_input = $err_msg{'sep'}; last;
        }
        # numeric opts
        if ($o->{'mode'} eq 'num') {
            unless ($o->{'f_limit'} > 0) {
                $err_input = $err_msg{'f_limit'}; last;
            }
            unless ($o->{'f_sort'} eq '+' || $o->{'f_sort'} eq '-') {
                $err_input = $err_msg{'f_sort'}; last;
            }
            unless ($o->{'num_contin'} =~ /^0|1$/) {
                $err_input = $err_msg{'num_contin'}; last;
            }
            unless ($o->{'length'} > 0) {
                $err_input = $err_msg{'length'}; last;
            }
        }
        # characteristic opts
        elsif ($o->{'mode'} eq 'char') {
            unless ($o->{'case'} eq 'lower' || $o->{'case'} eq 'upper') {
                $err_input = $err_msg{'case'}; last;
            }
        }
    }
    croak $err_input if $err_input;
}

#
# _gather_files()
#
# Gathers the files from the source directory.
#

sub _gather_files {
    my $o = $_[0];

    $o->_dir_read($o->{'source'}, \@Files);
    @Files = grep !-d File::Spec->catfile($o->{'source'}, $_), @Files;

    $track{'source'}{'files'} = scalar @Files;
}

#
# _init_presets()
#
# Initializes var presets.
#

#sub _init_presets {
#}

#
# _sort_files()
#
# Sorts the files if numeric splitting.
#

sub _sort_files {
    my $o = $_[0];

    if ($o->{'f_sort'} eq '+' || $o->{'f_sort'} eq '-') {
        # preserve case-sensitive filenames.
        foreach (@Files) {
           $F_names_case{lc($_)} = $_;
        }
        @Files = map lc, @Files;

        if ($o->{'f_sort'} eq '+') { @Files = sort @Files }
        elsif ($o->{'f_sort'} eq '-') { @Files = reverse @Files }
    }
}

#
# _suffix_num()
#
# Sub handler for numeric suffixes.
#

sub _suffix_num {
    my $o = $_[0];

    $o->_suffix_num_contin() if $o->{'num_contin'} == 1;
    $o->_suffix_num_sum_up();
}

#
# _suffix_num_contin()
#
# Evaluates the highest existing subdir suffix number.
#

sub _suffix_num_contin {
    my $o = $_[0];

    my @dirs;
    $o->_dir_read($o->{'target'}, \@dirs);
    @dirs = grep -d File::Spec->catfile($o->{'target'}, $_), @dirs;

    # surpress warnings
    $Suffix = 0;
    my $sep = quotemeta $o->{'sep'};
    foreach (@dirs) {
        # extract exist. identifier
        ($_) = /(.+?)$sep(.*)/;
        my $suff_cmp = $2;
        # increase suffix to highest number
        if ($o->{'ident'} eq $_ && $suff_cmp =~ /[0-9]/) {
            $Suffix = $suff_cmp if $suff_cmp > $Suffix;
        }
    }
}

#
# _suffix_num_sum_up()
#
# Sums the numeric suffix with zeros up if required.
#

sub _suffix_num_sum_up {
    my $o = $_[0];

    $Suffix++;
    if (length $Suffix < $o->{'length'}) {
        my $format = "%0.$o->{'length'}".'d';
        $Suffix = sprintf $format, $Suffix;
    }
}

#
# _suffix_char()
#
# Evaluates all filenames in the source directory
# and stores them in a hash associated with their
# leading character.
#

sub _suffix_char {
    my $o = $_[0];

    foreach my $file (@Files) {
        ($_) = $file =~ /^(.)/;
        if ($_ =~ /\w/) {
            if ($o->{'case'} eq 'upper') { $_ = uc $_ }
            elsif ($o->{'case'} eq 'lower') { $_ = lc $_ }
        }
        push @{$F_names_char{$_}}, $file;
    }
    undef @Files;
}

#
# _move()
#
# Sub handler for moving files.
#

sub _move {
    my $o = $_[0];

    # initalize tracking
    $track{'target'}{'dirs'} = 0;
    $track{'target'}{'files'} = 0;

    my $sub_move = "_move_$o->{'mode'}";
    $o->$sub_move();
}

#
# _move_num()
#
# Moves the files to numeric subdirs.
#

sub _move_num {
    my $o = $_[0];

    for (; @Files; $Suffix++) {
       $o->_mkpath($Suffix);

        for (my $i = 0; $i < $o->{'f_limit'}; $i++) {
            last unless my $file = shift @Files;
            $o->_cp_unlink($F_names_case{$file});
        }
    }
}

#
# _move_char()
#
# Moves the files to characteristic subdirs.
#

sub _move_char {
    my $o = $_[0];

    foreach (sort keys %F_names_char) {
        $o->_mkpath($_);

        while (my $file = shift @{$F_names_char{$_}}) {
            $o->_cp_unlink($file);
        }
    }
}

#
# _dir_read (\$dir, \@files)
#
# Reads the files of a directory.
#

sub _dir_read {
    shift; my ($dir, $files_ref) = @_;

    opendir D, $dir
      or croak qq~Could not open dir $dir for read-access: $!~;
    @$files_ref = readdir D; splice @$files_ref, 0, 2;
    closedir D or croak qq~Could not close dir $dir: $!~;
}

#
# _mkpath ($suffix)
#
# Creates subdirs.
#

sub _mkpath {
    my ($o, $suffix) = @_;

    $Path = File::Spec->catfile($o->{'target'}, "$o->{'ident'}$o->{'sep'}$suffix");

    return if -e $Path;
    mkpath $Path, $o->{'verbose'} or
      die "Directory $Path could not be created: $!\n";

    $track{'target'}{'dirs'}++;
}

#
# _cp_unlink ($file)
#
# Copies files and unlinks them.
# Upon existing files / failures debug data.
#

sub _cp_unlink {
    my ($o, $file) = @_;

    my $path_full = File::Spec->catfile($Path, $file);
    $file = File::Spec->catfile($o->{'source'}, $file);

    if ($o->_exists_and_not_override($path_full)) {
        push @exists, $path_full;
        return;
    }

    unless (cp $file, $path_full) {
        push @{$failure{'copy'}}, $path_full;
        $Ret_Status = FAILURE unless $Ret_Status eq FAILURE;
        return;
    }
    unless (unlink $file) {
        push @{$failure{'unlink'}}, $file;
        $Ret_Status = FAILURE unless $Ret_Status eq FAILURE;
        return;
    }

    $track{'target'}{'files'}++;
}

#
# _exists_and_not_override ($item, $path)
#
# Looks out for existing files.
#

sub _exists_and_not_override {
    my ($o, $path) = @_;

    if (-e $path) {
        if ($o->{'override'} != 1) {
            if ($Ret_Status ne FAILURE && $Ret_Status ne EXISTS) {
                $Ret_Status = EXISTS;
            }
            return 1;
        }
    }

    return 0;
}

1;
__END__

=head1 OPTIONS

=head2 type indicators

=over 4

=item (c)

character

=item (i)

integer

=item (s)

string

=back

=head2 numeric

Split files to subdirectories with a numeric suffix.

    %options = (  mode    =>    'num',

                  source  =>    '/source',
                  target  =>    '/target',

                  options => {  verbose     =>         1,
                                override    =>         0,
                  },
                  sub_dir => {  identifier  =>     'sub',
                                file_limit  =>         2,
                                file_sort   =>       '+',
                  },
                  suffix  => {  separator   =>       '-',
                                continue    =>         1,
                                length      =>         5,
                  },
    );

B<options> (mandatory)

=over 4

=over 4

=item *

=item B<mode>

(s) - I<num> for numeric.

=item B<source>

(s) - source directory.

=item B<target>

(s) - target directory.

=item B<options / verbose>

(i) - if enabled, mkpath will output the pathes on creating
subdirectories.

 MODES
   1  enabled
   0  disabled

=item B<options / override>

(i) - overriding of existing files.

 MODES
   1  enabled
   0  disabled

=item B<sub_dir / identifier>

(s) - prefix of each subdirectory created.

=item B<sub_dir / file_limit>

(i) - limit of files per subdirectory.

=item B<sub_dir / file_sort>

(c) - sort order of files.

 MODES
   +  ascending
   -  descending

=item B<suffix / separator>

(s) - suffix separator.

=item B<suffix / continue>

(i) - numbering continuation.

 MODES
   1  enabled
   0  disabled    (will start at 1)

If numbering continuation is enabled, and numeric subdirectories are found
within target directory which match the given identifier and separator,
then the suffix numbering will be continued. Disabling numbering continuation
may cause interfering with existing files.

=item B<suffix / length>

(i) - length of the suffix in characters.

This option will have no effect if its smaller than the current size
of the highest suffix number.

=back

=back

=head2 characteristic

Split files to subdirectories with a characteristic suffix. Files
are assigned to subdirectories which suffixes equal the leading character
of their filenames.

    %options = (  mode    =>    'char',

                  source  =>    '/source',
                  target  =>    '/target',

                  options => {  verbose     =>         1,
                                override    =>         0,
                  },
                  sub_dir => {  identifier  =>     'sub',
                  },
                  suffix  => {  separator   =>       '-',
                                case        =>   'lower',
                  },

    );

B<options> (mandatory)

=over 4

=over 4

=item *

=item B<mode>

(s) - I<char> for characteristic.

=item B<source>

(s) - source directory.

=item B<target>

(s) - target directory.

=item B<options / verbose>

(i) - if enabled, mkpath will output the pathes on creating
subdirectories.

 MODES
   1  enabled
   0  disabled

=item B<options / override>

(i) - overriding of existing files.

 MODES
   1  enabled
   0  disabled

=item B<sub_dir / identifier>

(s) - prefix of each subdirectory created.

=item B<suffix / separator>

(s) - suffix separator.

=item B<suffix / case>

(s) - lower/upper case of the suffix.

 MODES
   lower
   upper

=back

=back

=head2 tracking

C<%Dir::Split::track> keeps count of how many files the source and directories / files
the target consists of. It may prove its usefulness, if the amount of files that could
not be transferred due to existing ones has to be counted.
Each time a new splitting is attempted, the track will be reseted.

    %Dir::Split::track = (  source  =>    {  files  =>    512,
                            },
                            target  =>    {  dirs   =>    128,
                                             files  =>    512,
                            },
    );

Above example: directory consisting 512 files successfully splitted to 128 directories.

=head2 debug

=head3 existing

If C<split()> returns a I<EXISTS>, this implys that the B<override> option is disabled and
files could not be moved due to existing files within the target subdirectories; they will have
their paths appearing in the according keys in C<%Dir::Split::exists>.

   file    @Dir::Split::exists    # existing files, not attempted to
                                  # be overwritten.

=head3 failures

If C<split()> returns a I<FAILURE>, this most often implys that the B<override> option is enabled
and existing files could not be overriden. Files that could not be copied / unlinked,
will have their paths appearing in the according keys in C<%Dir::Split::failure>.

   file    @{$Dir::Split::failure{'copy'}}      # files that couldn't be copied,
                                                # most often on overriding failures.

           @{$Dir::Split::failure{'unlink'}}    # files that could be copied but not unlinked,
                                                # rather seldom.

It is highly recommended to evaluate those arrays on I<FAILURE>.

=head1 EXAMPLES

Assuming I</source> contains 5 files:

    +- _12.tmp
    +- abc.tmp
    +- def.tmp
    +- ghi.tmp
    +- jkl.tmp

After splitting the directory tree in I</target> will look as following:

=head2 numeric splitting

    +- system-00001
    +-- _12.tmp
    +-- abc.tmp
    +- system-00002
    +-- def.tmp
    +-- ghi.tmp
    +- system-00003
    +-- jkl.tmp

=head2 characteristic splitting

    +- system-_
    +-- _12.tmp
    +- system-a
    +-- abc.tmp
    +- system-d
    +-- def.tmp
    +- system-g
    +-- ghi.tmp
    +- system-j
    +-- jkl.tmp

=head1 DEPENDENCIES

C<Perl 5.6.1>; C<File::Copy>, C<File::Path>, C<File::Spec>.

=head1 CAVEATS

Recursive source directory processing is not supported;
existing directories within the source directory will be ignored.

=head1 SEE ALSO

perl(1)

=head1 LICENSE

This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.

=head1 AUTHOR

Steven Schubiger

=cut
