#
# $Id: Split.pm,v 0.37 2004/01/03 21:31:27 st.schubiger Exp $

package Dir::Split;

use 5.6.1;
use strict;
use warnings;

use Carp;
use File::Copy 'cp';
use File::Path;
use File::Spec;

our $VERSION = '0.37';

our (# external data
        %track,
        %warn,

     # data
        @Files,
        %F_names_case,
        %F_names_char,
        $Path,
        $Ret_Status,
        $Suffix,
);

# return codes
our $ACTION    = 1;
our $NO_ACTION = 0;
our $EXISTS   = -1;

=head1 NAME

Dir::Split - Split files of a directory to subdirectories.

=head1 SYNOPSIS

 use Dir::Split;

 %options = (   mode    =>    'num',

                source  =>    '/tmp/source',
                target  =>    '/tmp/target',

                options => {  verbose      =>         1,
                              warn         =>     'all',
                              override     =>    'none',
                },
                sub_dir => {  identifier   =>     'sub',
                              file_limit   =>         2,
                              file_sort    =>       '+',
                },
                suffix  => {  separator    =>       '-',
                              continue     =>       'y',
                              length       =>         5,
                },
 );


 $dir = Dir::Split->new (\%options);

 $return = $dir->split;

 $dir->{'source'} = '/tmp/dir1';
 $dir->{'target'} = '/tmp/dir2';

 $return = $dir->split;

=head1 DESCRIPTION

C<Dir::Split> moves files to either numbered or characteristic subdirectories.

=head1 CONSTRUCTOR

=head2 new ( \%options )

Creates an object. C<%options> contains the key/value pairs which will influence the splitting process.

 $dir = Dir::Split->new (\%options);

=cut

sub new {
    my ($pkg, $Opt_ref) = @_;
    croak q~usage: Dir::Split->new (\%options)~
      unless ref $Opt_ref eq 'HASH';

    my $class = ref ($pkg) || $pkg;
    my $obj = Dir::Split::_tie_var($Opt_ref);

    return bless $obj, $class;
}

#
# _tie_var (\%hash)
#
# Assigns the interface options to the object.
#

sub _tie_var {
    my $Opt_ref = $_[0];

    my %assign = (  mode    =>    'mode',

                    source  =>    'source',
                    target  =>    'target',

                    options => {  verbose     =>      'verbose',
                                  warn        =>         'warn',
                                  override    =>     'override',
                    },
                    sub_dir => {  identifier  =>        'ident',
                                  file_limit  =>      'f_limit',
                                  file_sort   =>       'f_sort',
                    },
                    suffix  => {  separator   =>          'sep',
                                  continue    =>   'num_contin',
                                  length      =>       'length',
                                  case        =>         'case',
                    },
    );

    my %assigned;
    foreach my $key (keys %$Opt_ref) {
        if (ref $$Opt_ref{$key} eq 'HASH') {
            foreach ( keys %{$$Opt_ref{$key}} ) {
                $assigned{$assign{$key}{$_}} = $$Opt_ref{$key}{$_};
            }
        }
        else { $assigned{$assign{$key}} = $$Opt_ref{$key} }
    }

    return \%assigned;
};

=head1 METHODS

=head2 split

Split files to subdirectories.

 $return = $dir->split;

B<RETURN CODES>

=over 4

=item (1)

Files moved successfully.

Implies access to C<%Dir::Split::track> (see OPTIONS / tracking).

=item (0)

No action.

=item (-1)

Existing directories/files found.

Implies access to C<%Dir::Split::track> (see OPTIONS / tracking).

=back

=cut

sub split {
    my $o = $_[0];

    $o->_sanity_input();
    $o->_gather_files();

    # files found, split.
    if (@Files) {
        $Ret_Status = $ACTION;

        $o->_init_presets();

        # engine
        $o->_sort_files() if $o->{'mode'} eq 'num';
        my $sub_suffix = "_suffix_$o->{'mode'}";
        $o->$sub_suffix();
        $o->_move();
    }
    # no files? goodbye.
    else { $Ret_Status = $NO_ACTION }

    return $Ret_Status;
}

#
# _sanity_input()
#
# Ensures that interface input passes sanity.
#

sub _sanity_input {
    my $o = $_[0];

    my %err_msg = (  mode        =>    'No mode specified.',
                     source      =>    'No source dir specified.',
                     target      =>    'No target dir specified.',
                     verbose     =>    'No verbosity specified.',
                     warn        =>    'No warn mode specified.',
                     override    =>    'No override mode specified.',
                     ident       =>    'No subdir identifier specified.',
                     sep         =>    'No suffix separator specified.',
                     f_limit     =>    'No file limit specified.',
                     f_sort      =>    'No file sort mode specified.',
                     num_contin  =>    'No continuation mode specified.',
                     length      =>    'No suffix length specified.',
                     case        =>    'No suffix case mode specified.',
    );

    my $err_input;
    {
        unless ($o->{'mode'} eq 'num' || $o->{'mode'} eq 'char') {
            $err_input = $err_msg{'mode'}; last;
        }
        unless ($o->{'source'}) {
            $err_input = $err_msg{'source'}; last;
        }
        unless ($o->{'target'}) {
            $err_input = $err_msg{'target'}; last;
        }
        unless ($o->{'verbose'} =~ /^0|1$/) {
            $err_input = $err_msg{'verbose'}; last;
        }
        unless ($o->{'warn'} =~ /^none|file|dir|all$/) {
            $err_input = $err_msg{'warn'}; last;
        }
        unless ($o->{'override'} =~ /^none|file|dir|all$/) {
            $err_input = $err_msg{'override'}; last;
        }
        unless ($o->{'ident'} =~ /\w/) {
            $err_input = $err_msg{'ident'}; last;
        }
        unless ($o->{'sep'}) {
            $err_input = $err_msg{'sep'}; last;
        }
        if ($o->{'mode'} eq 'num') {
            unless ($o->{'f_limit'} =~ /\d/) {
                $err_input = $err_msg{'f_limit'}; last;
            }
            unless ($o->{'f_sort'} eq '+' || $o->{'f_sort'} eq '-') {
                $err_input = $err_msg{'f_sort'}; last;
            }
            unless ($o->{'num_contin'} eq 'y' || $o->{'num_contin'} eq 'n') {
                $err_input = $err_msg{'num_contin'}; last;
            }
            unless ($o->{'length'} =~ /\d/) {
                $err_input = $err_msg{'length'}; last;
            }

        }
        elsif ($o->{'mode'} eq 'char') {
            unless ($o->{'case'} eq 'lower' || $o->{'case'} eq 'upper') {
                $err_input = $err_msg{'case'}; last;
            }
        }
    }
    croak $err_input if $err_input;
}

#
# _gather_files()
#
# Gathers the files from the source directory.
#

sub _gather_files {
    my $o = $_[0];

    $o->_dir_read($o->{'source'}, \@Files);
    @Files = grep !-d File::Spec->catfile($o->{'source'}, $_), @Files;

    $track{'source'}{'files'} = scalar @Files;
}

#
# _init_presets()
#
# Initializes var presets.
#

sub _init_presets {
    my %Presets = (  warn =>  {  dir   =>    "exists (d)\t",
                                 file  =>    "exists (f)\t",
                     },
    );

    $warn{'dir'} ||= $Presets{'warn'}{'dir'};
    $warn{'file'} ||= $Presets{'warn'}{'file'};
}

#
# _sort_files()
#
# Sorts the files if numeric splitting.
#

sub _sort_files {
    my $o = $_[0];

    if ($o->{'f_sort'} eq '+' || $o->{'f_sort'} eq '-') {
        # preserve case-sensitive filenames.
        foreach (@Files) {
           $F_names_case{lc($_)} = $_;
        }
        @Files = map lc, @Files;

        if ($o->{'f_sort'} eq '+') { @Files = sort @Files }
        elsif ($o->{'f_sort'} eq '-') { @Files = reverse @Files }
    }
}

#
# _suffix_num()
#
# Sub handler for numeric suffixes.
#

sub _suffix_num {
    my $o = $_[0];

    $o->_suffix_num_contin() if $o->{'num_contin'} eq 'y';
    $o->_suffix_num_sum_up();
}

#
# _suffix_num_contin()
#
# Evaluates the highest existing subdir suffix number
# from the target directory.
#

sub _suffix_num_contin {
    my $o = $_[0];

    my @dirs;
    $o->_dir_read($o->{'target'}, \@dirs);
    @dirs = grep -d File::Spec->catfile($o->{'target'}, $_), @dirs;

    # surpress warnings
    $Suffix = 0;

    my $sep = quotemeta $o->{'sep'};
    foreach (@dirs) {
        # extract exist. identifier
        ($_) = /(.+?)$sep(.*)/;
        my $suff_cmp = $2;
        # increase suffix to highest number
        if ($o->{'ident'} eq $_ && $suff_cmp =~ /[0-9]/) {
            $Suffix = $suff_cmp if $suff_cmp > $Suffix;
        }
    }
}

#
# _suffix_num_sum_up()
#
# Sums the numeric suffix with zeros up if required.
#

sub _suffix_num_sum_up {
    my $o = $_[0];

    # initialize suffix or avoid suffix collision if != 0
    $Suffix++;

    if (length $Suffix < $o->{'length'}) {
        my $format = "%0.$o->{'length'}".'d';
        $Suffix = sprintf $format, $Suffix;
    }
}

#
# _suffix_char()
#
# Evaluates all filenames in the source directory
# and stores them in a hash associated with their
# leading character.
#

sub _suffix_char {
    my $o = $_[0];

    foreach my $file (@Files) {
        ($_) = $file =~ /^(.)/;
        if ($_ =~ /\w/) {
            if ($o->{'case'} eq 'upper') { $_ = uc $_ }
            elsif ($o->{'case'} eq 'lower') { $_ = lc $_ }
        }
        push @{$F_names_char{$_}}, $file;
    }
    undef @Files;
}

#
# _move()
#
# Sub handler for moving files.
#

sub _move {
    my $o = $_[0];

    # initalize tracking
    $track{'target'}{'dirs'} = 0;
    $track{'target'}{'files'} = 0;

    # sub handler
    my $sub_move = "_move_$o->{'mode'}";
    $o->$sub_move();
}

#
# _move_num()
#
# Moves the files to numeric subdirs.
#

sub _move_num {
    my $o = $_[0];

    for (; @Files; $Suffix++) {
       $o->_mkpath($Suffix);

        for (my $i = 0; $i < $o->{'f_limit'}; $i++) {
            last unless my $file = shift @Files;
            $o->_cp_unlink($F_names_case{$file});
        }
    }
}

#
# _move_char()
#
# Moves the files to characteristic subdirs.
#

sub _move_char {
    my $o = $_[0];

    foreach (sort keys %F_names_char) {
        $o->_mkpath($_);

        while (my $file = shift @{$F_names_char{$_}}) {
            $o->_cp_unlink($file);
        }
    }
}

#
# _dir_read (\$dir, \@files)
#
# Reads the files of a directory.
#

sub _dir_read {
    shift; my ($dir, $files_ref) = @_;

    opendir D, $dir
      or croak qq~Could not open dir $dir for read-access: $!~;
    @$files_ref = readdir D; splice @$files_ref, 0, 2;
    closedir D or croak qq~Could not close dir $dir: $!~;
}

#
# _mkpath ($suffix)
#
# Creates subdirs.
# Upon dir collision optional warning
# and overriding avoidance.
#

sub _mkpath {
    my ($o, $suffix) = @_;

    $Path = File::Spec->catfile($o->{'target'}, "$o->{'ident'}$o->{'sep'}$suffix");

    return if $o->_exists_and_not_override('dir', $Path);

    mkpath $Path, $o->{'verbose'}
      or croak qq~Could not create subdir $Path: $!~;

    $track{'target'}{'dirs'}++;
}

#
# _cp_unlink ($file)
#
# Copies files and unlinks them.
# Upon file collision optional warning
# and overriding avoidance.
#

sub _cp_unlink {
    my ($o, $file) = @_;

    my $path_full = File::Spec->catfile($Path, $file);
    $file = File::Spec->catfile($o->{'source'}, $file);

    return if $o->_exists_and_not_override('file', $path_full);

    cp $file, $path_full or
      croak qq~Could not copy file $file to subdir $path_full: $!~;
    unlink $file or croak qq~Could not remove file $file: $!~;

    $track{'target'}{'files'}++;
}

#
# _exists_and_not_override ($item, $path)
#
# Checks for existing files/paths and warns accordingly.
#

sub _exists_and_not_override {
    my ($o, $item, $path) = @_;

    if (-e $path) {
        if ($o->{'warn'} eq 'all' || $o->{'warn'} eq $item) {
            no strict 'refs';
            warn "$warn{$item} $path\n";
        }

        $Ret_Status = $EXISTS unless $Ret_Status eq $EXISTS;

        return 1 unless $o->{'override'} eq 'all' || $o->{'override'} eq $item;
    }

    return 0;
}

1;
__END__

=head1 OPTIONS

=head2 type indicators

=over 4

=item (c)

character

=item (i)

integer

=item (s)

string

=back

=head2 numeric

Split files to subdirectories with a numeric suffix.

    %options = (  mode    =>    'num',

                  source  =>    '/tmp/source',
                  target  =>    '/tmp/target',

                  options => {  verbose     =>         1,
                                warn        =>     'all',
                                override    =>    'none',
                  },
                  sub_dir => {  identifier  =>     'sub',
                                file_limit  =>         2,
                                file_sort   =>       '+',
                  },
                  suffix  => {  separator   =>       '-',
                                continue    =>       'y',
                                length      =>         5,
                  },
    );

=over 4

=over 4

=item *

=item B<mode>

(s) - I<num> for numeric.

=item B<source>

(s) - the source directory.

=item B<target>

(s) - the target directory.

=item B<options / verbose>

(i) - if enabled, mkpath will output the pathes on creating
subdirectories.

 MODES
   1  enabled
   0  disabled

=item B<options / warn>

(s) - warnings upon the encounter of existing files/dirs.

 LEVELS
   none
   file
   dir
   all

=item B<options / override>

(s) - overriding of existing files/dirs.

 LEVELS
   none
   file
   dir
   all

=item B<sub_dir / identifier>

(s) - prefix of each subdirectory created.

=item B<sub_dir / file_limit>

(i) - limit of files per subdirectory.

=item B<sub_dir / file_sort>

(c) - sort order of files.

 MODES
   +  ascending
   -  descending

=item B<suffix / separator>

(s) - suffix separator.

=item B<suffix / continue>

(c) - numbering continuation.

 MODES
   y  yes
   n  no     (will start at 1)

If numbering continuation is set to yes, and numeric subdirectories are found
within the target directory which match the given identifier and separator,
then the suffix numbering will be continued.

=item B<suffix / length>

(i) - length of the suffix in characters.

This option will have no effect if its smaller than the current size
of the highest suffix number.

=back

=back

=head2 characteristic

Split files to subdirectories with a characteristic suffix. Files
are assigned to subdirectories which suffixes correspond with the leading character
of the filenames.

    %options = (  mode    =>    'char',

                  source  =>    '/tmp/source',
                  target  =>    '/tmp/target',

                  options => {  verbose     =>         1,
                                warn        =>     'all',
                                override    =>    'none',
                  },
                  sub_dir => {  identifier  =>     'sub',
                  },
                  suffix  => {  separator   =>       '-',
                                case        =>   'lower',
                  },

    );

=over 4

=over 4

=item *

=item B<mode>

(s) - I<char> for characteristic.

=item B<source>

(s) - the source directory.

=item B<target>

(s) - the target directory.

=item B<options / verbose>

(i) - if enabled, mkpath will output the pathes on creating
subdirectories.

 MODES
   1  enabled
   0  disabled

=item B<options / warn>

(s) - warnings upon the encounter of existing files/dirs.

 LEVELS
   none
   file
   dir
   all

=item B<options / override>

(s) - overriding of existing files/dirs.

 LEVELS
   none
   file
   dir
   all

=item B<sub_dir / identifier>

(s) - prefix of each subdirectory created.

=item B<suffix / separator>

(s) - suffix separator.

=item B<suffix / case>

(s) - lower/upper case of the suffix.

 MODES
   lower
   upper

=back

=back

=head2 warnings

The warning messages upon the encounter of existing directories/files;
paths of existing directories/files will be applied after the messages.

    %Dir::Split::warn = (  dir   =>    "exists (d)\t",
                           file  =>    "exists (f)\t",
    );

=head2 tracking

Keeps track of how many files the source and directories/files the target consists of.

    %Dir::Split::track = (  source  =>    {  files  =>    512,
                            },
                            target  =>    {  dirs   =>    128,
                                             files  =>    512,
                            },
    );

Above example: directory consisting 512 files successfully splitted to 128 directories.

The B<override> option will depending upon your operating system and/or file permissions
not always be able to override existing directories/files.
In case C<split()> returns a EXISTS (implying existing directories/files), following code
will print the amount of files that could not be moved.

    my $not_moved = $Dir::Split::track{'source'}{'files'} -
                    $Dir::Split::track{'target'}{'files'};

    warn "Could not move $not_moved files.\n";

Currently there is no possibility to access the paths of directories/files
that could not be moved.

=head1 EXAMPLES

Assuming I</tmp/source> contains 5 files:

    +- _12.tmp
    +- abc.tmp
    +- def.tmp
    +- ghi.tmp
    +- jkl.tmp

After splitting the directory tree in I</tmp/target> will look as following:

=head2 numeric splitting

    +- system-00001
    +-- _12.tmp
    +-- abc.tmp
    +- system-00002
    +-- def.tmp
    +-- ghi.tmp
    +- system-00003
    +-- jkl.tmp

=head2 characteristic splitting

    +- system-_
    +-- _12.tmp
    +- system-a
    +-- abc.tmp
    +- system-d
    +-- def.tmp
    +- system-g
    +-- ghi.tmp
    +- system-j
    +-- jkl.tmp

=head1 DEPENDENCIES

C<Perl 5.6.1>; C<File::Copy>, C<File::Path>, C<File::Spec>.

=head1 CAVEATS & BUGS

Recursive source directory processing is not supported;
existing directories within the source directory will be ignored.

Minus integer values in B<file_limit> and B<length> options
cause an odd behavior at runtime.

=head1 SEE ALSO

perl(1)

=head1 LICENSE

This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.

=head1 AUTHOR

Steven Schubiger

=cut
