#
# $Id: Split.pm,v 0.1 2003/12/29 22:52:30 st.schubiger Exp $

package Dir::Split;

use 5.6.1;
use strict;
use warnings;

use Carp;
use File::Copy 'cp';
use File::Path;
use File::Spec;

our (%f_names,
     $scl_source_dir_ref,
     $scl_target_dir_ref,
     $sub_dir_ident,
     $sub_dir_f_sort,
     $suffix_l,
     $suffix_sep);

our $VERSION = 0.1;

=head1 NAME

Dir::Split - Split the files of a directory to subdirectories.

=head1 SYNOPSIS

 use Dir::Split;

 # define options
 %options = ( verbose =>   0,

              sub_dir => { identifier          =>    'system',
                           file_limit          =>         '2',
                           file_sort           =>         '+',
              },

              suffix =>  { separator           =>         '.',
                           length              =>           4,
                           continue_num        =>         'y',
              },
 );

 # create object
 $dir = Dir::Split->new (\%options);

 # set source & target dirs
 $source_dir = '/var/tmp/src';
 $target_dir = '/var/tmp/target';

 # split files to subdirs
 $files_moved = $dir->split (\$source_dir, \$target_dir);

 # changes the subdir identifier
 $dir->{'sub_dir'}{'identifier'} = 'test';

 # split files to subdirs
 $files_moved = $dir->split (\$source_dir, \$target_dir);

=head1 DESCRIPTION

C<Dir::Split> moves files from a source directory to numbered subdirectories within
a target directory.

=head1 METHODS

=head2 new ( \%options )

Object constructor.

 $dir = Dir::Split->new (\%options);

C<%options> contains the options that will influence the splitting process.

 %options = ( verbose =>   0,

              sub_dir => { identifier          =>    'system',
                           file_limit          =>         '2',
                           file_sort           =>         '+',
              },

              suffix =>  { separator           =>         '.',
                           length              =>           4,
                           continue_num        =>         'y',
              },
 );

C<verbose> sets the verbosity (see table VERBOSITY MODES below); if enabled,
mkpath will output the pathes on creating subdirectories.

C<sub_dir/identifier> will affect the prefix of each subdirectory.
C<sub_dir/file_limit> sets the limit of files per each subdirectory.
C<sub_dir/file_sort> defines the sorting order of files
(see table SORT MODES below).

C<suffix/separator> contains the string that separates the prefix (identifier)
from the suffix. C<suffix/length> is an non-floating-point integer that sets
the amount of zeros to be added to the subdirectory numbering.
C<suffix/continue_num> defines whether the numbering shall be continued
where it previously stopped or start at 1 (see table CONTINUE NUMBERING MODES below).

Differing identifiers or separators do affect the numbering e.g. systm-0001 does
not equal system-0001, system_0001 does not equal system-0001. C<file_limit>, C<file_sort>
and C<separator> options have no influence on decisions whether the numbering shall be coninued.

  VERBOSITY MODES
    0  disabled
    1  enabled

  SORT MODES
    +  ascending sort order
    -  descending sort order

  CONTINUE NUMBERING MODES
    y   yes
    ''  no

=cut

sub new {
    my ($package, $hsh_opt_ref) = @_;
    my $class = ref ($package) || $package;
    my $self = Dir::Split::_set_var ($hsh_opt_ref);
    bless $self, $class;
}

#
# _set_var ($hsh_opt_ref)
#
# Internally called by the object constructor new()
# to dereference a two-dimensional hash. A reference
# to a private hash will be returned.
#

sub _set_var {
    my ($hsh_opt_ref) = @_;
    my %my_hash;
    foreach my $key (keys %$hsh_opt_ref) {
        if (ref $$hsh_opt_ref{$key} eq 'HASH') {
            foreach ( keys %{$$hsh_opt_ref{$key}} ) {
                $my_hash{$key}{$_} = $$hsh_opt_ref{$key}{$_};
            }
        }
        else { $my_hash{$key} = $$hsh_opt_ref{$key} }
    }
    return \%my_hash;
}

=head2 split ( \$source_dir, \$target_dir )

Split the files of the source directory to subdirectories.

 $files_moved = $dir->split (\$source_dir, \$target_dir);

C<$source_dir> specifies the source directory.

C<$target_dir> specifies the target directory.

Returns the amount of files that have been successfully moved;
if none, it will return undef.

=cut

sub split {
    my $self = shift;
    ($scl_source_dir_ref, $scl_target_dir_ref) = @_;
    croak q~Invalid arguments: split (\$source_dir, \$target_dir)~
      unless (ref $scl_source_dir_ref eq 'SCALAR') && (ref $scl_target_dir_ref eq 'SCALAR');

    my (@files,
        $path_full,
        $ret_f_moved,
        $sub_dir_f_limit,
        $suffix,
        $suffix_cont_num,
        $verbose_mode);
    STRING_VAR: {
        $verbose_mode = $self->{'verbose'};
        $sub_dir_ident = $self->{'sub_dir'}{'identifier'};
        $sub_dir_f_limit = $self->{'sub_dir'}{'file_limit'};
        $sub_dir_f_sort = $self->{'sub_dir'}{'file_sort'};
        $suffix_sep = $self->{'suffix'}{'separator'};
        $suffix_l = $self->{'suffix'}{'length'};
        $suffix_cont_num = $self->{'suffix'}{'continue_num'};
    }

    $self->_eval_files (\@files, \%f_names);
    if ($suffix_cont_num eq 'y') {
        $self->_eval_suffix_highest_num (\$suffix);
    }
    $self->_eval_suffix_sum_up (\$suffix);

    for (; @files; $suffix++) {
        # create subdir
        my $path = File::Spec->catfile($$scl_target_dir_ref,
          "$sub_dir_ident$suffix_sep$suffix");
        unless (mkpath $path, $verbose_mode) {
            croak qq~Could not create subdir $path: $!~;
        }
        # cp & rm files
        for (my $i = 0; $i < $sub_dir_f_limit; $i++) {
            last unless my $file = shift @files;
            # obtain case-sensitive filename
            $path_full = File::Spec->catfile($path, $f_names{$file});
            # cat absolute file path
            $file = File::Spec->catfile($$scl_source_dir_ref, $file);
            cp ($file, $path_full) or croak qq~Could not copy file $file to subdir $path_full: $!~;
            unlink ($file) or croak qq~Could not remove file $file: $!~;
            $ret_f_moved++;
        }
    }

    return $ret_f_moved ? $ret_f_moved : undef;
}

#
# _eval_files (\@files)
#
# Internally called by split() to read the files the
# source directory consists of, to sort them according
# to the options (lowercase filenames) and transform
# the relative paths to absolute ones.
#

sub _eval_files {
    my ($self, $arr_files_ref, $hsh_f_names_ref) = @_;

    # fetch source files
    opendir D, $$scl_source_dir_ref or
      croak qq~Could not open source dir $$scl_source_dir_ref for read-access: $!~;
    @$arr_files_ref =
      # ignore exist. dirs
      grep { ! opendir E, File::Spec->catfile($$scl_source_dir_ref, $_) }
      # skip . ..  
      grep { !/^\./ } 
      readdir D;
    closedir D or croak qq~Could not close source dir $$scl_source_dir_ref: $!~;
    no warnings; if (telldir E == 0) { closedir E } use warnings;

    # if files are to be sorted, preserve filenames
    # in hash with their lowercased filenames as keys.
    if ($sub_dir_f_sort eq '+' || $sub_dir_f_sort eq '-') {
        foreach (@$arr_files_ref) {
            $$hsh_f_names_ref{lc($_)} = $_;
        }
        # lowercase filenames
        @$arr_files_ref = map { lc } @$arr_files_ref;
    }

    # ascending sort order
    if ($sub_dir_f_sort eq '+') {
        @$arr_files_ref = sort @$arr_files_ref;
    } # descending sort order
    elsif ($sub_dir_f_sort eq '-') {
        @$arr_files_ref = reverse @$arr_files_ref;
    }
}

#
# _eval_suffix_highest_num (\$suffix)
#
# Internally called by split() to evaluate the highest
# existing subdir suffix number from the target directory
# in order to continue numbering where it stopped previously.
#

sub _eval_suffix_highest_num {
    my ($self, $scl_suffix_ref) = @_;

    # fetch target dirs
    opendir D, $$scl_target_dir_ref or
      croak qq~Could not open target dir $$scl_target_dir_ref for read-access: $!~;
    my @dirs =
      # ignore exist. files
      grep { opendir E, File::Spec->catfile($$scl_target_dir_ref, $_) }
      # skip . .. 
      grep { !/^\./ } 
      readdir D;
    closedir D or croak qq~Could not close target dir $$scl_target_dir_ref: $!~;

    # surpress warnings
    $$scl_suffix_ref = 0;
    my $sep = quotemeta ($suffix_sep);
    foreach (@dirs) {
        # extract exist. identifier (prefix)
        $_ =~ s/(.+?)$sep(.*)/$1/;
        # supplied identifier matches exist. one
        if ($sub_dir_ident eq $_) {
            # increase suffix to highest number
            if ($2 > $$scl_suffix_ref) { $$scl_suffix_ref = $2 }
        }
    }
    # suffix + 1 - avoid collisions with exist. subdirs
    $$scl_suffix_ref++;
}

#
# _eval_suffix_sum_up (\$suffix)
#
# Internally called by split() to sum up the suffix
# with a given amount of zeros and to concatenate
# the numbering at the end.
#

sub _eval_suffix_sum_up {
    my ($self, $scl_suffix_ref) = @_;

    # suffix length too small or too big
    if (length ($$scl_suffix_ref) < $suffix_l
      || length ($$scl_suffix_ref) > $suffix_l) {
        my $format = "%0.$suffix_l" . 'd';
        # adjust suffix length
        $$scl_suffix_ref = sprintf $format, $$scl_suffix_ref;
    }
}

1;

__END__

=head1 EXAMPLE

Assuming the source directory '/var/tmp/src' contains 9 files, the directory
tree in the target directory '/var/tmp/target' will look as following:

    + /var/tmp/target
    +- system.0001 / 2 file(s)
    +- system.0002 / 2 "
    +- system.0003 / 2 "
    +- system.0004 / 2 "
    +- system.0005 / 1 "

=head1 DEPENDENCIES

C<Perl 5.6.1>; C<File::Copy>, C<File::Path>, C<File::Spec>.

=head1 CAVEATS

Recursive source directory processing is not supported;
existing directories within the source directory will be ignored.

=head1 SEE ALSO

perl(1)

=head1 LICENSE

This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself.

=head1 AUTHOR

Steven Schubiger

=cut