						$count2 = 0;
						@fh = (_mergeFiles($opts, \@fh, _getTemp()));
						print "\nCreating temp files ...\n" if $$opts{V};
					($count1, $count2, @lines) = (0, ++$count2);
					if ($count2 >= $$opts{TF}) {
					push(@fh, _writeTemp($basename, $count2, \@lines, $opts));
					}
				$count1++;
				($aa, $bb, $fa, $fb);
				($count1, $count2, @lines) = (0, ++$count2);
				if ($count1 >= $$opts{Y}) {
				my $fh = _writeTemp($basename, $count2, \@lines, $opts);
				push(@fh, $fh);
				push(@lines, $line);
				}
			$$opts{D} = quotemeta($$opts{D});
			$$opts{F} ||= 0;
			$uniq = $fh{$first};
			($aa = $$opts{S}) =~ s/\$SORT/\$a/g;
			($aa, $bb, $fa, $fb) = map "(split(/$$opts{D}/, $_))[$$opts{F}]",
			($basename, $basedir) = fileparse($filein);
			($bb = $$opts{S}) =~ s/\$SORT/\$b/g;
			($fa = $$opts{S}) =~ s/\$SORT/\$fh{\$a}/g;
			($fb = $$opts{S}) =~ s/\$SORT/\$fh{\$b}/g;
			close(F);
			if (!$$opts{O} || !@{$$opts{I}});
			if (@lines) {
			open($filein, "< $filein\0") or croak($!);
			open(F, "< $filein\0") or croak($!);
			print "Creating temp files ...\n" if $$opts{V};
			print "Sorting file $filein ...\n" if $$opts{V};
			print $fh{$first};
			print $file $fh{$first};
			print $file $fh{$first};
			push(@fh, $filein);
			while (defined($line=<F>)) {
			}
			}
		$$opts{I} = [$filein];
		$$opts{I} = [(ref($$opts{I}) ? @{$$opts{I}} : $$opts{I})];
		$cmp = '<=>' if ($$opts{N} == 1);
		$count1, $count2, @lines, $lines, $line, @fh, $first, $opts,
		$filein, $uniq, $basedir, $basename, %sort1, %sort2,
		$opts = \%{$_[0]};
		($_ => scalar <$fh>);
		($bb, $aa, $fb, $fa) = ($aa, $bb, $fa, $fb)	if ($$opts{R} == 1);
		($filein, $$opts{O}, $$opts{V}, $$opts{Y}) = @_;
		($first) = (sort sort2 keys %fh);
		*sort1 = eval("sub {$aa $cmp $bb}");
		*sort2 = eval("sub {$fa $cmp $fb}");
		croak 'Usage: sort_file($filein, $fileout [, $verbose, $chunk])';
		croak 'Usage: sort_file({I=>FILEIN, O=>FILEOUT, %otheroptions})'
		croak if $@;
		croak if $@;
		defined($line=<$fh>) ? $fh{$first} = $line : delete $fh{$first};
		foreach $filein (@{$$opts{I}}) {
		foreach $filein (@{$$opts{I}}) {
		if ($$opts{D}) {
		if ($$opts{U} && $uniq && $uniq ne $fh{$first}) {
		local($^W) = 0;
		my $fh = $oth{$_};
		my $fh = $oth{$first};
		my($cmp, $aa, $bb, $fa, $fb) = ('cmp', '$a', '$b', '$fh{$a}', '$fh{$b}');
		open($file, "+> $file\0") || croak("Can't open $file: $!");
		}
		}
		}
		}
		} else {
		} elsif ($$opts{S}) {
	$$lines[-1] .= "\n" if ($$lines[-1] !~ m|\n$|);
	$$opts{N}	= $$opts{N} ? 1 : 0;
	$$opts{R}	= $$opts{R} ? 1 : 0;
	$$opts{TF}	||= 40;
	$$opts{Y}	||= 20000;
	%fh  = map {
	%oth = map {($o++ => $_)} @$fh;
	($a, $b, $count1, $count2) = (1, 1, 0, 0);
	);
	close(_mergeFiles($opts, \@fh, $$opts{O}));
	die "Change sortFile to sort_file, please.  Thanks and sorry.  :)\n";
	if (!$$opts{M}) {
	if (!$_[0] && (!ref($_[0]) || !$_[1])) {
	local $\;
	my $temp = _getTemp() or warn $!;
	my(
	my($basename, $count2, $lines, $opts) = @_;
	my($opts, $fh, $file) = @_;
	my($uniq, $first, $line, $o, %oth);
	print "  $temp\n" if $$opts{V};
	print "\nCreating sorted $file ...\n" if $$opts{V};
	print "\nDone!\n\n" if $$opts{V};
	print $temp sort sort1 @{$lines};
	return $file;
	return $temp;
	seek($file,0,0);
	seek($temp,0,0);
	unless (ref($file)) {
	while (keys %fh) {
	{
	}
	}
	}
	}
	}
	} else {
	} else {
	} elsif (!ref($_[0])) {
	} keys %oth;























































































         1: 185 secs (185.65 usr  0.00 sys = 185.65 cpu)
         2: 152 secs (152.43 usr  0.00 sys = 152.43 cpu)
         3: 195 secs (195.77 usr  0.00 sys = 195.77 cpu)
         4: 274 secs (274.58 usr  0.00 sys = 274.58 cpu)
    1=>q+`sort -o $ARGV[0].1 $ARGV[0]`+,
    2=>q+open(F,$ARGV[0]);open(F1,">$ARGV[0].4");@f=<F>;print F1 sort @f+,
    3=>q+sort_file({I=>$ARGV[0],O=>"$ARGV[0].2",Y=>200000})+,
    4=>q+sort_file({I=>$ARGV[0],O=>"$ARGV[0].3"})+,
    D=>DELIMITER, F=>FIELD,
    I=>FILEIN, O=>FILEOUT, V=>VERBOSE, 
    I=>[qw(file1_new file2_new)],
    M=>MERGE_ONLY, U=>UNIQUE_ONLY, 
    O=>'filex_new',
    R=>REVERSE, N=>NUMERIC,
    S=>SORT_THING,
    V=>1, Y=>1000, TF=>50, M=>1, U=>1, R=>1, N=>1,
    Y=>CHUNK, TF=>FILE_LIMIT, 
  # {$a cmp $b}
  # {(split(/\|/, $a))[1] cmp (split(/\|/, $b))[1]}
  #!perl -w
  Benchmark: timing 10 iterations of 1, 2, 3, 4...
  sort_file('file1','file1_new',1,1000);
  sort_file(FILEIN, FILEOUT [, VERBOSE, CHUNK]);
  sort_file({
  sort_file({
  sort_file({I=>'b', O=>'b.out', D=>'|', IDX=>1});
  sort_file({I=>'b', O=>'b.out', S=>'$SORT'});
  sort_file({I=>'b', O=>'b.out', S=>'(split(/\\|/, $SORT))[1]'});
  sort_file({I=>'b', O=>'b.out'});
  timethese(10,{
  use Benchmark;
  use File::Sort qw(sort_file);
  use File::Sort qw(sort_file);
  })
  });
  });
$SORT is the token representing your $a and $b.  For instance, these are
$VERSION = '0.18';
160MB RAM, VM on, and 100MB allocated to the MacPerl app).  The file
200,000 lines; Unix systems can get away with something like that because
=back
=cut
=head1 AUTHOR
=head1 BUGS
=head1 DESCRIPTION
=head1 EXPORT
=head1 HISTORY
=head1 NAME
=head1 SEE ALSO
=head1 SYNOPSIS
=head1 THANKS
=head1 VERSION
=item v0.01 (18 December 1997)
=item v0.02 (19 December 1997)
=item v0.03 (23 December 1997)
=item v0.10 (03 January 1998)
=item v0.11 (04 January 1998)
=item v0.16 (24 December 1998)
=item v0.17 (30 December 1998)
=item v0.18 (31 January 1999)
=over 4
@EXPORT = ();
@EXPORT_OK = qw(sort_file sortFile);
@ISA = qw(Exporter);
A default value of 40 is given in the module.  The standard port of
Added reverse and numeric sorting options.
Added unique and merge-only options.
Also now use C<IO::File> to create temp files, so the TMPDIR option is
Also, I will have the module use sort(1) if it is available.
Andrew M. Langmead E<lt>aml@world.std.comE<gt>,
Brian L. Matthews E<lt>blm@halcyon.comE<gt>,
C<IO::File> object.
Chris Nandor E<lt>pudge@pobox.comE<gt>
Copyright (c) 1998 Chris Nandor.  All rights reserved.  This program is free
DELIMITER at the end of the field (i.e., the field ends in DELIMITER, not
Exports C<sort_file> on request.  C<sortFile> is no longer the function
FILEIN, but it is required.  VERBOSE is off by default.  CHUNK is how many
FILE_LIMIT is the system's limit to how many files can be opened at once. 
File::Sort - Sort a file or merge sort multiple files
First release.
Fixed bug in C<_mergeFiles> that tried to C<open> a passed
Fixed up docs and did some more tests and benchmarks.
Gene Hsu E<lt>gene@moreinfo.comE<gt>,
Gurusamy Sarathy E<lt>gsar@activestate.comE<gt>.
Here are some benchmarks that might be of interest (PowerBook G3/292 with
I did make the default for CHUNK larger, though.
If MERGE_ONLY is true, then C<File::Sort> will assume the files on input are
If given a DELIMITER (which will be passed through C<quotemeta>), then each
MacPerl has 8MB allocated, the results for tests 1 and 4 are about the
MacPerl has a small amount of memory allocated (like 8MB).  But when
Made CHUNK default a lot larger, which improves performance.  On
Matthias Neeracher E<lt>neeri@iis.ee.ethz.chE<gt>,
Mike Blazer E<lt>blazer@mail.nevalink.ruE<gt>,
Miko O'Sullivan E<lt>miko@idocs.comE<gt>,
More cleanup; fixed special case of no linebreak on last line; wrote test 
NOTE: `sort` calls the MPW sort tool here, which has a slightly
None!  :)  I plan on making CHUNK and FILE_LIMIT more intelligent somehow.
Note that if FILEIN does not have a linebreak terminating the last line,
Note that tests 2 and 3 cannot be performed on the given dataset when
One year between releases was too long.  I made changes Miko O'Sullivan
Perl itself.
Rich Morin E<lt>rdm@cfcl.comE<gt>,
SORT_THING is so you can pass in any arbitrary sort thing you want, where
SORT_THING will still need R and N for reverse and numeric sorts.
Some cleanup; made it not subject to system file limitations; separated 
Tests 3 and 4 failed because we hit the open file limit in the
That all having been noted, there are plans to have this module use sort(1)
There are two primary syntaxes:
This time, FILEIN can be a filename or a reference to an array of filenames. 
This will sort FILEIN to FILEOUT.  The FILEOUT can be the same as the
Tom Phoenix E<lt>rootbeer@teleport.comE<gt>,
Version 0.17 (30 December 1998)
Vicki Brown E<lt>vlb@cfcl.comE<gt>,
WARNING Part Deux: This module is subject to change in every way, including
WARNING: This is probably going to be MUCH SLOWER than using sort(1)
__END__
a native newline character will be added to it.
all others.
allocation as done below.  So inevitably you will get much better
already sorted.  UNIQUE_ONLY, if true, only outputs unique lines, removing
different default sort order than C<sort_file> does.
doing its job properly.  :)
equivalent:
faster, while supporting more options for sorting, including delimited
field, it is best if the last field in the line, if used for sorting, has
file of several megabytes), nor does everyone have access to sort(1).
files), changed docs.  (Mike Blazer, Gurusamy Sarathy)
for better performance, decrease if you run out of memory.
for total number of temp files from 50 to 40 (leave room for other open
http://pudge.net/
if it is available.  Still.
improve performance increase the number, and if you are getting failures,
in the fact that it exists.  But it seems much less subject to change now
infinite amounts of memory (thus they cannot necessarily slurp in a text
line will be sorted on the nth FIELD (default FIELD is 0).  If sorting by
lines to deal with at a time (as opposed to how much memory to deal with at a
low-memory systems, or where (e.g.) the MacPerl binary is not allocated
many parts out into separate functions.
much RAM, it might need to be lowered.
name.
newline).
no longer supported.  Hopefully made the whole thing more robust and
no strict 'refs';
of VM, while Mac OS systems cannot, unless you bump up the memory
package File::Sort;
performance with large files on Unix than you will on Mac OS.  C'est la
perl5.004_02 for Win32 has a limit of 50 open files, so 40 is safe.  To
same as when MacPerl has 100MB allocated, showing that the module is
software; you can redistribute it and/or modify it under the same terms as
some perls (specifically, MacPerl) do not have access to potentially
sort(1).
sorts, and arbitrary sorts.
standard Windows port of perl5.004_02 (50).  Adjusted the default
# Create a temporary file and hand back its IO::File handle.
# Takes no arguments; simply returns whatever IO::File->new_tmpfile
# returns, so callers should check the result for truth before using it.
sub _getTemp {
    return IO::File->new_tmpfile;
}
sub _mergeFiles {
sub _writeTemp {
sub sortFile {
sub sort_file {
suite; fixed warning for redefined subs (sort1 and sort2).
than it did at first.
that comes with most Unix boxes.  This was developed primarily because
time, like sort(1); this might change).  The default for Y is 20,000; increase
to C<_getTemp>, you've probably hit your limit.
try decreasing it.  If you get a warning in C<_writeTemp>, from the call
use Carp;
use Exporter;
use File::Basename;
use IO::File;
use strict;
use vars qw(@ISA @EXPORT_OK @EXPORT $VERSION *sort1 *sort2 %fh);
vie.
wanted, and I didn't even know I had made them.
was a mail file around 6MB.  Note that once was with a CHUNK value of
}
}
}
}
