#!/usr/bin/perl -w

# grepmail

$VERSION = '4.91';

# Grepmail searches a normal, gzip'd, tzip'd, or bzip2'd mailbox for a given
# regular expression and returns those emails that match the query. It also
# supports piped compressed or ascii input, and searches constrained by date
# and size. 

# Visit the grepmail project homepage at http://grepmail.sourceforge.net/
# There you can join the announcements mailing list to be notified of updates,
# grab the development environment via CVS, participate in chats and mailing
# lists, report bugs, submit patches, etc.

# Do a pod2text on this file to get full documentation, or pod2man to get
# man pages.

# Written by David Coppit (david@coppit.org, http://coppit.org/) with lots of
# debugging and patching by others -- see the CHANGES file for a complete
# list.

# This code is distributed under the GNU General Public License (GPL). See
# http://www.opensource.org/gpl-license.html and http://www.opensource.org/.

require 5.00396;

use vars qw( %opts $pattern $commandLine $VERSION %message_ids_seen $USE_CACHING );

use Getopt::Std;

use strict;
use FileHandle;
use Carp;

# Set to 1 to enable experimental caching capability
$USE_CACHING = 0;

################################################################################

package Mail::Folder::SlowReader;

require Exporter;

no strict;

@ISA = qw(Exporter DynaLoader);
@EXPORT = qw();
@EXPORT_OK = qw( $DEBUG SETUP_CACHE );

use strict;
use vars qw( $VERSION $DEBUG );

$VERSION = '1.01';

my %HEADER_PATTERNS = (
'^TO:' =>
  '(^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope|Apparently(-Resent)?)-To):)',
'^FROM_DAEMON:' =>
  '(^(Mailing-List:|Precedence:.*(junk|bulk|list)|To: Multiple recipients of |(((Resent-)?(From|Sender)|X-Envelope-From):|>?From )([^>]*[^(.%@a-z0-9])?(Post(ma?(st(e?r)?|n)|office)|(send)?Mail(er)?|daemon|m(mdf|ajordomo)|n?uucp|LIST(SERV|proc)|NETSERV|o(wner|ps)|r(e(quest|sponse)|oot)|b(ounce|bs\.smtp)|echo|mirror|s(erv(ices?|er)|mtp(error)?|ystem)|A(dmin(istrator)?|MMGR|utoanswer))(([^).!:a-z0-9][-_a-z0-9]*)?[%@>\t ][^<)]*(\(.*\).*)?)?))',
'^FROM_MAILER:' =>
  '(^(((Resent-)?(From|Sender)|X-Envelope-From):|>?From)([^>]*[^(.%@a-z0-9])?(Post(ma(st(er)?|n)|office)|(send)?Mail(er)?|daemon|mmdf|n?uucp|ops|r(esponse|oot)|(bbs\.)?smtp(error)?|s(erv(ices?|er)|ystem)|A(dmin(istrator)?|MMGR))(([^).!:a-z0-9][-_a-z0-9]*)?[%@>\t][^<)]*(\(.*\).*)?)?$([^>]|$))',
);

# Need this for a lookahead.
my $READ_CHUNK_SIZE = 20000;

$DEBUG = 0;

# The class-wide cache, which will be read and written when necessary. i.e.
# read when an folder reader object is created which uses caching, and
# written when a different cache is specified, or when the program exits, 
my $CACHE = undef;

my %DEFAULT_CACHE_OPTIONS = (
  'file_name' => "$ENV{'HOME'}/.grepmail-cache",
);

my %CACHE_OPTIONS;

my $USING_CACHE = 0;

my $UPDATING_CACHE = 0;

my $CACHE_MODIFIED = 0;

#-------------------------------------------------------------------------------

# Outputs debug messages if $DEBUG is true. Be sure to return 1 so code like
# 'dprint "blah\n" and exit' works.

sub dprint
{
  return 1 unless $DEBUG;

  my $message = join '',@_;

  foreach my $line (split /\n/, $message)
  {
    warn "DEBUG (" . __PACKAGE__ . "): $line\n";
  }

  return 1;
}

#-------------------------------------------------------------------------------

sub SETUP_CACHE
{
  my $options = shift;

  # Load Storable if we need to
  unless (defined $Storable::VERSION)
  {
    if (eval 'require Storable;')
    {
      import Storable;
    }
    else
    {
      warn __PACKAGE__ . ": caching is enabled, " .
        "but you do not have Storable. " .
        "Get it from CPAN.\n";
      exit 1;
    }
  }

  # See if the client is setting up a different cache
  if (defined $options && exists $options->{file_name} &&
    $options->{file_name} ne $CACHE_OPTIONS{file_name})
  {
    dprint "New cache file specified--writing old cache if necessary.";
    _WRITE_CACHE() if $USING_CACHE && $CACHE_MODIFIED;
    undef $CACHE;
  }

  # Override the default settings with the user-specified ones
  %CACHE_OPTIONS = %DEFAULT_CACHE_OPTIONS;
  map { $CACHE_OPTIONS{$_} = $options->{$_} } keys %$options;

  _READ_CACHE();

  $USING_CACHE = 1;
  $CACHE_MODIFIED = 0;
}

#-------------------------------------------------------------------------------

sub _READ_CACHE
{
  my $self = shift;

  dprint "Reading cache";

  unless (-e $CACHE_OPTIONS{file_name})
  {
    dprint "Can't find cache file \"$CACHE_OPTIONS{file_name}\" to read";
    return;
  }

  # Unserialize using Storable
  $CACHE = retrieve($CACHE_OPTIONS{file_name});
}

#-------------------------------------------------------------------------------

sub _WRITE_CACHE
{
  # In case this is called during cleanup following an error loading
  # Storable
  return unless defined $Storable::VERSION;

  dprint "Writing cache";

  # Serialize using Storable
  store($CACHE, $CACHE_OPTIONS{file_name});
}

#-------------------------------------------------------------------------------

# Write the cache when the program exits
sub END
{
  dprint "Program is exiting. Writing cache.";

  _WRITE_CACHE() if $USING_CACHE && $CACHE_MODIFIED;
}

#-------------------------------------------------------------------------------

# Options: 
# - file_name: the name of the file. This must be set for caching to occur.
# - file_handle: the file handle to read from
# - use_cache: set to true if you want to use the cache
# - cache_options: a reference to a hash containing cache options
sub new
{
  my ($proto, $options) = @_;

  my $class = ref($proto) || $proto;
  my $self  = {};
  bless ($self, $class);

  $self->{line_number} = 1;

  $self->{file_handle} = undef;
  $self->{file_handle} = $options->{file_handle}
    if exists $options->{file_handle};

  # The buffer information. (Used when caching is not enabled)
  $self->{read_buffer} = '';
  $self->{start} = 0;
  $self->{end} = 0;

  $self->{end_of_file} = 0;

  # The line number of the last read email.
  $self->{email_line_number} = 0;


  # This is the 0-based number of the email. We'll use it as an index into the
  # cache, if the cache is being used.
  $self->{email_number} = 0;

  # We need the file name as a key to the cache
  $self->{file_name} = $options->{file_name};

  $self->_print_debug_information();

  $self->_validate_and_initialize_cache_entry() if $USING_CACHE;

  return $self;
}

#-------------------------------------------------------------------------------

sub _print_debug_information
{
  my $self = shift;

  return unless $DEBUG;

  dprint "Version: $VERSION";

  dprint "Email file: $self->{file_name}";
  dprint "Valid cache entry exists: " .
    ($#{ $CACHE->{$self->{file_name}}{lengths} } != -1 ? "Yes" : "No");
}

#-------------------------------------------------------------------------------

sub _validate_and_initialize_cache_entry
{
  my $self = shift;

  if (!defined $self->{file_name})
  {
    warn __PACKAGE__ . ": no file name, so caching is disabled.\n";

    $USING_CACHE = 0;
  }
  else
  {
    my @stat = stat $self->{file_name};

    # The file should always exist at this point
    die "The file $self->{file_name} does not exist!"
      unless scalar(@stat);

    my $size = $stat[7];
    my $time_stamp = $stat[9];

    if (exists $CACHE->{$self->{file_name}})
    {
      if ($CACHE->{$self->{file_name}}{size} != $size ||
        $CACHE->{$self->{file_name}}{time_stamp} != $time_stamp)
      {
        dprint "Size or time stamp has changed for file " .
          $self->{file_name} . ". Invalidating cache entry";

        delete $CACHE->{$self->{file_name}};
      }
    }

    if (exists $CACHE->{$self->{file_name}})
    {
      $UPDATING_CACHE = 0;
    }
    else
    {
      $CACHE->{$self->{file_name}}{size} = $size;
      $CACHE->{$self->{file_name}}{time_stamp} = $time_stamp;
      $CACHE->{$self->{file_name}}{lengths} = [];
      $UPDATING_CACHE = 1;
    }
  }
}

#-------------------------------------------------------------------------------

# Returns true if the file handle has been fully read
sub end_of_file
{
  my $self = shift;

  return $self->{end_of_file};
}

#-------------------------------------------------------------------------------

# The line number of the last email read
sub line_number
{
  my $self = shift;

  return $self->{email_line_number};
}

#-------------------------------------------------------------------------------

# Reads an email from the file and returns it.
# Preconditions:
# - file handle is set and open
# - not end of file
sub _cache_read_next_email
{
  my $self = shift;

  dprint "Using cache" if $DEBUG;

  $self->{email_line_number} = $self->{line_number};
  my $email_length = 
    $CACHE->{$self->{file_name}}{lengths}[$self->{email_number}];

  while (read($self->{file_handle}, $self->{read_buffer}, $email_length))
  {
    last if $email_length <= length($self->{read_buffer});
  }

  $self->{start} = 0;
  $self->{end} = $email_length;

  if (eof $self->{file_handle} &&
    $self->{end} == length($self->{read_buffer}))
  {
    $self->{end_of_file} = 1;
  }

  $self->{line_number} +=
    $CACHE->{$self->{file_name}}{line_numbers}[$self->{email_number}];

  $self->{email_number}++;
}

#-------------------------------------------------------------------------------

# Reads an email from the file and returns it.
# Preconditions:
# - file handle is set and open
# - not end of file
sub _noncache_read_next_email
{
  my $self = shift;

  dprint "Using cache" if $DEBUG;

  $self->{email_line_number} = $self->{line_number};

  $self->{start} = $self->{end};

  # Look for the start of the next email
  LOOK_FOR_NEXT_HEADER:
  while ($self->{read_buffer} =~ m/^From\s
    (
      # Skip names, months, days
      (?> [^:]+ ) 
      # Match time
      (?: :\d\d){1,2}
      # Match time zone (EST), hour shift (+0500), and-or year
      (?: \s+ (?: [A-Z]{2,3} | [+-]?\d{4} ) ){1,3}
    )$/xmg)
  {
    $self->{end} = pos($self->{read_buffer}) - length($1) - 5;

    # Don't stop on email header for the first email in the buffer
    next unless $self->{end};

    # Keep looking if the header we found is part of a "Begin Included
    # Message".
    my $end_of_string = substr($self->{read_buffer}, $self->{end}-200, 200);
    next if $end_of_string =~
        /\n-----(?: Begin Included Message |Original Message)-----\n[^\n]*\n*$/i;

    # Found the next email!
    my $email = substr($self->{read_buffer}, $self->{start}, $self->{end}-$self->{start});
    $self->{line_number} += ($email =~ tr/\n//);

    if ($UPDATING_CACHE)
    {
      dprint("Storing data into cache, length " . length($email)) if $DEBUG;

      $CACHE->{$self->{file_name}}{lengths}[$self->{email_number}] =
        length($email);

      $CACHE->{$self->{file_name}}{line_numbers}[$self->{email_number}] =
        $self->{email_line_number};

      $CACHE_MODIFIED = 1;

      $self->{email_number}++;
    }

    return $email;
  }

  # Didn't find next email in current buffer. Most likely we need to read some
  # more of the mailbox. Shift the current email to the front of the buffer
  # unless we've already done so.
  $self->{read_buffer} = substr($self->{read_buffer},$self->{start})
    unless $self->{start} == 0;
  $self->{start} = 0;

  # Start looking at the end of the buffer, but back up some in case the edge
  # of the newly read buffer contains the start of a new header. I believe the
  # RFC says header lines can be at most 90 characters long.
  my $search_position = length($self->{read_buffer}) - 90;
  $search_position = 0 if $search_position < 0;

  # Can't use sysread because it doesn't work with ungetc
  if ($READ_CHUNK_SIZE == 0)
  {
    local $/ = undef;

    if (eof $self->{file_handle})
    {
      $self->{end_of_file} = 1;

      if ($UPDATING_CACHE)
      {
        dprint("Storing data into cache, length " .
          length($self->{read_buffer})) if $DEBUG;

        $CACHE->{$self->{file_name}}{lengths}[$self->{email_number}] =
          length($self->{read_buffer});

        $CACHE->{$self->{file_name}}{line_numbers}[$self->{email_number}] =
          $self->{email_line_number};

        $CACHE_MODIFIED = 1;

        $self->{email_number}++;
      }

      return $self->{read_buffer};
    }
    else
    {
      # < $self->{file_handle} > doesn't work, so we use readline
      $self->{read_buffer} = readline($self->{file_handle});
      pos($self->{read_buffer}) = $search_position;
      goto LOOK_FOR_NEXT_HEADER;
    }
  }
  else
  {
    if (read($self->{file_handle}, $self->{read_buffer}, $READ_CHUNK_SIZE,
      length($self->{read_buffer})))
    {
      pos($self->{read_buffer}) = $search_position;
      goto LOOK_FOR_NEXT_HEADER;
    }
    else
    {
      $self->{end_of_file} = 1;

      if ($UPDATING_CACHE)
      {
        dprint("Storing data into cache, length " .
          length($self->{read_buffer})) if $DEBUG;

        $CACHE->{$self->{file_name}}{lengths}[$self->{email_number}] =
          length($self->{read_buffer});

        $CACHE->{$self->{file_name}}{line_numbers}[$self->{email_number}] =
          $self->{email_line_number};

        $CACHE_MODIFIED = 1;

        $self->{email_number}++;
      }

      return $self->{read_buffer};
    }
  }
}

#-------------------------------------------------------------------------------

# Reads an email from the file and returns it.
# Preconditions:
# - file handle is set and open
# - not end of file
sub read_next_email
{
  my $self = shift;

  if ($USING_CACHE && !$UPDATING_CACHE)
  {
    $self->_cache_read_next_email();
    return $self->{read_buffer};
  }
  else
  {
    return $self->_noncache_read_next_email();
  }
}

################################################################################

package main;

# Internal function return values.
my $PRINT                 = 0;
my $DONE                  = 1;
my $SKIP                  = 2;
my $CONTINUE              = 3;
my $NONE                  = 4;
my $BEFORE                = 5;
my $AFTER                 = 6;
my $ON                    = 7;
my $NODATE                = 8;
my $BETWEEN               = 9;
my $LESS_THAN             = 10;
my $LESS_THAN_OR_EQUAL    = 11;
my $GREATER_THAN          = 12;
my $GREATER_THAN_OR_EQUAL = 13;
my $EQUAL                 = 14;

#-------------------------------------------------------------------------------

# Outputs debug messages with the -D flag. Be sure to return 1 so code like
# 'dprint "blah\n" and exit' works.

sub dprint
{
  return 1 unless $opts{'D'};

  my $message = join '',@_;

  foreach my $line (split /\n/, $message)
  {
    warn "DEBUG: $line\n";
  }

  return 1;
}

#-------------------------------------------------------------------------------

# Print a nice error message before exiting

sub reportAndExit
{
  my $message = shift;

  warn "grepmail: $message.\n";
  exit 1;
}

#-------------------------------------------------------------------------------

# Filter signals to print error messages when CTRL-C is caught, a pipe is
# empty, a pipe is killed, etc.

my %signals_and_messages = (
  'PIPE' => 'Broken Pipe',
  'HUP' => 'Hangup',
  'INT' => 'Canceled',
  'QUIT' => 'Quit',
  'SEGV' => 'Segmentation violation',
  'TERM' => 'Terminated',
);

# We'll store a copy of the original signal handlers and call them when we're
# done. This helps when running under the debugger.
my %old_SIG = %SIG;

sub signalHandler
{
  my $signal = $_[0];

  $old_SIG{$signal}->(@_) if $old_SIG{$signal};

  reportAndExit($signals_and_messages{$signal});
}

# Delete the HUP signal for Windows, where it doesn't exist
delete $signals_and_messages{HUP} if $^O eq 'MSWin32';

# We have to localize %SIG to prevent odd bugs from cropping up (see
# changelog). Using an array slice on %SIG, I assign an array consisting of as
# many copies of \&signalHandler as there are keys in %signals_and_messages.
local @SIG{keys %signals_and_messages} =
  (\&signalHandler) x keys %signals_and_messages;

################################ MAIN PROGRAM #################################

# PROCESS ARGUMENTS
my ($opts_ref,@remaining_arguments) = Get_Options(@ARGV);
%opts = %$opts_ref;

# Initialize seen messages data structure to empty.
%message_ids_seen = ();

# Save the command line for later when we try to decompress standard input
{
  # Need to quote arguments with spaces
  my @args = @ARGV;
  grep { $_ = "'$_'" if index($_, ' ') != -1; $_ } @args;

  $commandLine = "$0 @args";
}

Print_Debug_Information($commandLine);

sub ProcessDate($);
sub ProcessSize($);
sub GetFiles(@);

# Check for -E flag incompatibilities.
if ($opts{'E'})
{
  # Have to do -Y before -h because the former implies the latter
  my @options = qw(e M S Y);
  for my $option (@options)
  {
    if ($opts{$option})
    {
      reportAndExit "-$option can not be used with -E";
    }
  }

  if ($opts{'i'})
  {
    reportAndExit "-i can not be used with -E. Use -E '\$email =~ /pattern/i' instead";
  }

  if ($opts{'b'})
  {
    reportAndExit "-b can not be used with -E. Use -E '\$email_body =~ /pattern/' instead";
  }

  if ($opts{'h'})
  {
    reportAndExit "-h can not be used with -E. Use -E '\$email_header =~ /pattern/' instead";
  }
}

# Make the pattern insensitive if we need to
$pattern = "(?i)$pattern" if ($opts{'i'});

my ($dateRestriction, $date1, $date2);

if (defined $opts{'d'})
{
  ($dateRestriction,$date1,$date2) = ProcessDate($opts{'d'});
}
else
{
  $dateRestriction = $NONE;
}

my ($sizeRestriction, $size1, $size2);

if (defined $opts{'s'})
{
  ($sizeRestriction,$size1,$size2) = ProcessSize($opts{'s'});
}
else
{
  $sizeRestriction = $NONE;
}

dprint "PATTERN: $pattern\n";
dprint "FILES: @remaining_arguments\n";
dprint "DATE RESTRICTION: $dateRestriction\n";
dprint "FIRST DATE: $date1\n" unless $dateRestriction == $NONE;
dprint "SECOND DATE: $date2\n" unless $dateRestriction == $NONE;
dprint "SIZE RESTRICTION: $sizeRestriction\n";
dprint "FIRST SIZE: $size1\n" unless $sizeRestriction == $NONE;
dprint "SECOND SIZE: $size2\n" unless $sizeRestriction == $NONE;

ValidatePattern($pattern);

my @files = GetFiles(@remaining_arguments);

# If the user provided input files...
if (@files)
{
  HandleInputFiles(@files);
}
# Using STDIN
else
{ 
  HandleStandardInput();
}

exit 0;

#-------------------------------------------------------------------------------

sub Get_Options
{
  local @ARGV = @_;

  # Print usage error if no arguments given
  warn "No arguments given.\n\n" . usage() and exit(1) unless @ARGV;

  # Check for --help, the standard usage command, or --version.
  print help() and exit(0) if grep { /^--help$/ } @ARGV;
  print "$VERSION\n" and exit(0) if grep { /^--version$/ } @ARGV;

  my @valid_options =
    qw( D e E f F i q h b v l r R a M m s u n Z H S d V X Y );

  my %opts;

  # Initialize all options to zero.
  map { $opts{$_} = 0; } @valid_options;

  # And some to non-zero.
  $opts{'d'} = $opts{'V'} = undef;
  $opts{'X'} = '^-- $';

  # Ensure valid options. ALSO UPDATE 2ND GETOPT CALL BELOW
  getopt("efEdsXY",\%opts);

  # Here we have to deal with the possibility that the user specified the
  # search pattern without the -e flag.

  # getopts stops as soon as it sees a non-flag, so $ARGV[0] may contain the
  # pattern with more flags after it.
  unless ($opts{'e'} || $opts{'E'})
  {
    my $missing_flags = '';

    foreach my $flag (keys %opts)
    {
      $missing_flags .= $flag unless $opts{$flag};
    }

    $missing_flags = "[$missing_flags]";

    # If it looks like more flags are following, then grab the pattern and
    # process them.
    if ( $#ARGV > 0 && $ARGV[1] =~ /^-$missing_flags$/ )
    {
      $pattern = shift @ARGV;
      getopt("fdsXY",\%opts);
    }
    # If we've seen a -d, -f, -s, or -u flag, and it doesn't look like there are
    # flags following $ARGV[0], then look at the value in $ARGV[0]
    elsif ( ( defined $opts{'d'} || $opts{'f'} || $opts{'s'} || $opts{'u'} ) &&
         ( $#ARGV <= 0 ||
           ( $#ARGV > 0 && $ARGV[1] !~ /^-$missing_flags$/ )
         )
       )
    {
      # If $ARGV[0] looks like a file we assume there was no pattern and
      # set a default pattern of "." to match everything.
      if ($#ARGV != -1 && -f $ARGV[0])
      {
        $pattern = '.';
      }
      # Otherwise we take the pattern and move on
      else
      {
        $pattern = shift @ARGV;
      }
    }
    # If we still don't have a pattern or any -d, -f, -s, or -u flag, we assume
    # that $ARGV[0] is the pattern
    elsif (!defined $opts{'d'} && !$opts{'f'} && !$opts{'s'} && !$opts{'u'})
    {
      $pattern = shift @ARGV;
    }
  }

  if ($opts{'e'} || $opts{'E'})
  {
    warn "You specified two search patterns.\n" and exit (1)
      if defined $pattern;

    if ($opts{'e'})
    {
      $pattern = $opts{'e'};
    }
    else
    {
      $pattern = $opts{'E'};
    }
  }
  elsif (defined $opts{'V'})
  {
    # Print version and exit if we need to
    print "$VERSION\n";
    exit (0);
  }
  elsif (!defined $pattern)
  {
    # The only times you don't have to specify the pattern is when -d, -f, -s, or -u
    # is being used. This should catch people who do "grepmail -h" thinking
    # it's help.
    warn "Invalid arguments.\n\n" . usage() and exit (1)
      unless defined $opts{'d'} || $opts{'f'} || $opts{'s'} || $opts{'u'};

    $pattern = '.';
  }

  if (defined $opts{'d'})
  {
    if (eval 'require Date::Parse;')
    {
      import Date::Parse;
    }
    else
    {
      warn 'You specified -d, but do not have Date::Parse. ' .
            "Get it from CPAN.\n";
      exit (1);
    }
  }

  $opts{'h'} = 1 if $opts{'Y'};

  # Make sure no unknown flags were given
  foreach my $option (keys %opts)
  {
    unless (grep {/^$option$/} @valid_options)
    {
      warn "Invalid argument: \"$option\".\n\n" . usage() and exit (1)
    }
  }

  return (\%opts, @ARGV);
}

#-------------------------------------------------------------------------------

sub Print_Debug_Information
{
  my $commandLine = shift;

  return unless $opts{'D'};

  dprint "Version: $VERSION";

  dprint "Command line was (special characters not escaped):";
  dprint "  $commandLine";

  if (defined $Date::Parse::VERSION)
  {
    dprint "Date::Parse VERSION: $Date::Parse::VERSION";
  }

  dprint "Options are:";
  foreach my $i (sort keys %opts)
  {
    if (defined $opts{$i})
    {
      dprint "  $i: $opts{$i}";
    }
    else
    {
      dprint "  $i: undef";
    }
  }

  dprint "INC is:";
  foreach my $i (@INC)
  {
    dprint "  $i";
  }
}

#-------------------------------------------------------------------------------

# Dies if the given pattern's syntax is invalid
sub ValidatePattern
{
  my $pattern = shift;

  local $@;

  if ($opts{'E'})
  {
    eval {if ($pattern) {}};
    reportAndExit "The match condition \"$pattern\" is invalid.\n" if $@;
  }
  else
  {
    eval {'string' =~ /$pattern/};
    reportAndExit "The pattern \"$pattern\" is invalid.\n" if $@;
  }
}

#-------------------------------------------------------------------------------

# Get a list of files, taking recursion into account if necessary.

sub GetFiles(@)
{
  my @files_and_directories = @_;

  # We just return what we were given unless we need to recurse subdirectories.
  return @files_and_directories unless $opts{'R'};

  my @files;

  foreach my $arg (@files_and_directories)
  {
    if (-f $arg)
    {
      push @files, $arg;
    }
    elsif( -d $arg)
    {
      dprint "Recursing directory $arg looking for files...";

      unless (eval "require File::Find;")
      {
        warn "You specified -R, but do not have File::Find. ".
              "Get it from CPAN.\n";
        exit (1);
      }

      import File::Find;

      # Gets all plain files in directory and descendents. Puts them in @files
      $File::Find::name = '';
      find(sub {push @files,"$File::Find::name" if -f $_}, $arg);
    }
    else
    {
      # Ignore unknown file types
    }
  }

  return @files;
}

#-------------------------------------------------------------------------------

sub HandleInputFiles
{
  my @files = @_;

  # For each input file...
  foreach my $file (@files)
  {
    dprint '#'x70;
    dprint "Processing file $file";

    # First of all, silently ignore empty files...
    next if -z $file;

    # ...and also ignore directories.
    if (-d $file)
    {
      warn "** Skipping directory: '$file'\n" unless $opts{'q'};
      next;
    }

    $file = Search_Mailbox_Directories($file) unless -e $file;

    my $fileHandle = new FileHandle;
    my ($filter,$filterError);

    # If it's not a compressed file
    if ($file !~ /\.(gz|Z|bz2|tz)$/)
    {
      if (-B $file)
      {
        warn "** Skipping binary file: '$file'\n" unless $opts{'q'};
        next;
      }

      unless ($fileHandle->open($file))
      {
        warn "** Can't open $file: $!, skipping\n" unless $opts{'q'};
        next;
      }
    }
    # If it is a tzipped file
    elsif ($file =~ /\.tz$/)
    {
      dprint "Calling tzip to decompress file.";

      $filter = 'tzip';

      use vars qw(*OLDSTDERR);
      open OLDSTDERR,">&STDERR" or reportAndExit "Can't save STDERR: $!\n";
      open STDERR,">/dev/null"
        or reportAndExit "Can't redirect STDERR to /dev/null: $!\n";

      unless ($fileHandle->open("tzip -cd '$file'|"))
      {
        $filterError = $!;
      }

      open STDERR,">&OLDSTDERR" or reportAndExit "Can't restore STDERR: $!\n";
    }
    # If it is a gzipped file
    elsif ($file =~ /\.(gz|Z)$/)
    {
      dprint "Calling gunzip to decompress file.";

      $filter = 'gunzip';

      use vars qw(*OLDSTDERR);
      open OLDSTDERR,">&STDERR" or reportAndExit "Can't save STDERR: $!\n";
      open STDERR,">/dev/null"
        or reportAndExit "Can't redirect STDERR to /dev/null: $!\n";

      unless ($fileHandle->open("gzip -dc '$file'|"))
      {
        $filterError = $!;
      }

      open STDERR,">&OLDSTDERR" or reportAndExit "Can't restore STDERR: $!\n";
    }
    # If it is a bzipped file
    elsif ($file =~ /\.bz2$/)
    {
      dprint "Calling bzip2 to decompress file.";

      $filter = 'bzip2';

      use vars qw(*OLDSTDERR);
      open OLDSTDERR,">&STDERR" or reportAndExit "Can't save STDERR: $!\n";
      open STDERR,">/dev/null"
        or reportAndExit "Can't redirect STDERR to /dev/null: $!\n";

      unless ($fileHandle->open("bzip2 -dc '$file'|"))
      {
        $filterError = $!;
      }

      open STDERR,">&OLDSTDERR" or reportAndExit "Can't restore STDERR: $!\n";
    }

    if ($filterError)
    {
      warn "** Can't execute \"$filter\" for file \"$file\": $filterError, ".
           "skipping\n" unless $opts{'q'};
      next;
    }

    unless (DataOnFileHandle($fileHandle))
    {
      unless ($fileHandle->close())
      {
        warn "** Can't execute \"$filter\" for file \"$file\": ".
             "skipping\n" unless $opts{'q'};
      }
      next;
    }

    if (!$opts{'F'} && !IsMailbox($fileHandle))
    {
      warn "** Skipping non-mailbox ASCII file: '$file'\n" unless $opts{'q'};
      next;
    }

    ProcessMailFile($fileHandle,$file,$#files+1);

    $fileHandle->close();
  }
}

#-------------------------------------------------------------------------------

sub Search_Mailbox_Directories
{
  my $file = shift;

  return $file unless exists $ENV{MAIL};

  my $mail_folder = $ENV{MAIL};

  for ('', 'mail/', 'Mail/', 'Mailbox/'){
    my $path_and_file = "$mail_folder/${_}$file";
    return $path_and_file if -e $path_and_file;
  }

  return $file;
}

#-------------------------------------------------------------------------------

sub HandleStandardInput
{
  dprint "Handling STDIN";

  # We have to implement our own -B and -s, because STDIN gets eaten by them
  binmode STDIN;

  my ($testChars,$isEmpty,$isBinary);

  my $fileHandle = new FileHandle;
  $fileHandle->open('-');

  $isEmpty = 0;
  $isBinary = 0;

  my $readResult = read($fileHandle,$testChars,200);

  reportAndExit "Can't read from standard input" unless defined $readResult;

  $isEmpty = 1 if $readResult == 0;

  reportAndExit "No data on standard input" if $isEmpty;

  # Do -B on the data stream
  unless ($isEmpty)
  {
    my $data_length = length $testChars;
    my $bin_length = $testChars =~ tr/[\t\n\x20-\x7e]//c;
    my $non_bin_length = $data_length - $bin_length;
    $isBinary = ($non_bin_length / $data_length) > .70 ? 0 : 1;
  }

  PutBackString($fileHandle,$testChars);

  # If it looks binary and is non-empty, try to uncompress it. Here we're
  # calling another copy of grepmail through the open command.
  if ($isBinary)
  {
    my $filter;

    # This seems to work. I'm not sure what the "proper" way to distinguish
    # between gzip'd and bzip2'd and tzip'd files is.
    if (substr($testChars, 0, 2) eq 'TZ')
    {
      dprint 'Trying to decompress using tzip.';
      $filter = 'tzip -dc';
    }
    elsif (substr($testChars, 0, 2) eq 'BZ')
    {
      dprint 'Trying to decompress using bzip2.';
      $filter = 'bzip2 -d';
    }
    else
    {
      dprint 'Trying to decompress using gunzip2.';
      $filter = 'gzip -dc';
    }

    # Implicit fork
    my $decompressedFileHandle = new FileHandle;
    my $pid = $decompressedFileHandle->open('-|');

    unless (defined($pid))
    {
      warn "** Can't fork to decompress standard input, stopping\n"
        unless $opts{'q'};
      return;
    }

    # In child. Write to the parent, giving it all the data to decompress.
    # We have to do it this way because other methods (e.g. open2) require us
    # to feed the filter as we use the filtered data. This method allows us to
    # keep the remainder of the code the same for both compressed and
    # uncompressed input.
    unless ($pid)
    {
      open(FRONT_OF_PIPE, "|$filter 2>/dev/null")
        or warn "** Can't execute \"$filter\" on STDIN: $!\n"
          unless $opts{'q'};

      while (!eof $fileHandle)
      {
        my $temp = <$fileHandle>;
        print FRONT_OF_PIPE $temp;
      }

      $fileHandle->close()
        or warn "** Can't execute \"$filter\" on STDIN: $!\n"
          unless $opts{'q'};
      # We intentionally don't check for error here. This is because the
      # parent may have aborted, in which case we let it take care of
      # error messages. (e.g. Non-mailbox standard input.)
      close FRONT_OF_PIPE;

      return;
    }

    # In parent
    if (!$opts{'F'} && !IsMailbox($decompressedFileHandle))
    {
      warn "** Skipping non-mailbox standard input\n" unless $opts{'q'};

      $decompressedFileHandle->close()
        or warn "** Can't execute \"$filter\" on STDIN: $!\n"
          unless $opts{'q'};

      return;
    }

    ProcessMailFile($decompressedFileHandle,"Standard input",1);

    $decompressedFileHandle->close()
      or warn "** Can't execute \"$filter\" on STDIN: $!\n"
        unless $opts{'q'};
  }
  # Otherwise process it directly
  else
  {
    if (!$opts{'F'} && !IsMailbox($fileHandle))
    {
      warn "** Skipping non-mailbox standard input\n" unless $opts{'q'};
      return;
    }

    ProcessMailFile($fileHandle,"Standard input",1);

    $fileHandle->close()
      or warn "** Can't process STDIN: $!\n"
        unless $opts{'q'};
  }
}

#-------------------------------------------------------------------------------

# Checks to see if there is data on a filehandle, without reading that data.

sub DataOnFileHandle
{
  my $fileHandle = shift;

  my $buffer = <$fileHandle>;

  return 0 unless defined $buffer;

  PutBackString($fileHandle,$buffer);

  return $buffer ? 1 : 0;
}

#-------------------------------------------------------------------------------

# Puts a string back on a file handle

sub PutBackString
{
  my $fileHandle = shift;
  my $string = shift;

  for (my $char_position=length($string)-1;$char_position >=0; $char_position--)
  {
    $fileHandle->ungetc(ord(substr($string,$char_position,1)));
  }
}

#-------------------------------------------------------------------------------

# Detects whether an ASCII file is a mailbox, based on whether it has
# a line whose prefix is 'From' or 'X-From-Line:' or 'X-Draft-From:',
# and another line whose prefix is 'Received ', 'Date:', 'Subject:',
# 'X-Status:', 'Status:', or 'To:'.

sub IsMailbox
{
  my $fileHandle = shift @_;

  # Read whole paragraphs
  local $/ = "\n\n";

  # Read a paragraph to get the header.
  my $buffer = <$fileHandle>;
  PutBackString($fileHandle,$buffer);
  
  # X-From-Line is used by Gnus, and From is used by normal Unix
  # format. Newer versions of Gnus use X-Draft-From
#  if ($buffer =~ /^(X-Draft-From:|X-From-Line:|From)\s/im &&
#      $buffer =~ /^(Date|Subject|X-Status|Status|To):\s/im)
  if ($buffer =~ /^(X-Draft-From:|X-From-Line:|From:?)\s/im &&
      $buffer =~ /^(Date|To|Bcc):\s/im)
  {
    return 1;
  }
  else
  {
    return 0;
  }
}

#-------------------------------------------------------------------------------

sub Do_Simple_Pattern_Matching
{
  my $email = shift;
  my $email_header = shift;
  my $email_body = shift;
  my $fileHandle = shift;
  my $fileName = shift;
  my $number_files = shift;
  my $numberOfMatches = shift;
  my $line = shift;

  dprint "Checking for early match or abort based on header information."
    if $opts{'D'};

  my ($result,$matchesHeader) =
    AnalyzeHeader($email_header,$email,$fileHandle,$pattern,1);

  if ($result == $SKIP)
  {
    dprint "Doing an early abort based on header." if $opts{'D'};
    return ($CONTINUE,$numberOfMatches);
  }

  if ($result == $PRINT)
  {
    dprint "Doing an early printout based on header." if $opts{'D'};
    if ($opts{'l'})
    {
      print "$fileName\n";
  
      # We can return since we found at least one email that matches.
      return ($DONE,$numberOfMatches);
    }
    elsif ($opts{'r'})
    {
      $numberOfMatches++;
      return ($CONTINUE,$numberOfMatches);
    }
    else
    {
      ConvertEmailToMboxAndPrintIt($fileName,$$email_header,$$email_body,$number_files,$line)
        if $opts{'u'} && NotADuplicate($$email_header) || !$opts{'u'};

      return ($CONTINUE,$numberOfMatches);
    }
  }

  #----------------------------------------------------------------

  my $matchesBody = 0;

  my $signature_offset = undef;

  if ($opts{'S'} && $$email_body =~ m/($opts{'X'})/mg)
  {
    $signature_offset = pos($$email_body) - length($1);
    pos($$email_body) = 0;
  }

  # Ignore the MIME attachments if -M was specified
  if ($opts{'M'} &&
     ($$email_header =~ /^Content-Type:.*?boundary=(?:"([^"]*)"|([^\n]*))/ism))
  {
    my $boundary;
    $boundary = $1 if defined $1;
    $boundary = $2 if defined $2;

    dprint "Found attachments with boundary:\n  $boundary" if $opts{'D'};

    my @attachment_positions;

    # Get each of the binary attachment beginnings and endings.
    while ($$email_body =~ m/\n((?:--)?\Q$boundary\E(?:--)?\n(?:(.*?)\n\n)?)/sg)
    {
      my $position = pos($$email_body) - length($1);
      my $header = $2;

      # Remember that the beginning of the next attachment is the
      # end of the previous.
      $attachment_positions[-1]{'end'} = $position
        if @attachment_positions;

      # If it's the beginning of a binary attachment, store the position
      if (defined $header && $header =~ /^Content-Type:\s+(?!text)/i)
      {
        $attachment_positions[$#attachment_positions+1]{'beginning'} =
          $position;
      }
    }

    pos($$email_body) = 0;

    # Now search the body, ignoring any matches in binary
    # attachments.
    # Avoid perl 5.6 bug which causes spurious warning even though
    # $pattern is defined.
    local $^W = 0 if $] >= 5.006 && $] < 5.8;
    SEARCH: while ($$email_body =~ m/($pattern)/omg)
    {
      my $position = pos($$email_body) - length($1);

      last SEARCH if $opts{'S'} &&
        defined $signature_offset && $position > $signature_offset;

      foreach my $attachment (@attachment_positions)
      {
        next SEARCH
          if ($position > $attachment->{'beginning'} &&
              $position < $attachment->{'end'});
      }

      $matchesBody = 1;
      last;
    }
  }
  else
  {
    # Avoid perl 5.6 bug which causes spurious warning even though
    # $pattern is defined.
    local $^W = 0 if $] >= 5.006 && $] < 5.8;
    if ($$email_body =~ m/($pattern)/omg)
    {
      my $position = pos($$email_body) - length($1);

      $matchesBody = 1 unless $opts{'S'} && 
        defined $signature_offset && $position > $signature_offset;
    }
  }

  #----------------------------------------------------------------

  my $matchesSize = IsInSize($email,$sizeRestriction,$size1,$size2);

  #----------------------------------------------------------------

  dprint "Checking for early match or abort based on header, body, " .
    "and size information." if $opts{'D'};

  my $isMatch = 1;
  
  $isMatch = 0 if  $opts{'s'} && !$matchesSize ||
    $opts{'b'} && !$matchesBody ||
    $opts{'h'} && !$matchesHeader ||
    !$opts{'b'} && !$opts{'h'} && !($matchesBody || $matchesHeader);

  if (!$isMatch && !$opts{'v'})
  {
    dprint "Doing an early abort based on header, body, and size."
      if $opts{'D'};
    return ($CONTINUE,$numberOfMatches);
  }
  elsif (!$isMatch && $opts{'v'})
  {
    dprint "Doing an early printout based on header, body, and size."
      if $opts{'D'};
  
    if ($opts{'l'})
    {
      print "$fileName\n";
      
      # We can return since we found at least one email that matches.
      return ($DONE,$numberOfMatches);
    }
    elsif ($opts{'r'})
    {
      $numberOfMatches++;
      return ($CONTINUE,$numberOfMatches);
    }
    else
    {
      ConvertEmailToMboxAndPrintIt($fileName,$$email_header,
        $$email_body,$number_files,$line)
          if $opts{'u'} && NotADuplicate($$email_header) || !$opts{'u'};

      return ($CONTINUE,$numberOfMatches);
    }
  }

  #----------------------------------------------------------------

  dprint "Checking date constraint." if $opts{'D'};

  $isMatch = 1;

  {
    my $matchesDate = Email_Matches_Date($email_header);
    $isMatch = 0 if defined $opts{'d'} && !$matchesDate;

    dprint "Email matches date constraint\n"
      if $opts{'D'} && defined $opts{'d'} && $matchesDate;
    dprint "Email doesn't match date constraint\n"
      if $opts{'D'} && defined $opts{'d'} && !$matchesDate;
  }

  $isMatch = !$isMatch if $opts{'v'};

  # If the match occurred in the right place...
  if ($isMatch)
  {
    dprint "Email matches all patterns and constraints." if $opts{'D'};

    if ($opts{'l'})
    {
      print "$fileName\n";

      # We can return since we found at least one email that matches.
      return ($DONE,$numberOfMatches);
    }
    elsif ($opts{'r'})
    {
      $numberOfMatches++;
    }
    else
    {
      ConvertEmailToMboxAndPrintIt($fileName,$$email_header,
        $$email_body,$number_files,$line)
          if $opts{'u'} && NotADuplicate($$email_header) || !$opts{'u'};
    }
  }
  else
  {
    dprint "Email did not match all patterns and constraints." if $opts{'D'};
  }

  return ($CONTINUE,$numberOfMatches);
}

#-------------------------------------------------------------------------------

sub Do_Complex_Pattern_Matching
{
  my $email = shift;
  my $email_header = shift;
  my $email_body = shift;
  my $fileHandle = shift;
  my $fileName = shift;
  my $number_files = shift;
  my $numberOfMatches = shift;
  my $line = shift;

  dprint "Checking for early match or abort based on header information."
    if $opts{'D'};

  my ($result,$matchesHeader) =
    AnalyzeHeader($email_header,$email,$fileHandle,$pattern,0);

  if ($result == $SKIP)
  {
    dprint "Doing an early abort based on header." if $opts{'D'};
    return ($CONTINUE,$numberOfMatches);
  }

  if ($result == $PRINT)
  {
    dprint "Doing an early printout based on header." if $opts{'D'};
    if ($opts{'l'})
    {
      print "$fileName\n";
  
      # We can return since we found at least one email that matches.
      return ($DONE,$numberOfMatches);
    }
    elsif ($opts{'r'})
    {
      $numberOfMatches++;
      return ($CONTINUE,$numberOfMatches);
    }
    else
    {
      ConvertEmailToMboxAndPrintIt($fileName,$$email_header,$$email_body,$number_files,$line)
        if $opts{'u'} && NotADuplicate($$email_header) || !$opts{'u'};

      return ($CONTINUE,$numberOfMatches);
    }
  }

  #----------------------------------------------------------------

  my $modified_pattern = $pattern;

  $modified_pattern =~ s/\$email_header\b/\$\$email_header/g;
  $modified_pattern =~ s/\$email_body\b/\$\$email_body/g;
  $modified_pattern =~ s/\$email\b/\$\$email/g;
  $modified_pattern =~ s#(=~\s*)/([^/]*)/#$1/$2/om#g;

  my $matchesEmail;
  eval " \$matchesEmail = $modified_pattern ? 1 : 0 ";

  #----------------------------------------------------------------

  my $isMatch = 1;
  $isMatch = 0 unless $matchesEmail;

  if (!$isMatch && !$opts{'v'})
  {
    dprint "Doing an early abort based on header, body, and size."
      if $opts{'D'};
    return ($CONTINUE,$numberOfMatches);
  }
  elsif (!$isMatch && $opts{'v'})
  {
    dprint "Doing an early printout based on header, body, and size.";
  
    if ($opts{'l'})
    {
      print "$fileName\n";
      
      # We can return since we found at least one email that matches.
      return ($DONE,$numberOfMatches);
    }
    elsif ($opts{'r'})
    {
      $numberOfMatches++;
      return ($CONTINUE,$numberOfMatches);
    }
    else
    {
      ConvertEmailToMboxAndPrintIt($fileName,$$email_header,
        $$email_body,$number_files,$line)
          if $opts{'u'} && NotADuplicate($$email_header) || !$opts{'u'};

      return ($CONTINUE,$numberOfMatches);
    }
  }

  #----------------------------------------------------------------

  dprint "Checking date constraint." if $opts{'D'};

  $isMatch = 1;

  {
    my $matchesDate = Email_Matches_Date($email_header);
    $isMatch = 0 if defined $opts{'d'} && !$matchesDate;

    dprint "Email matches date constraint\n"
      if $opts{'D'} && defined $opts{'d'} && $matchesDate;
    dprint "Email doesn't match date constraint\n"
      if $opts{'D'} && defined $opts{'d'} && !$matchesDate;
  }

  $isMatch = !$isMatch if $opts{'v'};

  # If the match occurred in the right place...
  if ($isMatch)
  {
    dprint "Email matches all patterns and constraints." if $opts{'D'};

    if ($opts{'l'})
    {
      print "$fileName\n";

      # We can return since we found at least one email that matches.
      return ($DONE,$numberOfMatches);
    }
    elsif ($opts{'r'})
    {
      $numberOfMatches++;
    }
    else
    {
      ConvertEmailToMboxAndPrintIt($fileName,$$email_header,
        $$email_body,$number_files,$line)
          if $opts{'u'} && NotADuplicate($$email_header) || !$opts{'u'};
    }
  }
  else
  {
    dprint "Email did not match all patterns and constraints." if $opts{'D'};
  }

  return ($CONTINUE,$numberOfMatches);
}

#-------------------------------------------------------------------------------

# This algorithm is complicated by code to short-circuit some
# computations. For example, if the user specified -h but not -b, when
# we can analyze the header for a match and avoid needing to search
# the body, which may be much larger.

sub ProcessMailFile
{
  my $fileHandle = shift @_;
  my $fileName = shift @_;
  my $number_files = shift @_;

  my $numberOfMatches = 0;

  my $folder_reader;

  $Mail::Folder::SlowReader::DEBUG = $opts{'D'};

  if ($fileName eq 'Standard input')
  {
    $folder_reader =
      new Mail::Folder::SlowReader( {
        'file_handle' => $fileHandle,
        'use_cache' => 0,
      } );
  }
  else
  {
    Mail::Folder::SlowReader::SETUP_CACHE() if $USE_CACHING;
    $folder_reader =
      new Mail::Folder::SlowReader( {
        'file_name' => $fileName,
        'file_handle' => $fileHandle,
        'use_cache' => $USE_CACHING,
      } );
  }

  # This is the main loop. It's executed once for each email
  while(1)
  {
    dprint "Reading email" if $opts{'D'};

    # Direct access for performance reasons
    #last if $folder_reader->end_of_file();
    last if $folder_reader->{end_of_file};

    my $email = $folder_reader->read_next_email();

    # Direct access for performance reasons
    #my $line = $folder_reader->line_number();
    my $line = $folder_reader->{email_line_number};

    my ($email_header,$email_body) = split /\n\n/,$email,2;
    $email_header .= "\n\n";

    PrintEmailStatistics($email) if $opts{'D'};

    #----------------------------------------------------------------

    if ($opts{'E'})
    {
      my $result;
      ($result, $numberOfMatches) = 
        Do_Complex_Pattern_Matching(\$email, \$email_header, \$email_body,
          $fileHandle, $fileName, $number_files, $numberOfMatches, $line);

      return if $result == $DONE;
    }
    else
    {
      my $result;
      ($result, $numberOfMatches) = 
        Do_Simple_Pattern_Matching(\$email, \$email_header, \$email_body,
          $fileHandle, $fileName, $number_files, $numberOfMatches, $line);

      return if $result == $DONE;
    }

  }

  print "$fileName: $numberOfMatches\n" if ($opts{'r'});
}

#-------------------------------------------------------------------------------

# Checks that an email is not a duplicate of one already printed. This should
# only be called when $opts{'u'} is true. Also, as a side-effect, it updates
# the %message_ids_seen when it sees an email that hasn't been printed yet.

{
my $tried_to_load_digest_md5;

sub NotADuplicate
{
  my $email_header = shift;

  my ($message_id) = $email_header =~ /^Message-Id:\s*<([^>]+)>/mi;

  if (defined $message_id)
  {
    dprint "Checking uniqueness of message id: $message_id";
  }
  else
  {
    dprint "Email does not have a message id";

    # Try to load Digest::MD5 if we haven't already
    unless (defined $tried_to_load_digest_md5)
    {
      $tried_to_load_digest_md5 = 1;

      if (eval "require Digest::MD5")
      {
        dprint "Digest::MD5 VERSION: $Digest::MD5::VERSION";
        # To prevent warning about variable being used only once
        my $dummy = $Digest::MD5::VERSION;
      }
      else
      {
        dprint "Digest::MD5 could not be loaded";
      }
    }

    # Now create a message id
    if (defined $Digest::MD5::VERSION)
    {
      $message_id = Digest::MD5::md5_hex($email_header);
      dprint "Generated message id $message_id with Digest::MD5";
    }
    else
    {
      $message_id = $email_header;
      dprint "Using email header as message id.";
    }
  }


  my $result;

  if (exists $message_ids_seen{$message_id})
  {
    $result = 0;
    dprint "Found duplicate message";
  }
  else
  {
    $result = 1;
    dprint "Found non-duplicate message";
    $message_ids_seen{$message_id} = 1;
  }

  return $result;
}
}

#-------------------------------------------------------------------------------

# Get the email author from the header or email. Return undef if it can't be
# found

sub GetFromLine
{
  my $email = shift;

  # Remove the body.
  my $header = $email;
  $header =~ s/\n\n.*/\n/s;

  # Avoid perl 5.6 bug which causes spurious warning even though $header is
  # defined.
  local $^W = 0 if $] >= 5.006 && $] < 5.8;
  if ($header =~ /^(From:\s.*)$/im)
  {
    return $1;
  }
  elsif ($header =~ /^(From\s.*)$/im)
  {
    return $1;
  }
  else
  {
    return undef;
  }
}

#-------------------------------------------------------------------------------

# Get the email author from the header or email. Return undef if it can't be
# found

sub GetSubjectLine
{
  my $email = shift;

  # Remove the body.
  my $header = $email;
  $header =~ s/\n\n.*/\n/s;

  # Avoid perl 5.6 bug which causes spurious warning even though $header is
  # defined.
  local $^W = 0 if $] >= 5.006 && $] < 5.8;
  if ($header =~ /^(Subject:\s.*)$/im)
  {
    return $1;
  }
  else
  {
    return undef;
  }
}

#-------------------------------------------------------------------------------

# Get the email author from the header or email. Return undef if it can't be
# found

sub GetDateLine
{
  my $email = shift;

  # Remove the body.
  my $header = $email;
  $header =~ s/\n\n.*/\n/s;

  # Avoid perl 5.6 bug which causes spurious warning even though $pattern is
  # defined.
  local $^W = 0 if $] >= 5.006 && $] < 5.8;
  if ($header =~ /^(Date:\s.*)$/im)
  {
    return $1;
  }
  else
  {
    return undef;
  }
}

#-------------------------------------------------------------------------------

# Print the email author and subject.

sub PrintEmailStatistics
{
  my $email = shift;

  dprint '-'x70;
  dprint "Processing email:";

  my $author = GetFromLine($email);

  if (defined $author)
  {
    dprint "  $author";
  }
  else
  {
    dprint "  [No from line found]";
  }

  my $subject = GetSubjectLine($email);

  if (defined $subject)
  {
    dprint "  $subject";
  }
  else
  {
    dprint "  [No subject line found]";
  }

  my $date = GetDateLine($email);

  if (defined $date)
  {
    dprint "  $date";
  }
  else
  {
    dprint "  [No subject line found]";
  }

  dprint "  Size: " . length $email;
}

#-------------------------------------------------------------------------------

# Returns:
# A result:
# - $PRINT if the email is a match and we need to print it
# - $SKIP if we should skip the current email and go on to the next one
# - $CONTINUE if we need to keep processing the email.
# A boolean for whether the header matches the pattern.
# A boolean for whether the header has the correct date.

# It turns out that -h, -b, -d, -s , -f, and -v have some nasty feature
# interaction. The easy cases are when a constraint is not met--either we skip
# if -v is not specified, or we print if -v is specified.
#
# If a constraint *is* met, we can still do an early abort of there are no other
# constraints, or if we know the values of previously checked constraints.
#
# Finally, -b must be taken into account when analyzing -h matching. Also, we
# don't analyze the date here because it is too darn slow.
sub AnalyzeHeader
{
  my $email_header = shift;
  my $email = shift;
  my $fileHandle = shift;
  my $pattern = shift;
  my $doHeaderMatch = shift;

  # See if the email fails the status flag restriction
  my $matchesStatus = 1;
  if ($opts{'f'})
  {
    foreach my $flag (split //,$opts{'f'})
    {
      $matchesStatus = 0 unless $$email_header =~ /^Status: .*(?i:$flag)/m;
    }

    # Easy cases
    return ($SKIP,0)  if !$opts{'v'} && !$matchesStatus;
    return ($PRINT,1) if  $opts{'v'} && !$matchesStatus;

    # If we know there are no other constraints
    return ($PRINT,1) if !$opts{'v'} &&  $matchesStatus &&
      !$opts{'s'} && !defined $opts{'d'} && $pattern eq '.';
    return ($SKIP,0)  if  $opts{'v'} &&  $matchesStatus &&
      !$opts{'s'} && !defined $opts{'d'} && $pattern eq '.';
  }

  # See if the email header fails the size restriction.
  my $matchesSize = 1;
  if ($opts{'s'})
  {
    $matchesSize = 0 if !IsInSize($email,$sizeRestriction,$size1,$size2);

    # Easy cases
    return ($SKIP,0)  if !$opts{'v'} && !$matchesSize;
    return ($PRINT,1) if  $opts{'v'} && !$matchesSize;

    # If we know there are no other constraints, or we know their values
    return ($PRINT,1) if !$opts{'v'} &&  $matchesSize &&
      $matchesStatus && !defined $opts{'d'} && $pattern eq '.';
    return ($SKIP,0)  if  $opts{'v'} &&  $matchesSize &&
      $matchesStatus && !defined $opts{'d'} && $pattern eq '.';
  }

  if ($doHeaderMatch)
  {
    # See if the header matches the pattern
    # Avoid perl 5.6 bug which causes spurious warning even though $pattern is
    # defined.
    local $^W = 0 if $] >= 5.006 && $] < 5.8;
    my $matchesHeader = Header_Matches_Pattern($email_header,$pattern);

    if ($opts{'h'})
    {
      # Easy cases
      return ($SKIP,0)  if !$opts{'v'} && !$matchesHeader;
      return ($PRINT,1) if  $opts{'v'} && !$matchesHeader;
    }

    # If we know there are no other constraints, or we know their values
    return ($PRINT,1) if !$opts{'v'} &&  $matchesHeader &&
      $matchesSize && $matchesStatus && !defined $opts{'d'} && !$opts{'b'};
    return ($SKIP,0)  if  $opts{'v'} &&  $matchesHeader &&
      $matchesSize && $matchesStatus && !defined $opts{'d'} && !$opts{'b'};


    return ($CONTINUE,$matchesHeader);
  }
  else
  {
    return ($CONTINUE,1);
  }
}

#-------------------------------------------------------------------------------

my $header_pattern = undef;

sub Header_Matches_Pattern
{
  my $email_header = ${shift @_};
  my $pattern = shift;

  return ($email_header =~ /$pattern/om) || 0 unless $opts{'Y'};

  dprint "Searching individual headers.";

  $email_header =~ s/\n(\s+)/$1/g;

  unless (defined $header_pattern)
  {
    $header_pattern = $opts{'Y'};

    for my $special_header_pattern (keys %HEADER_PATTERNS)
    {
      $header_pattern =~
        s/\Q$special_header_pattern\E/$HEADER_PATTERNS{$special_header_pattern}/g;
    }
  }

  for my $header (split(/\n/, $email_header))
  {
    if ($header =~ /$header_pattern/)
    {
      dprint "Header matched header pattern:\n  $header\n";
      return 1 if $header =~ /$pattern/om;
    }
  }
  
  return 0;
}

#-------------------------------------------------------------------------------

sub ConvertEmailToMboxAndPrintIt
{
  my $fileName = shift;
  my $header = shift;
  my $body = shift;
  my $number_files = shift;
  my $line_number = shift;

  ($header,$body) = ConvertEmailToMbox($header,$body);
  PrintEmail($fileName,$header,$body,$number_files,$line_number);
}

#-------------------------------------------------------------------------------

sub ConvertEmailToMbox
{
  my $header = shift;
  my $body = shift;

  dprint "Making email mbox format.";

  # Check for a Gnus email
  if ($header =~ /^X-From-Line:\s+/)
  {
    $header =~ s/^X-From-Line:\s+/From /;
  }

  return ($header,$body);
}

#-------------------------------------------------------------------------------

sub PrintEmail
{
  my $fileName = shift;
  my $header = shift;
  my $body = shift;
  my $number_files = shift;
  my $line_number = shift;

  dprint "Printing email.";

  if ($opts{'n'})
  {
    # Print line-by-line
    while ($header =~ /([^\n]*\n)/g)
    {
      my $line = $1;

      # Add the mailfolder to the headers if -m was given. Careful
      # about line numbers!
      if ($opts{'m'} && $line eq "\n")
      {
        print "$fileName:" if $number_files > 1;
        print "$line_number:X-Mailfolder: $fileName\n";
      }

      print "$fileName:" if $number_files > 1;
      print "$line_number:";
      print $line;
      $line_number++;
    }

    # Don't print the body if -H is specified
    return if $opts{'H'};

    while ($body =~ /([^\n]*\n)/g)
    {
      my $line = $1;

      print "$fileName:" if $number_files > 1;
      print "$line_number:$line";
      $line_number++;
    }
  }
  else
  {
    $header =~ s/\n\n/\nX-Mailfolder: $fileName\n\n/ if $opts{'m'};

    print $header;

    # Don't print the body if -H is specified
    return if $opts{'H'};

    # Print whatever body we've read already.
    print $body;
  }
}

#-------------------------------------------------------------------------------

# Checks to see if the date in the header matches the date specification. The
# date specification can be $NODATE, meaning that the email doesn't have
# a Date: line.

sub Email_Matches_Date($)
{
  my $header = ${shift @_};
  my ($emailDate, $isInDate);
  $emailDate = '';
  $isInDate = 0;

  return 1 unless defined $opts{'d'};

  # RFC 822 allows header lines to be continued on the next line, in
  # which case they must be preceded by whitespace. Let's remove the
  # continuations.
  $header =~ s/\n\s+(\S)/ $1/g;

  if ( ($opts{'a'} && ($header =~ /^Received:\s.*\;\s*(.*?)$/im)) ||
    $header =~ /^Date:\s*(.*)$/im ||
    $header =~ /^From \S+\s+(.*)$/im)
  {
    dprint "Date in email is: $1.";

    $emailDate = str2time($1);

    return IsInDate($emailDate,$dateRestriction,$date1,$date2)
      if defined $emailDate;
    
    warn "** Couldn't parse email date \"$1\". " .
      "Assuming it doesn't match the date constraint **\n";
    warn "  " . GetFromLine($header) . "\n"
        if defined GetFromLine($header);
    warn "  " . GetSubjectLine($header) . "\n"
        if defined GetSubjectLine($header);

    return 0;
  }
  # The email might not have a date. In this case, print out a warning.
  else
  {
    return 1 if $dateRestriction == $NODATE;

    warn "** Couldn't find a date. Assuming email doesn't match the " .
         "date constraint:\n";
    warn "  " . GetFromLine($header) . "\n"
      if defined GetFromLine($header);
    warn "  " . GetSubjectLine($header) . "\n"
      if defined GetSubjectLine($header);

    return 0;
  }
}

#-------------------------------------------------------------------------------

# This function tries to parse a date first with Date::Parse. If Date::Parse
# can't parse the date, then the function tries to use Date::Manip to parse
# it. Returns '' if the date can't be parsed.

{
my $loaded_date_manip = undef;

sub ParseDate
{
  my $date = shift;

  my $parsedDate;

  # First try to parse the date with Date::Parse;
  $parsedDate = str2time($date);
  return $parsedDate if defined $parsedDate;

  # Try to load Date::Manip if we haven't already
  unless (defined $loaded_date_manip)
  {
    if (eval "require Date::Manip")
    {
      $loaded_date_manip = 1;

      dprint "Date::Manip VERSION: $Date::Manip::VERSION";
      # To prevent warning about variable being used only once
      my $dummy = $Date::Manip::VERSION;
    }
    else
    {
      $loaded_date_manip = 0;
    }
  }

  return '' unless $loaded_date_manip;

  my $temp_date = Date::Manip::UnixDate(Date::Manip::ParseDate($date),'%s');

  return '' unless defined $temp_date;
  return $temp_date;
}
}

#-------------------------------------------------------------------------------

# Figure out what kind of date restriction they want, and what the dates in
# question are. An empty date string results in the type of date restriction
# being $NODATE.
sub ProcessDate($)
{
  my $datestring = shift;

  return ($NODATE,'','') if $datestring eq '';

  if ($datestring =~ /^before (.*)/i)
  {
    $dateRestriction = $BEFORE;
    $date1 = ParseDate($1);
    $date2 = '';

    reportAndExit "\"$1\" is not a valid date" unless $date1;
  }
  elsif ($datestring =~ /^(after|since)\s(.*)/i)
  {
    $dateRestriction = $AFTER;
    $date1 = ParseDate($2);
    $date2 = '';

    reportAndExit "\"$2\" is not a valid date" unless $date1;
  }
  elsif ($datestring =~ /^between (.+) and (.+)/i)
  {
    $dateRestriction = $BETWEEN;
    $date1 = ParseDate($1);
    $date2 = ParseDate($2);

    reportAndExit "\"$1\" is not a valid date" unless $date1;
    reportAndExit "\"$2\" is not a valid date" unless $date2;

    # Swap the dates if the user gave them backwards.
    if ($date1 > $date2)
    {
      my $temp;
      $temp = $date1;
      $date1 = $date2;
      $date2 = $temp;
    }

  }
  elsif (ParseDate($datestring) ne '')
  {
    $dateRestriction = $ON;
    $date1 = ParseDate($datestring);
    $date2 = '';
  }
  else
  {
    reportAndExit "\"$datestring\" is an invalid date specification. Use \"$0 -h\" for help";
  }

  return ($dateRestriction,$date1,$date2);
}

#-------------------------------------------------------------------------------

# Figure out what kind of size restriction they want, and what the sizes in
# question are.
sub ProcessSize($)
{
  my $sizestring = shift;

  if ($sizestring =~ /^\s*(<|<=|>|>=)\s*(\d+)\s*$/i)
  {
    if ($1 eq '<')
    {
      $sizeRestriction = $LESS_THAN;
    }
    elsif ($1 eq '<=')
    {
      $sizeRestriction = $LESS_THAN_OR_EQUAL;
    }
    elsif ($1 eq '>')
    {
      $sizeRestriction = $GREATER_THAN;
    }
    elsif ($1 eq '>=')
    {
      $sizeRestriction = $GREATER_THAN_OR_EQUAL;
    }

    $size1 = $2;
    $size2 = '';
  }
  elsif ($sizestring =~ /^\s*(\d+)\s*-\s*(\d+)\s*$/i)
  {
    $sizeRestriction = $BETWEEN;
    $size1 = $1;
    $size2 = $2;

    # Swap the sizes if the user gave them backwards.
    if ($size1 > $size2)
    {
      my $temp;
      $temp = $size1;
      $size1 = $size2;
      $size2 = $temp;
    }
  }
  elsif ($sizestring =~ /^\s*(\d+)\s*$/i)
  {
    $sizeRestriction = $EQUAL;
    $size1 = $1;
    $size2 = '';
  }
  else
  {
    reportAndExit "\"$sizestring\" is an invalid size specification. Use \"$0 -h\" for help";
  }

  return ($sizeRestriction,$size1,$size2);
}

#-------------------------------------------------------------------------------

sub IsInDate($$$$)
{
  my ($emailDate,$dateRestriction,$date1,$date2);
  $emailDate = shift @_;
  $dateRestriction = shift @_;
  $date1 = shift @_;
  $date2 = shift @_;

  # Now we do the date checking.
  return 1 if $dateRestriction == $NONE;
  return $emailDate < $date1 if $dateRestriction == $BEFORE;
  return $emailDate > $date1 if $dateRestriction == $AFTER;

  # Since these values are in seconds, we have to make sure that $emailDate
  # is within 24 hours after $date1
  return $emailDate > $date1 && $emailDate-$date1 < 24*60*60 if $dateRestriction == $ON;
  return $emailDate > $date1 && $emailDate < $date2 if $dateRestriction == $BETWEEN;

  return 0;
}

#-------------------------------------------------------------------------------

sub IsInSize($$$$)
{
  my ($string,$sizeRestriction,$size1,$size2);
  $string = shift @_;
  $sizeRestriction = shift @_;
  $size1 = shift @_;
  $size2 = shift @_;

  # Now we do the size checking.
  return 1 if $sizeRestriction == $NONE;
  return length $$string < $size1
    if $sizeRestriction == $LESS_THAN;
  return length $$string <= $size1
    if $sizeRestriction == $LESS_THAN_OR_EQUAL;
  return length $$string > $size1
    if $sizeRestriction == $GREATER_THAN;
  return length $$string >= $size1
    if $sizeRestriction == $GREATER_THAN_OR_EQUAL;
  return length $$string == $size1
    if $sizeRestriction == $EQUAL;

  return length $$string >= $size1 && length $$string <= $size2
    if $sizeRestriction == $BETWEEN;

  return 0;
}

#-------------------------------------------------------------------------------

sub usage
{
<<EOF;
usage: grepmail [--help] [-abBDFhHilmrRuv] [-s sizespec] [-d "datespec"] [[-eE] <expr>] <files...>
EOF
}

#-------------------------------------------------------------------------------

sub help
{
<<EOF;
grepmail $VERSION

usage:

  grepmail [--help|--version] [-abBDFhHilmrRuvV] [-f status] [-s sizespec]
    [-d <date-specification>] [-X <signature-pattern>] [-Y <header-pattern>]
    <files...>

  grepmail [--help|--version] [-abBDFhHilmrRuvV] [-f status] [-s sizespec]
    [-d <date-specification>] [-X <signature-pattern>] [-Y <header-pattern>]
    [[-e] <pattern>] <files...>

  grepmail [--help|--version] [-abBDFhHilmrRuvV] [-f status] [-s sizespec]
    [-d <date-specification>] [-X <signature-pattern>] [-Y <header-pattern>]
    [-E <expr>] <files...>

At least one of -s, -d, -u, -e, and -E must be specified, and can appear in
any relative order following the other flags. The -e flag is optional if
pattern appears immediately before -s or -d. Files can be plain ASCII or ASCII
files compressed with gzip, tzip, or bzip2. -E allows for complex pattern
matches involving logical operators. If no file is provided, normal or
compressed ASCII input is taken from STDIN.

-a Use received date instead of sent date for -d matching
-b Search must match body
-d Specify a required date range (see below)
-D Debug mode
-e Explicitly name pattern (when searching for strings beginning with "-")
-E Specify a complex search expression
-f Search must match status (A=answered, R=read, D=deleted, O=old, F=flagged)
-F Force processing of mailboxes which appear to be binary
-h Search must match header
-H Print headers but not bodies of matching emails
-i Ignore case in the search expression
-l Output the names of files having an email matching the expression
-M Do not search non-text mime attachments
-m Append "X-Mailfolder: <folder>" to all headers to indicate in which folder
   the match occurred
-n Print the line number info (and filename if necessary) for the emails
-q Quiet mode -- don't output warnings
-r Output the names of the files and the number of emails matching the
   expression
-R Recurse directories
-s Specify a size range in bytes (see below)
-S Ignore signatures
-u Ensure that no duplicate emails are output
-v Output emails that don't match the expression
-V Display the version number
-X Specify a regular expression for the signature separator
-Y Specify a header to search (implies -h)
--help Print a help message

Date constraints require Date::Parse. Date specifications must be of the
form of:
- a date like "today", "1st thursday in June 1992" (requires Date::Manip),
  "05/18/93", "12:30 Dec 12th 1880", "8:00pm december tenth",
- "before", "after", or "since", followed by a date as defined above,
- "between <date> and <date>", where <date> is defined as above.

Size constraints must be of the form of:
- 12345: match size of exactly 12345
- <12345, <=12345, >12345, >=12345: match size less than, less than or equal,
  greater than, or greater than or equal to 12345
- 10000-12345: match size between 10000 and 12345 inclusive
EOF
}

#-------------------------------------------------------------------------------

=head1 NAME

grepmail - search mailboxes for mail matching a regular expression

=head1 SYNOPSIS

  grepmail [--help|--version] [-abBDhHilmrRuvV] [-s sizespec]
    [-d <date-specification>] [-X <signature-pattern>] [-Y <header-pattern>]
    <files...>

  grepmail [--help|--version] [-abBDhHilmrRuvV] [-s sizespec]
    [-d <date-specification>] [-X <signature-pattern>] [-Y <header-pattern>]
    [[-e] <pattern>] <files...>

  grepmail [--help|--version] [-abBDhHilmrRuvV] [-s sizespec]
    [-d <date-specification>] [-X <signature-pattern>] [-Y <header-pattern>]
    [-E <expr>] <files...>

=head1 DESCRIPTION

=over 2

I<grepmail> looks for mail messages containing a pattern, and prints the
resulting messages on standard out.

By default I<grepmail> looks in both header and body for the specified pattern.

When redirected to a file, the result is another mailbox, which can, in turn,
be handled by standard User Agents, such as I<elm>, or even used as input for
another instance of I<grepmail>.

At least one of B<-E>, B<-e>, B<-d>, B<-s>, or B<-u> must be specified. The
pattern is optional if B<-d>, B<-s>, and/or B<-u> is used. The B<-e> flag is
optional if there is no file whose name is the pattern. The B<-E> option can
be used to specify complex search expressions involving logical operators.
(See below.)

If a mailbox can not be found, grepmail searches the $home/mail, $home/Mail,
$home/Mailbox directories (or the directory specified by the MAIL environment
variable).

=back

=head1 OPTIONS AND ARGUMENTS

Many of the options and arguments are analogous to those of grep.

=over 2

=item B<pattern>

The pattern to search for in the mail message.  May be any Perl regular
expression, but should be quoted on the command line to protect against
globbing (shell expansion). To search for more than one pattern, use the form
"(pattern1|pattern2|...)".

=item B<mailbox>

Mailboxes must be traditional, UNIX C</bin/mail> mailbox format.  The
mailboxes may be compressed by gzip, tzip, or bzip2, in which case
gunzip, tzip, or bzip2 must be installed on the system.

If no mailbox is specified, takes input from stdin, which can be compressed or
not. grepmail's behavior is undefined when ASCII and binary data is piped
together as input.

=item B<-a>

Use arrival date instead of sent date.

=item B<-b>

Asserts that the pattern must match in the body of the email. 

=item B<-D>

Enable debug mode, which prints diagnostic messages.

=item B<-d>

Date specifications must be of the form of:
  - a date like "today", "yesterday", "5/18/93", "5 days ago", "5 weeks ago",
  - OR "before", "after", or "since", followed by a date as defined above,
  - OR "between <date> and <date>", where <date> is defined as above.

Simple date expressions will first be parsed by Date::Parse. If this fails,
grepmail will attempt to parse the date with Date::Manip, if the module is
installed on the system. Use an empty pattern (i.e. B<-d "">) to find emails
without a "Date: ..." line in the header.

=item B<-E>

Specify a complex search expression using logical operators. The current
syntax allows the user to specify search expressions using Perl syntax. Three
values can be used: $email (the entire email message), $email_header (just the
header), or $email_body (just the body). A search is specified in the form
"$email =~ /pattern/", and multiple searches can be combined using "&&" and
"||" for "and" and "or".

For example, the expression

  $email_header =~ /^From: .*\@coppit.org/ && $email =~ /grepmail/i

will find all emails which originate from coppit.org (you must escape the "@"
sign with a backslash), and which contain the keyword "grepmail" anywhere in
the message, in any capitalization.

B<-E> is incompatible with B<-b>, B<-h>, and B<-e>. B<-i>, B<-M>, B<-S>, and
B<-Y> have not yet been implemented.

NOTE: The syntax of search expressions may change in the future. In
particular, support for size, date, and other constraints may be added. The
syntax may also be simplified in order to make expression formation easier to
use (and perhaps at the expense of reduced functionality).

=item B<-e>

Explicitly specify the search pattern. This is useful for specifying patterns
that begin with "-", which would otherwise be interpreted as a flag.

=item B<-f>

Asserts that the email "Status:" header must contain the given flags. Order
and case are not important, so use I<-f AR> or I<-f ra> to search for emails
which have been read and answered.

=item B<-F>

Force processing of ASCII files which don't appear to be mailboxes.

=item B<-h>

Asserts that the pattern must match in the header of the email.

=item B<-H>

Print the header but not body of matching emails.

=item B<-i>

Make the search case-insensitive (by analogy to I<grep -i>).

=item B<-l>

Output the names of files having an email matching the expression, (by analogy
to I<grep -l>).

=item B<-M>

Causes grepmail to ignore non-text MIME attachments. This removes false
positives resulting from binaries encoded as ASCII attachments.

=item B<-m>

Append "X-Mailfolder: <folder>" to all email headers, indicating which folder
contained the matched email.

=item B<-n>

Prefix each line with line number information. If multiple files are
specified, the filename will precede the line number. NOTE: When used in
conjunction with B<-m>, the X-Mailfolder header has the same line number as
the next (blank) line.

=item B<-q>

Quiet mode. Suppress the output of warning messages about non-mailbox files,
directories, etc.

=item B<-r>

Generate a report of the names of the files containing emails matching the
expression, along with a count of the number of matching emails.

=item B<-R>

Causes grepmail to recurse any directories encountered.

=item B<-s>

Return emails which match the size (in bytes) specified with this flag. Note
that this size includes the length of the header.

Size constraints must be of the form of:
 - 12345: match size of exactly 12345
 - <12345, <=12345, >12345, >=12345: match size less than, less than or equal,
   greater than, or greater than or equal to 12345
 - 10000-12345: match size between 10000 and 12345 inclusive

=item B<-S>

Ignore signatures. The signature consists of everything after a line
consisting of "-- ".

=item B<-u>

Output only unique emails, by analogy to I<sort -u>. Grepmail determines email
uniqueness by the Message-ID header.

=item B<-v>

Invert the sense of the search, by analogy to I<grep -v>. This results in the
set of emails printed being the complement of those that would be printed
without the B<-v> switch.

=item B<-V>

Print the version and exit.

=item B<-X>

Specify a regular expression for the signature separator. By default this
pattern is '^-- $'.

=item B<-Y>

Specify a pattern which indicates specific headers to be searched. The search
will automatically treat headers which span multiple lines as one long line.
This flag implies B<-h>.

In the style of procmail, special strings in the pattern will be expanded as
follows:

=over 2

If the regular expression contains "^TO:" it will be  substituted by
"^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope|Apparently(-Resent)?)-To):",
which should match all headers with destination addresses.

If the regular expression contains "^FROM_DAEMON:" it  will
be substituted by "(^(Mailing-List:|Precedence:.*(junk|bulk|list)|To: Multiple recipients of |(((Resent-)?(From|Sender)|X-Envelope-From):|>?From )([^>]*[^(.%@a-z0-9])?(Post(ma?(st(e?r)?|n)|office)|(send)?Mail(er)?|daemon|m(mdf|ajordomo)|n?uucp|LIST(SERV|proc)|NETSERV|o(wner|ps)|r(e(quest|sponse)|oot)|b(ounce|bs\.smtp)|echo|mirror|s(erv(ices?|er)|mtp(error)?|ystem)|A(dmin(istrator)?|MMGR|utoanswer))(([^).!:a-z0-9][-_a-z0-9]*)?[%@>\t ][^<)]*(\(.*\).*)?)?",
which should catch mails coming from most daemons.

If  the regular expression contains "^FROM_MAILER:" it will be substituted by
"(^(((Resent-)?(From|Sender)|X-Envelope-From):|>?From)([^>]*[^(.%@a-z0-9])?(Post(ma(st(er)?|n)|office)|(send)?Mail(er)?|daemon|mmdf|n?uucp|ops|r(esponse|oot)|(bbs\.)?smtp(error)?|s(erv(ices?|er)|ystem)|A(dmin(istrator)?|MMGR))(([^).!:a-z0-9][-_a-z0-9]*)?[%@>\t][^<)]*(\(.*\).*)?)?$([^>]|$))" (a stripped down version of
"^FROM_DAEMON:"), which should catch mails coming from most mailer-daemons.

=back

=item B<--help>

Print a help message summarizing the usage.

=back

=head1 EXAMPLES

Count the number of emails. ("." matches every email.)

  grepmail -r . sent-mail

Get all email between 2000 and 3000 bytes about books

  grepmail books -s 2000-3000 sent-mail

Get all email that you mailed yesterday

  grepmail -d yesterday sent-mail

Get all email that you mailed before the first thursday in June 1998 that
pertains to research (requires Date::Manip):

  grepmail research -d "before 1st thursday in June 1992" sent-mail

Get all email that you mailed before the first of June 1998 that
pertains to research:

  grepmail research -d "before 6/1/92" sent-mail

Get all email you received since 8/20/98 that wasn't about research or your
job, ignoring case:

  grepmail -iv "(research|job)" -d "since 8/20/98" saved-mail

Get all email about mime but not about Netscape. Constrain the search to match
the body, since most headers contain the text "mime":

  grepmail -b mime saved-mail | grepmail Netscape -v

Print a list of all mailboxes containing a message from Rodney. Constrain the
search to the headers, since quoted emails may match the pattern:

  grepmail -hl "^From.*Rodney" saved-mail*

Find all emails with the text "Pilot" in both the header and the body:

  grepmail -hb "Pilot" saved-mail*

Print a count of the number of messages about grepmail in all saved-mail
mailboxes:

  grepmail -br grepmail saved-mail*

Remove any duplicates from a mailbox:

  grepmail -u saved-mail

Convert a Gnus mailbox to mbox format:

  grepmail . gnus-mailbox-dir/* > mbox

Search for all emails to or from an address (taking into account wrapped
headers and different header names):

  grepmail -Y '(^TO:|^From:)' my@email.address saved-mail

Find all emails from postmasters:

  grepmail -Y '^FROM_MAILER:' . saved-mail

=head1 FILES

grepmail will I<not> create temporary files while decompressing compressed
archives. The last version to do this was 3.5. While the new design uses
more memory, the code is much simpler, and there is less chance that email
can be read by malicious third parties. Memory usage is determined by the size
of the largest email message in the mailbox.

=head1 ENVIRONMENT

The MAIL environment variable can be used to specify the default mail
directory. This directory will be searched if the specified mailbox can not be
found directly.

=head1 BUGS AND LIMITATIONS

=over 2

=item Test case 1 fails on some platforms

Bug not squashed yet. Any info would be appreciated.

=item File names that look like flags cause problems.

In some special circumstances, grepmail will be confused by files whose names
look like flags. In such cases, use the B<-e> flag to specify the search
pattern.

=back

=head1 AUTHOR

David Coppit, <david@coppit.org>, http://coppit.org/

=head1 SEE ALSO

elm(1), mail(1), grep(1), perl(1), printmail(1), Mail::Internet(3),
procmailrc(5). Crocker, D.  H., Standard for the Format of Arpa Internet Text
Messages, RFC822.

=cut
