#!/usr/bin/perl -w

# mboxget - get a message from a mailbox or maildir, from mass-check output
#
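# usage: mboxget [-noannotate] [file ...]
#
# Each FILE (standard input if none is given) holds one message per line,
# either as a mass-check log line or as a bare mass-check mbox or file id.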
#
# example:
#
#   grep SUBJECT_FREQ spam.log | ./mboxget | grep Subject:
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

use strict;

# Basename of this script, used as a prefix in diagnostics.
my $prog = $0;
$prog =~ s@.*/@@;

# Forward declaration, so that mywarn may be called without parentheses
# ahead of its definition further down the file.
sub mywarn;

use Getopt::Long;

# -noannotate switches off the X-Mass-Check-Id / X-Mass-Check-Warning lines
# that are otherwise added to the output.  GetOptions has already reported
# the offending option on stderr when it returns false, so just add the usage
# line and stop instead of carrying on with a mistyped command line.
our $opt_noannotate;
GetOptions("noannotate" => \$opt_noannotate)
  or die "usage: $prog [-noannotate] [file ...]\n";

my $annotate = ($opt_noannotate ? 0 : 1);

# Snapshot of the arguments left after option parsing.  No code visible in
# this file reads it; the input loop consumes @ARGV itself through <>.
my @inputs = @ARGV;

# Main loop: read lines from the files named on the command line (or from
# standard input) and fetch the message each line refers to.
#
# A lexical line variable is used rather than $_, so that nothing called
# from inside the loop can disturb the line being examined.
while (defined(my $line = <>)) {
  $line =~ s/^[^\s:]+://;  # filenames, from "grep foo *"

  if ($line =~ /^[Y\.]\s+-?\d+\s+(\S+)\s+\S+/) {
    # mass-check format: the third field is the message id
    handle_input($1);
    next;
  }

  # anything else is a bare message id, one per line; "#" starts a comment
  next if $line =~ /^#/;
  chomp $line;

  # a blank line names no message, so there is nothing to fetch
  next unless $line =~ /\S/;

  handle_input($line);
}
exit;

# handle_input($where)
#
# Fetch one message and print it to standard output in mbox form.
#
# $where is either the path of an existing file, which is read whole, or
# "PATH.OFFSET", where OFFSET is the byte offset at which the message starts
# inside the mbox at PATH.  Paths ending in .gz or .bz2 are read through
# gunzip / bzip2.
#
# Leading "From " and "X-Mass-Check-Id: " lines are removed from the message.
# The last "From " line removed is reused as the mbox separator (a fixed
# placeholder is used when there was none), and unless annotation is switched
# off an "X-Mass-Check-Id: $where" header is put first.
#
# Failures are reported through mywarn and the message is skipped.  No
# meaningful value is returned.
sub handle_input {
  my $where = shift;
  my ($file, $offset);

  if (-f $where) {
    # an existing file is taken whole, even if its name ends in ".digits"
    $file = $where;
  } else {
    # The three-argument open below takes the name literally, so drop the
    # surrounding whitespace that a two-argument open would have ignored.
    (my $spec = $where) =~ s/^\s+|\s+$//g;
    ($file, $offset) = ($spec =~ m/(.*?)(?:\.(\d+))?$/);
  }

  # Any offset, including 0 for the first message, marks the file as an mbox
  # in which the message ends at the next "From " line.
  my $in_mbox = defined $offset;

  # List-form pipe opens and three-argument file opens pass the name through
  # untouched, so shell and open-mode metacharacters in it are not acted on.
  # A message that cannot be opened is skipped rather than printed empty.
  my $fh;
  if ($file =~ /\.gz$/) {
    if (!open($fh, '-|', 'gunzip', '-cd', $file)) {
      mywarn("gunzip $file failed: $!");
      return;
    }
  } elsif ($file =~ /\.bz2$/) {
    if (!open($fh, '-|', 'bzip2', '-cd', $file)) {
      mywarn("bunzip2 $file failed: $!");
      return;
    }
  } else {
    if (!open($fh, '<', $file)) {
      mywarn("open $file failed: $!");
      return;
    }
  }

  # Offset 0 needs no seek, which also lets it work on a pipe.
  if ($offset) {
    # TODO: steal open-file caching code from old revisions of
    # mass-check-results-to-mbox
    if (!seek($fh, $offset, 0)) {
      mywarn("$prog: seek $offset failed: $!\n");
      close $fh;
      return;
    }
  }

  # read the message into @msg
  my @msg;
  my $seen_first = 0;
  while (defined(my $line = <$fh>)) {
    if ($in_mbox && $seen_first) {
      # only mboxes hold more than one message; the first line read is the
      # message's own "From " separator, any later one starts the next message
      last if substr($line, 0, 5) eq "From ";
    }
    $seen_first = 1;
    push @msg, $line;
  }
  close $fh;

  # now chop off the leading headers that may have come from a previous
  # run, or will interfere with insertion of the X-Mass-Check-Id hdr
  my $fromline = "From nobody\@nowhere  Wed Jan  1 00:00:00 2000\n";
  while (@msg && $msg[0] =~ /^(?:From|X-Mass-Check-Id:) /) {
    my $dropped = shift @msg;
    $fromline = $dropped if $dropped =~ /^From /;
  }

  # and output
  unshift(@msg, "X-Mass-Check-Id: $where\n") if $annotate;
  print $fromline, @msg, "\n";
}

# mywarn(LIST)
#
# Report a problem: the arguments are passed to warn unchanged, and when
# annotation is on the same text is also printed to standard output as an
# "X-Mass-Check-Warning:" line, so that it travels with the extracted mail.
#
# The printed copy is kept to exactly one line.  A message that already ends
# in a newline would otherwise be followed by a blank line, which ends the
# header section in the output stream early.
sub mywarn {
  warn @_;
  return unless $annotate;

  my $text = join('', @_);
  $text =~ s/[\r\n]+\z//;        # the print below supplies the newline
  $text =~ s/\s*[\r\n]+\s*/ /g;  # fold any embedded line breaks
  print "X-Mass-Check-Warning: $text\n";
}

