#!/usr/bin/perl -w

$CHILDREN  = 40;   # Number of children to spawn
$TIMEOUT   = 30;   # DNS timeout
$FLUSH     = 3000; # Flush buffer every $FLUSH lines
$DEBUG     = 0;

# ip2host v0.04 - Resolve IPs to hostnames in web server logs 
# Maurice Aubrey <maurice@classmates.com>
#
# $Id: ip2host,v 1.1.1.4 2000/04/14 12:33:41 maurice Exp $
#
# CHANGES:
#
#   0.05 Fri Apr 14 05:31:38 PDT 2000
#        - Add POD to allow inclusion in CPAN
#
#   0.04 Mon Nov 22 17:54:07 PST 1999
#        - Check socketpair() return value
#        - Updated documentation
# 
#   0.03 Thu Nov 18 16:57:53 PST 1999 
#        - Renamed $BUFFER to $FLUSH
#        - Improved documentation 
#
#   0.02 Sat Oct 16 00:05:29 PDT 1999
#        - Initial public release

use strict;
use vars qw( $CHILDREN $TIMEOUT $FLUSH $DEBUG %Buffer $Next_Line %Cache );
use Socket;
use IO::Handle;
use IO::Select;

my $cache_file = shift @ARGV;
if ($cache_file) { # Cache results to disk if asked
  require DB_File;
  tie %Cache, 'DB_File', $cache_file or die "unable to tie '$cache_file': $!";
}

# Write as many lines as we can until we come across one 
# that's missing (that means it's still pending DNS). 
sub flush_buffer {
  for (; exists $Buffer{ $Next_Line }; $Next_Line++) {
    print delete $Buffer{ $Next_Line };
  }
}

# Spawn the children
my $read_select  = new IO::Select;
my $write_select = new IO::Select;
for(my $child = 1; $child <= $CHILDREN; $child++) {
  my($child_fh, $parent_fh) = (new IO::Handle, new IO::Handle);
  socketpair($child_fh, $parent_fh, AF_UNIX, SOCK_STREAM, PF_UNSPEC)
    or die "socketpair failed: $!";
  $child_fh->autoflush;
  $parent_fh->autoflush;

  if (my $pid = fork) {
    close $parent_fh;
    $write_select->add( $child_fh ); # Start out writing to all children
  } else { # Child starts here
    die "cannot fork: $!" unless defined $pid;
    close $child_fh; close STDIN; close STDOUT; 
    $SIG{'ALRM'} = sub { die 'alarmed' };

    while(defined(my $ip = <$parent_fh>)) { # Get IP to resolve
      chomp($ip);
      my $host = undef;
      eval { # Try to resolve, but give up after $TIMEOUT seconds
        alarm( $TIMEOUT );
        my $ip_struct = inet_aton $ip;
        $host = gethostbyaddr $ip_struct, AF_INET;
        alarm(0);
      };
      # XXX Debug
      if ($DEBUG and $@ =~ /alarm/) {
        $host ||= 'TIMEOUT';
        # print STDERR "Alarming ($ip)...\n";
      }
      $host ||= $ip;
      print $parent_fh "$ip $host\n"; 
    }
    exit 0;
  }
}

$Next_Line = 1;
my $lineno = 0;
my %pending = ();

while(1) {

  # XXX Debug
  # print STDERR "buff[", scalar keys %Buffer, "] pend[", scalar keys %pending,
  #              "] cache[", scalar keys %Cache, "]\n";

  my($readable, $writable) = 
    IO::Select->select( $read_select, $write_select, undef );

  if (@$writable) { # One or more children ready for an IP
    my $line = '';
    while(@$writable and defined($line = <STDIN>)) {
      my($ip, $rest) = split / /, $line, 2;
      flush_buffer if ++$lineno % $FLUSH == 0;
      if (exists $Cache{ $ip }) { # We found this answer already 
        $Buffer{ $lineno } = "$Cache{ $ip } $rest";
      } elsif (exists $pending{ $ip }) { # We're still looking
        push @{ $pending{ $ip } }, [ $lineno, $rest ];
      } else { # Send IP to child
        my $write_fh = shift @$writable;
        print $write_fh "$ip\n";
        $pending{ $ip } = [ [ $lineno, $rest ] ];
        $write_select->remove( $write_fh ); # Move to read set
        $read_select->add( $write_fh );
      }
    }
    defined $line or undef $write_select; # Are we done with input?
  }

  while (@$readable) { # One or more children have an answer
    my $read_fh = shift @$readable; 
    my $str = <$read_fh>;
    chomp($str);
    my($ip, $host) = split / /, $str, 2;
    $Cache{ $ip } = $host;
    # Take all the lines that were pending for this IP and
    # toss them into the output buffer
    foreach my $pending (@{ $pending{ $ip } }) {
      $Buffer{ $pending->[0] } = "$host $pending->[1]";
    }
    delete $pending{ $ip };
    $read_select->remove( $read_fh ); # Move to write set
    $write_select->add( $read_fh ) if defined $write_select;
  }

  last if not defined $write_select and not keys %pending;
}

flush_buffer;

=pod

=head1 NAME

  ip2host - Resolve IPs to hostnames in web server logs

=head1 SYNOPSIS

  ip2host [cache_file] < infile > outfile

  infile  - Web server log file.  Any log format is acceptable, 
            as long as each line begins with the remote client's 
            IP address.

  outfile - Same as input file, but with all of the IPs resolved 
            to hostnames.        

=head1 DESCRIPTION

This script is a drop-in replacement for the logresolve.pl
script distributed with the Apache web server.

ip2host has the same basic design (fork children to handle
the DNS resolution in parallel), but multiplexes the communication.
This results in a significant speed improvement (approximately 10x
faster), and the performance degrades more gracefully as the DNS
timeout value ($TIMEOUT) is increased.        

This script is reported to work under Linux, FreeBSD, Solaris,
Tru64, and IRIX.

=head1 AUTHOR 

Maurice Aubrey E<lt>maurice@hevanet.comE<gt>

=head1 COPYRIGHT

Copyright 1999-2000, Maurice Aubrey E<lt>maurice@hevanet.comE<gt>.
All rights reserved.

This module is free software; you may redistribute it and/or
modify it under the same terms as Perl itself.

=head1 README

Drop-in replacement for the logresolve.pl script distributed
with the Apache web server that's approximately 10x faster.

=head1 SCRIPT CATEGORIES

Web        

=cut
