#!/usr/local/bin/perl
#########
# ProServer DAS Server
# Author:        rmp
# Maintainer:    $Author: rmp $
# Created:       2003-05-22
# Last Modified: $Date: 2006/07/03 10:05:07 $
#
=head1 AUTHOR

Roger Pettett <rmp@sanger.ac.uk>.

Based on example preforking POEserver at
http://poe.perl.org/?POE_Cookbook/Web_Server_With_Forking

Copyright (c) 2004 GRL (The Sanger Institute)

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.  See DISCLAIMER.txt for
disclaimers of warranty.

=cut

use lib qw(./blib/lib ../blib/lib . ..);
package poeserver;
use warnings;
use strict;
use Bio::Das::ProServer::Config;
use CGI qw(:cgi);
use Compress::Zlib;
use Getopt::Long;
use POE;                         # Base features.
use POE::Filter::HTTPD;          # For serving HTTP content.
use POE::Wheel::ReadWrite;       # For socket I/O.
use POE::Wheel::SocketFactory;   # For serving socket connections.
use POSIX;
use Sys::Hostname;
use Bio::Das::ProServer::SourceAdaptor;
use Bio::Das::ProServer::SourceHydra;

our $VERSION        = do { my @r = (q$Revision: 2.01 $ =~ /\d+/g); sprintf "%d."."%03d" x $#r, @r };
our $GZIP_THRESHOLD = 10000;
$ENV{'PATH'}        = "/bin:/usr/bin:/usr/local/bin";
our $WRAPPERS       = {
		       'dsn'          => {
					  'open'  => qq(<?xml version="1.0" standalone="no"?>\n<!DOCTYPE DASDSN SYSTEM 'http://www.biodas.org/dtd/dasdsn.dtd' >\n<DASDSN>\n),
					  'close' => qq(</DASDSN>\n),
					 },
		       'features'     => {
					  'open'  =>  qq(<?xml version="1.0" standalone="yes"?>\n<!DOCTYPE DASGFF SYSTEM "http://www.biodas.org/dtd/dasgff.dtd">\n<DASGFF>\n  <GFF version="1.01" href="http://%host:%port/das/%dsn/features">\n),
					  'close' => qq(  </GFF>\n</DASGFF>\n),
					 },
		       'dna'          => {
					  'open'  => qq(<?xml version="1.0" standalone="no"?>\n<!DOCTYPE DASDNA SYSTEM "http://www.biodas.org/dtd/dasdna.dtd">\n<DASDNA>\n),
					  'close' => qq(</DASDNA>\n),
					 },
		       'sequence'     => {
					  'open'  => qq(<!DOCTYPE DASSEQUENCE SYSTEM "http://www.biodas.org/dtd/dassequence.dtd">\n<DASSEQUENCE>\n),
					  'close' => qq(</DASSEQUENCE>\n),
					 },
		       'types'        => {
					  'open'  => qq(<?xml version="1.0" standalone="no"?>\n<!DOCTYPE DASTYPES SYSTEM "http://www.biodas.org/dtd/dastypes.dtd">\n<DASTYPES>\n  <GFF version="1.0" href="http://%host:%port/das/%dsn/types">\n),
					  'close' => qq(  </GFF>\n</DASTYPES>\n),
					 },
		       'entry_points' => {
					  'open'  => qq(<?xml version="1.0" standalone="no"?>\n<!DOCTYPE DASEP SYSTEM "http://www.biodas.org/dtd/dasep.dtd">\n<DASEP>\n  <ENTRY_POINTS href="http://%host:%port/das/%dsn/entry_points" version="1.0">\n),
					  'close' => qq(  </ENTRY_POINTS>\n</DASEP>\n),
					 },
                      'alignment'    => {
                                          'open'  => qq(<?xml version="1.0" standalone="no"?>\n<dasalignment 
xmlns="http://www.efamily.org.uk/xml/das/2004/06/17/dasalignment.xsd" 
xmlns:align="http://www.efamily.org.uk/xml/das/2004/06/17/alignment.xsd" 
xmlns:xsd="http://www.w3.org/2001/XMLSchema-instance" 
xsd:schemaLocation="http://www.efamily.org.uk/xml/das/2004/06/17/dasalignment.xsd 
http://www.efamily.org.uk/xml/das/2004/06/17/dasalignment.xsd">\n),
                                          'close' => qq(</dasalignment>\n),
					},
                       'structure'    => {
					  'open'  => qq(<?xml version="1.0" standalone="no"?>\n<dasstructure xmlns="http://www.efamily.org.uk/xml/das/2004/06/17/dasstructure.xsd" xmlns:xsd="http://www.w
3.org/2001/XMLSchema-instance" xsd:schemaLocation="http://www.efamily.org.uk/xml/das/2004/06/17/dasstructure.xsd http://www.efamily.org.uk/xml/das/2004/06/17/dasstructure.xsd">\n),
                                          'close' => qq(</dasstructure>\n),
					 },
		      };

my $opts     = {};
my @saveargv = @ARGV;
my $result   = GetOptions(
			  $opts,
			  'debug',
                          'port=i',
                          'hostname=s',
                          'usage|help',
                          'inifile|config|c=s',
                          'X|x',
			 );

my $vstr = "ProServer DAS Server v$VERSION (c) GRL 2004...";

print "#"x(length($vstr)+6), "\n";
printf("#  %-@{[length($vstr)]}s  #\n", $vstr);
printf("#  %-@{[length($vstr)]}s  #\n", "http://www.sanger.ac.uk/proserver/");
print "#"x(length($vstr)+6), "\n";

@ARGV      = @saveargv;


if($opts->{'usage'}) {
  print qq(
 -debug           # Enable extra debugging
 -port   <9000>   # Listen on this port (overrides configuration file)
 -hostname <*>    # Listen on this interface x (overrides configuration file)
 -help            # This help
 -config          # Use this configuration file
 -x               # Development mode - disables server forking\n\n);
  exit(0);
}

if(!$opts->{'inifile'}) {
  $opts->{'inifile'} = "eg/proserver.ini";
  print STDERR qq(Using default '$opts->{'inifile'}' file.\n);
}

if(!-e $opts->{'inifile'}) {
  print STDERR qq(Invalid configuration file: $opts->{'inifile'}. Stopping.\n);
  exit;
}

my $config = Bio::Das::ProServer::Config->new($opts);

if(!$opts->{'X'} && fork()) {
  print STDERR qq(Parent process detached...\n);
  exit;

} elsif($opts->{'X'}) {
  $config->maxclients(0);
}

POSIX::setsid() or die "Can't setsid";

my $logfile = $config->logfile();
if (!defined $logfile) {
  my ($pidpath)  = ($config->pidfile()   ||"") =~ /^(.*)\//;
  my ($confpath) = ($config->{'inifile'} ||"") =~ /^(.*)\//;
  $pidpath     ||= $confpath;
  $pidpath      .= $pidpath?"/":"";
  $logfile       = sprintf("%sproserver.%s.log", $pidpath, &hostname());
}


open STDIN,  '</dev/null' or die "Can't open STDIN from /dev/null: [$!]\n";
if(!$opts->{'X'}) {
  print STDERR qq(Logging to $logfile\n);
  open STDOUT, ">$logfile"  or die "Can't open STDOUT to /dev/null: [$!]\n";
  open STDERR, '>&STDOUT'   or die "Can't open STDERR to STDOUT: [$!]\n";
}

for my $f (qw(port hostname debug)) {
  $config->{$f} = $opts->{$f} if(defined $opts->{$f});
}

if(exists $config->{'ensemblhome'}) {
  $ENV{'ENS_ROOT'}     = $config->{'ensemblhome'};
  print STDERR qq(Set ENS_ROOT to $ENV{'ENS_ROOT'}\n);
}

if(exists $config->{'oraclehome'}) {
  $ENV{'ORACLE_HOME'}  = $config->{'oraclehome'};
  print STDERR qq(Set ORACLE_HOME to $ENV{'ORACLE_HOME'}\n);
}

if(exists $config->{'bioperlhome'}) {
  $ENV{'BIOPERL_HOME'} = $config->{'bioperlhome'};
  print STDERR qq(Set BIOPERL_HOME to $ENV{'BIOPERL_HOME'}\n);
}

my $pidfile  = $config->pidfile() || sprintf("%s.%s.pid", $0||"proserver", &hostname() || "localhost");
&make_pidfile();

# Spawn up to max server processes, and then run them.  Exit
# when they are done.

server_spawn($config->maxclients());
$poe_kernel->run();
exit 0;

sub DEBUG ()         { return $opts->{'debug'}; } # Enable a lot of runtime information.
sub TESTING_CHURN () { 0 }              # Randomly shutdown children to test respawn.

### Spawn the main server.  This will run as the parent process.

sub server_spawn {
    my ($max_processes) = @_;

    POE::Session->create
      ( inline_states =>
          { _start         => \&server_start,
            _stop          => \&server_stop,
            do_fork        => \&server_do_fork,
            got_error      => \&server_got_error,
            got_sig_hup    => \&server_got_sig_hup,
            got_sig_int    => \&server_got_sig_int,
            got_sig_chld   => \&server_got_sig_chld,
            got_connection => \&server_got_connection,

            _child => sub { 0 },
          },
        heap =>
          { max_processes => $max_processes,
          },
      );
}

### The main server session has started.  Set up the server socket and
### bookkeeping information, then fork the initial child processes.

sub server_start {
    my ( $kernel, $heap ) = @_[ KERNEL, HEAP ];

    $heap->{server} = POE::Wheel::SocketFactory->new
      ( BindAddress  => $config->interface()||undef,
	BindPort     => $config->port(),
        SuccessEvent => "got_connection",
        FailureEvent => "got_error",
        Reuse        => "yes",
      );

    $kernel->sig( CHLD  => "got_sig_chld" );
    $kernel->sig( INT   => "got_sig_int" );
    $kernel->sig( TERM  => "got_sig_int" );
    $kernel->sig( KILL  => "got_sig_int" );
    $kernel->sig( HUP   => "got_sig_hup" );
    $kernel->sig( USR1  => "got_sig_hup" );

    $heap->{children}   = {};
    $heap->{is_a_child} = 0;

    warn "Server $$ has begun listening on ", $config->host(), ":", $config->port(), "\n";

    $kernel->yield("do_fork");
}

### The server session has shut down.  If this process has any
### children, signal them to shutdown too.

sub server_stop {
    my $heap = $_[HEAP];
    DEBUG and warn "Server $$ stopped.\n";
    if ( my @children = keys %{ $heap->{children} } ) {
        DEBUG and warn "Server $$ is signaling children to stop.\n";
        kill INT => @children;
    }
    &remove_pidfile();
}

### The server session has encountered an error.  Shut it down.

sub server_got_error {
    my ( $heap, $syscall, $errno, $error ) = @_[ HEAP, ARG0 .. ARG2 ];
    DEBUG and
      warn( "Server $$ got $syscall error $errno: $error\n",
        "Server $$ is shutting down.\n",
      );
    delete $heap->{server};
}

### The server has a need to fork off more children.  Only honor that
### request form the parent, otherwise we would surely "forkbomb".
### Fork off as many child processes as we need.

sub server_do_fork {
    my ( $kernel, $heap ) = @_[ KERNEL, HEAP ];

    return if $heap->{is_a_child};

    my $current_children = keys %{ $heap->{children} };
    for ( $current_children + 2 .. $heap->{max_processes} ) {

        DEBUG and warn "Server $$ is attempting to fork.\n";

        my $pid = fork();

        unless ( defined($pid) ) {
            DEBUG and
              warn( "Server $$ fork failed: $!\n",
                "Server $$ will retry fork shortly.\n",
              );
            $kernel->delay( do_fork => 1 );
            return;
        }

        # Parent.  Add the child process to its list.
        if ($pid) {
            $heap->{children}->{$pid} = 1;
            next;
        }

        # Child.  Clear the child process list.
        DEBUG and warn "Server $$ forked successfully.\n";
        $heap->{is_a_child} = 1;
        $heap->{children}   = {};
	$heap->{hitcount}   = 0;
        return;
    }
}

### The server session received SIGHUP.  Re-execute this process, remembering any argv options

sub server_got_sig_hup {
    DEBUG and warn "Server $$ received SIGHUP|USR1.\n";

    #########
    # shutdown children
    #
    &server_stop(@_);

    #########
    # exec(self)
    #
    print STDERR qq(0=$0, argv=@ARGV);
    exec $0, @ARGV;
}

### The server session received SIGINT.  Don't handle the signal,
### which in turn will trigger the process to exit gracefully.

sub server_got_sig_int {
    DEBUG and warn "Server $$ received SIGINT.\n";
    return 0;
}

### The server session received a SIGCHLD, indicating that some child
### server has gone away.  Remove the child's process ID from our
### list, and trigger more fork() calls to spawn new children.

sub server_got_sig_chld {
    my ( $kernel, $heap, $child_pid ) = @_[ KERNEL, HEAP, ARG1 ];

    if ( delete $heap->{children}->{$child_pid} ) {
        DEBUG and warn "Server $$ received SIGCHLD.\n";
        $kernel->yield("do_fork");
    }
    return 0;
}

### The server session received a connection request.  Spawn off a
### client handler session to parse the request and respond to it.

sub server_got_connection {
    my ( $heap, $socket, $peer_addr, $peer_port ) = @_[ HEAP, ARG0, ARG1, ARG2 ];

    DEBUG and warn "Server $$ received a connection.\n";

    POE::Session->create
      ( inline_states =>
          { _start      => sub { eval { &client_start(@_); }; warn $@ if($@); },
            _stop       => \&client_stop,
            got_request => sub { eval { &client_got_request(@_); }; warn $@ if($@); },
            got_flush   => \&client_flushed_request,
            got_error   => \&client_got_error,
            _parent     => sub { 0 },
          },
        heap =>
          { socket    => $socket,
            peer_addr => $peer_addr,
            peer_port => $peer_port,
          },
      );

    delete $heap->{server}
      if TESTING_CHURN and $heap->{is_a_child} and ( rand() < 0.1 );
}

### The client handler has started.  Wrap its socket in a ReadWrite
### wheel to begin interacting with it.

sub client_start {
    my $heap = $_[HEAP];

    $heap->{client} = POE::Wheel::ReadWrite->new
      ( Handle       => $heap->{socket},
        Filter       => POE::Filter::HTTPD->new(),
        InputEvent   => "got_request",
        ErrorEvent   => "got_error",
        FlushedEvent => "got_flush",
      );

    DEBUG and warn "Client handler $$/", $_[SESSION]->ID, " started.\n";
}

### The client handler has stopped.  Log that fact.

sub client_stop {
    DEBUG and warn "Client handler $$/", $_[SESSION]->ID, " stopped.\n";
}

### The client handler has received a request.  If it's an
### HTTP::Response object, it means some error has occurred while
### parsing the request.  Send that back and return immediately.
### Otherwise parse and process the request, generating and sending an
### HTTP::Response object in response.

sub client_got_request {
  my ( $heap, $request ) = @_[ HEAP, ARG0 ];

  DEBUG and
    warn "Client handler $$/", $_[SESSION]->ID, " is handling a request.\n";

  if ( $request->isa("HTTP::Response") ) {
    $heap->{client}->put($request);
    return;
  }

  #########
  # Handle DAS responses here
  #
  my $response     = HTTP::Response->new(200);
  my $uri          = $request->uri();
  my ($dsn, $call) = $uri =~ m|/das/([^/\?\#]+)(?:/([^/\?\#]+))?|;
  $call            = "homepage" if($dsn && !$call);
  $dsn           ||= "";

  if($dsn && $config->knows($dsn)) {
    my $mimetype     = ($call eq "homepage")?"text/html":"text/xml";
    $response->content_type($mimetype);
    my $cgi;

    #########
    # process the parameters
    #
    if ($request->method() eq 'GET') {
      my ($query) = $request->uri() =~ /\?(.*)$/;
      $cgi = CGI->new($query);

    } elsif ($request->method() eq 'POST') {
      $cgi = CGI->new($request->{'_content'});
    }

    my $method   = "das_$call";
    my $segments = [$cgi->param('segment')];
    my $features = [$cgi->param('feature_id')];
    my $groups   = [$cgi->param('group_id')];
    my $query    = $cgi->param('query'); 
    my $chains   = [$cgi->param('chain')];
    my $ranges   = [$cgi->param('range')];
    my $model    = [$cgi->param('model')];
    my $subjects = [$cgi->param('subject')];
    my $rows     = $cgi->param('rows');
    my $subCoos  = $cgi->param('subjectcoordsys');
    my $adaptor  = $config->adaptor($dsn);

    if($adaptor->implements($call) || $call eq "homepage") {

      my $use_gzip = 0;
      if ($call ne "homepage" && $request->header('Accept-Encoding') && ($request->header('Accept-Encoding') =~ /gzip/) ) {
	DEBUG and warn qq(Turning on compression);
	$use_gzip = 1;
      }

      eval {
	my $open    = $WRAPPERS->{$call}->{'open'}  || "";
	my $close   = $WRAPPERS->{$call}->{'close'} || "";
	$open       =~ s/\%host/@{[$config->host()]}/smg;
	$open       =~ s/\%port/@{[$config->port()]}/smg;
	$open       =~ s/\%dsn/$dsn/smg;
	my $content = $open.$adaptor->$method({
					       'segments' => $segments,
					       'features' => $features,
					       'groups'   => $groups,
					       'query'    => $query,
                                               'subjects' => $subjects,
                                               'chains'   => $chains,
                                               'ranges'   => $ranges,
                                               'model'    => $model,
                                               'rows'     => $rows,
                                               'subcoos'  => $subCoos
					      }).$close;

	if( ($use_gzip && length($content) > $GZIP_THRESHOLD) ) {
	  DEBUG and warn qq(Compressing content);
	  my $squashed = Compress::Zlib::memGzip($content);

	  if($squashed) {
	    $content   = $squashed;
	    $response->content_encoding('gzip');

	  } else {
	    warn ("Content compression failed: $!\n");
	  }
	}

	$response->content_length(length($content));
	$response->content($content);
      };
      if($@) {
	warn $@;
	$response->content_type("text/plain");
	$response->code(501); #?
	$response->content("source error");
      }

    } else {
      $response->content_type("text/plain");
      $response->code(501);
      $response->content(qq(call @{[$call||""]} unimplemented));
    }

  } elsif($dsn eq "dsn") {
    $response->content_type("text/xml");

    #########
    # Building this response here isn't particularly nice
    # but it saves the penalty of initialising another new sourceadaptor
    #
    $response->content_type("text/xml");

    my $dsnxml = qq(@{[map {
	sprintf(qq(  <DSN>\n    <SOURCE id="%s" version="%s">%s</SOURCE>\n    <MAPMASTER>%s</MAPMASTER>\n    <DESCRIPTION>%s</DESCRIPTION>\n  </DSN>\n),
		$_->dsn(),
		$_->dsnversion()  || "",
		$_->dsn(),
		$_->mapmaster()   || $_->config->{'mapmaster'}   || sprintf("http://%s:%s/das/%s", $config->host(), $config->port(), $_->dsn()) || "",
		$_->description() || $_->config->{'description'} || $_->dsn() || "",
		);
    } sort { $a->dsn() cmp $b->dsn() } $config->adaptors()]});
    $response->content($WRAPPERS->{'dsn'}->{'open'}.$dsnxml.$WRAPPERS->{'dsn'}->{'close'});

  } elsif($uri eq "/") {
    $response->content_type("text/html");
    $response->code(200);
    my $content = qq(<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 4.01 Strict//EN">
<html>
  <head>
    <title>Welcome to ProServer v$VERSION</title>
  </head>
  <body><h1>Welcome to ProServer v$VERSION</h1>
Perform a <a href="/das/dsn">DSN</a> request.\n);

    if(scalar $config->adaptors()) {
      $content .= qq(<table><tr><th>Source</th><th>Mapmaster</th><th>Description</th><th>Capabilities</th></tr>
@{[map {
  sprintf(qq(<tr><td><a href="/das/%s">%s</a></td><td>%s</td><td>%s</td><td>%s</td></tr>\n),
  $_->dsn(), $_->dsn(),
  $_->mapmaster()        || "",
  $_->description()      || "",
  $_->das_capabilities() || "",
)} $config->adaptors()]}
</table>\n);
    } else {
      $content .= qq(<br /><b>No adaptors configured.</b>\n);
    }

    $content .= qq(
<ul>
  <li>Bio::Das::ProServer::Config v$Bio::Das::ProServer::Config::VERSION</li>
  <li>Bio::Das::ProServer::SourceAdaptor v$Bio::Das::ProServer::SourceAdaptor::VERSION</li>
  <li>Bio::Das::ProServer::SourceHydra v$Bio::Das::ProServer::SourceHydra::VERSION</li>
</ul>

<br /><br /><br />
<center><small><a href="http://www.sanger.ac.uk/proserver/">ProServer homepage</a> | <a href="http://das.sanger.ac.uk/registry/">DAS registry</a> | <a href="http://biodas.org/">BioDAS.org</a></small></center>
</body>
</html>\n);

    $response->content($content);

  } elsif(substr($uri, 0, 5) ne "/das/") {
    $response->content_type("text/plain");
    $response->code(403);
    $response->content("forbidden");

  } else {
    $response->content_type("text/plain");
    $response->content(qq(unimplemented. uri=@{[$uri||""]}, dsn=@{[$dsn||""]}, call=@{[$call||""]}));
  }

  #########
  # Add custom X-DAS headers
  #
  $response->header("X-DAS-Version"      => "1.0");
  $response->header("X-DAS-Status"       => $response->code()||"");
  $response->header("X-DAS-Capabilities" => $config->adaptor($dsn)->das_capabilities()||"") if($dsn and $config->knows($dsn));

  #
  # Finished handling das responses
  #########

  $heap->{hitcount}++;
  $heap->{client}->put($response);
}

### The client handler received an error.  Stop the ReadWrite wheel,
### which also closes the socket.

sub client_got_error {
    my ( $heap, $operation, $errnum, $errstr ) = @_[ HEAP, ARG0, ARG1, ARG2 ];
    DEBUG and
      warn( "Client handler $$/", $_[SESSION]->ID,
        " got $operation error $errnum: $errstr\n",
        "Client handler $$/", $_[SESSION]->ID, " is shutting down.\n"
      );
    delete $heap->{client};
}

### The client handler has flushed its response to the socket.  We're
### done with the client connection, so stop the ReadWrite wheel.

sub client_flushed_request {
    my $heap = $_[HEAP];
    DEBUG and
      warn( "Client handler $$/", $_[SESSION]->ID,
        " flushed its response.\n",
        "Client handler $$/", $_[SESSION]->ID, " is shutting down.\n"
      );
    delete $heap->{client};
}

### We're done.

sub make_pidfile {
  my ($spidfile) = $pidfile =~ /([a-zA-Z0-9\.\/_\-]+)/;
  my $fh;
  print STDERR qq(Writing pidfile $spidfile\n);
  open ($fh, ">$spidfile") or die "Cannot create pid file: $!\n";
  print $fh "$$\n";
  close($fh);
  return($$);
}

sub remove_pidfile {
  my ($spidfile) = $pidfile =~ /([a-zA-Z0-9\.\/_\-]+)/;
  if(-f $spidfile) {
    unlink($spidfile);
    DEBUG and warn qq(Removed pidfile);
  }
}
