########################## -*- Mode: Perl -*- ##########################
##
## File             : Http_client.pm
##
## Description      : http client for SFgate
##                    This stuff is mainly fetched from SFproxy.
##
#
# Copyright (C) 1995, 1996 Ulrich Pfeifer, Norbert Goevert
#
# This file is part of SFgate.
#
# SFgate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# SFgate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SFgate; see the file COPYING.  If not, write to
# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
#
##
## Author           : Norbert Goevert
## Created On       : Mon Sep 18 11:14:46 1995
##
## Last Modified By : Norbert Goevert
## Last Modified On : Wed Dec 11 16:11:52 1996
##
## $State: Exp $
##
## $Id: Http_client.pm,v 5.1.1.1 1996/12/23 12:50:58 goevert Exp goevert $
##
## $Log: Http_client.pm,v $
## Revision 5.1.1.1  1996/12/23 12:50:58  goevert
## patch6: use of SFgate::Config
##
## Revision 5.1  1996/11/05 16:55:27  goevert
## *** empty log message ***
##
## Revision 5.0.1.3  1996/11/04 13:10:24  goevert
## patch21: make perl happy
##
## Revision 5.0.1.2  1996/05/31 15:44:19  goevert
## patch14: setting for hostname
##
## Revision 5.0.1.1  1996/05/13 11:28:48  goevert
## patch1:
##
########################################################################


use strict;


package SFgate::Http_client;


use Sys::Hostname;
use Socket qw(SOCK_STREAM PF_INET);
require SFgate::Config;


my($hostname, $af_inet, $sock_stream, $sockaddr, $default_http_port);

$hostname          = hostname;

$af_inet           = PF_INET;
$sock_stream       = SOCK_STREAM;
$sockaddr          = 'S n a4 x8';

$default_http_port = 80;


## #################################################################
## do_http($url)
## #################################################################
## Fetches HTML document pointed to by $url with GET method.
## If the variable $http_proxy is set, the request is forwarded to
## the host $http_proxy on port $http_proxy_port.
##
## (string) $url: $url to fetch
##
## returns:
## - string: the fetched text
## - string: error messages
##
sub do_http
{
    my($url) = @_;
    ## return value
    my($response_body, $error);
    ## local variables
    my($serverprotocol, $serverhost, $serverport, $serverurl);
    my($req, $method, $protocol, $serverfh);
    my($response_header, $status_code, $reason_phrase, $content_type, $http_version);

    ($serverprotocol, $serverhost, $serverport, $serverurl, $error) = &parse_http_url($url);
    return ('', $error) if $error;

    if ($SFgate::Config{HTTPPROXY} && $serverhost ne $hostname) {
        $serverprotocol = "http";
        $serverhost = $SFgate::Config{HTTPPROXY};
        $serverport = $SFgate::Config{HTTPPROXYPORT};
        $serverurl = $url;
    }

    &::log("$serverhost, $serverport, $error, $hostname");
    ($serverfh, $error) = &open_server($serverhost, $serverport);
    return ('', $error) if $error;
    
    &put_request($serverfh, "GET $serverurl HTTP/1.0\r\n\r\n");

    ($response_header, $response_body, $status_code,
     $reason_phrase, $content_type, $http_version, $error) = &get_response($serverfh);
    return ('', $error) if $error;

    ## request successful?
    # Nothing to do if status code doesn't indicate success.
    return ('', "Error $status_code: $reason_phrase") if $status_code !~ /^20[0123]/;

    # try to construct text for some content types
    if ($content_type =~ m:image/(gif|jpeg):) {
        $response_body = "<HTML>\n<HEAD>\n" .
            "<TITLE>$1 image</TITLE>\n" .
                "</HEAD>\n<BODY>\n" .
                    "<IMG SRC=\"$url\" ALT=\"$1 image\">\n" .
                        "</BODY>\n</HTML>\n";
    }
    elsif ($content_type ne "text/html") {
        # Nothing to do if content type isn't text/html.
        return ('', 'content type is not text/html');
    }
    
    # Nothing to do if response body seems to be empty.
    return ('', 'response body seems to be empty') if $response_body !~ /[A-z]/;

    return ($response_body, '');
}


## #################################################################
## put_request($serverfh, $req)
## #################################################################
## Sends the request $req to the HTTP server, vie the filehandle
## $serverfh.
##
## (filehandle) $serverfh: filehandle to put request on
## (string) $req: request part of url
##
sub put_request
{
    my($serverfh, $req) = @_;

    no strict 'refs';
    print $serverfh $req;
}


## #################################################################
## get_response($serverfh)
## #################################################################
## Gets a response from the HTTP server (via $serverfh).  HTTP/1.0
## responses are understood.  If the response is HTTP/1.0, the return
## values are $response_header, $response_body, $status_code,
## $reason_phrase and $content_type.
## sets global variable $error if something goes wrong
##
## (filehandle) $serverfh: Filehandle to read response from
##
## returns:
## - string: response_header: The header of the response, as defined in
##           HTTP/1.0.  (Ie the part before the first empty line.)
## - string: response_body: The body of the response, as defined in HTTP/1.0.
##           (Ie the part after the first empty line.)
## - integer: status_code: The first line of the response contains a status code
##           and a reason phrase, as defined in HTTP/1.0.  This is the
##           status code part of that line.
## - string: reason_phrase: This is the reason phrase of that line.
## - string: content_type: The value of the Content-Type header field.
##
sub get_response
{
    my($serverfh) = @_;
    ## local variables
    my($line, $in_header, $first_line);
    my($header_field, $header_value, $http_version);
    my($full_response );
    ## return value
    my($response_header, $response_body, $status_code, $reason_phrase, $content_type, $error);

    ## At the beginning of the response, we're reading a header
    ## information.
    $in_header = 1;               ## true
    $first_line = 1;              ## true
    ## We assume we're talking HTTP 1.0.
    $http_version = "1.0";
    ## We assume we'll get a full response, as defined by HTTP 1.0.
    $full_response = 1;

    no strict 'refs';
    while ($line = <$serverfh>) {
        ## The first line gets special processing.  It decides between
        ## old (0.9) and new (1.0) HTTP.
        if ( $first_line ) {
            $first_line = 0;
            ## Weed out unknown protocol versions first.
            ($http_version, $status_code, $reason_phrase) =
                ($line =~ m:HTTP/([0-9.]*) ([0-9]*) (.*):o);

            if ( $http_version ne "1.0" ) {
                $error = "Unsupported protocol version $http_version";
                return ('', '', '', '', '', $error);
            }
            if ( $http_version eq "1.0" ) {
                $full_response = 1;
            }
        }
        $in_header=0 if $line =~ /^(\r)?$/;
        $in_header=0 if !$full_response;
        if ($in_header) {
            ($header_field, $header_value) =  $line =~ m/^([-a-z]+): (.*)$/i;
            $header_value =~ s/\r$//o;
            $header_field =~ tr/A-Z/a-z/;
            $content_type = $header_value if $header_field eq "content-type";
        }
        if ( $in_header ) {
            $response_header = $response_header . $line;
        }
        else {
            $response_body = $response_body . $line;
        }
    }

    return ($response_header, $response_body, $status_code,
            $reason_phrase, $content_type, $http_version, $error);
}


## #################################################################
## open_server($serverhostname, $port)
## #################################################################
## Opens a TCP connection on the $port given on the host
## $serverhostname.  Returns a file handle corresponding to this TCP
## connection.
## Sets global variable $error if something goes wrong
##
## (string) $serverhostname: host to contact
## (string) $port: port to contact
##
## returns:
## - filehandle: resulting filehandle 
##
sub open_server
{
    my($serverhostname, $port) = @_;
    ## local variables
    my($this, $that, $proto, $oldfh, $thisaddr, $thataddr);
    ## return value
    my($returnfh, $error);
    
    $proto = (getprotobyname('tcp'))[2]
        || ($error = "unknown prototype: tcp");
    if ($port !~ /^\d+$/) {
        $port = (getservbyname($port, 'tcp'))[2]
            || ($error = "unknown port: $port");
    }
    $thisaddr = (gethostbyname($hostname))[4]
        || ($error = "unknown hostname: $hostname");
    if ($serverhostname =~ /^\d+\.\d+\.\d+\.\d+$/) {
        # IP numbers
        $thataddr = pack("CCCC", split(/\./, $serverhostname));
    }
    else {
        # domain names
        $thataddr = (gethostbyname($serverhostname))[4]
            || ($error = "unknown hostname: $serverhostname");
    }

    $this = pack($sockaddr, $af_inet, 0, $thisaddr);
    $that = pack($sockaddr, $af_inet, $port, $thataddr);

    socket(SERVERFH, $af_inet, $sock_stream, $proto)
        || ($error = "socket: $!");
    bind(SERVERFH, $this)
        || ($error = "bind: $!");
    connect(SERVERFH, $that)
        || ($error = "connect failed to $serverhostname:$port: $!");

    # unbuffer
    $oldfh = select(SERVERFH);
    $| = 1;
    $returnfh = select($oldfh);

    return ($returnfh, $error);
}


## #################################################################
## parse_http_url($url)
## #################################################################
## The given $url is parsed, and its component parts are returned.  As
## an example, the component parts of the URL
## http://www.w3.org:80/home.html are:
##   - $protocol: is ftp, gopher, or http.  Here: http
##   - $host: the host name.  Here: www.w3.org
##   - $port: the port.  Defaults to 80 if not given.  Here: 80
##   - $path: the rest.  Here: /home.html
## sets global variable $error if something goes wrong
##
## (string) $url: url to parse
##
## returns:
## - string: protocol
## - string: host to contact
## - string: port to use
## - string: path
##
sub parse_http_url
{
    local($_) = @_;
    ## return values
    my($protocol, $host, $port, $path, $error);

    if (m@([a-zA-Z]+)://([^:/]+)(:[0-9]+)?(.*)@) {
        $protocol = $1;
        $host     = $2;
        $port     = $3;
        $path     = $4;
    }
    elsif (m@([a-zA-Z]+):(/[^/].*)@) {
        $protocol = $1;
        $host     = $hostname;
        $path     = $2;
    }
    else {
        $error = "Wrong format URL $_";
    }
    
    $port =~ s/^://;
    $port = $default_http_port if $port !~ /^[0-9]+/;
    
    return ($protocol, $host, $port, $path, $error);
}


1;
