# prot.tcl,v 1.1 1995/11/17 00:42:09 steve Exp
#
#	PASTIME Project
#	Cooperative Research Centre for Advanced Computational Systems
#	COPYRIGHT NOTICE AND DISCLAIMER.
#
#	Copyright (c) 1995 ANU and CSIRO
#	on behalf of the participants in
#	the CRC for Advanced Computational Systems (ACSys)
#
# This software and all associated data and documentation ("Software")
# was developed for research purposes and ACSys does not warrant that 
# it is error free or fit for any purpose.  ACSys disclaims all liability
# for all claims, expenses, losses, damages and costs any user may incur 
# as a result of using, copying or modifying the Software.
#
# You may make copies of the Software but you must include all of this
# notice on any copy.
###
# protocols.tcl handles the network protocols.
#
# All procedures are prefixed by PR.  Global variables use either PR or PS.

# PRloadDocument handles loading the data and invoking the content-type handler
# If type is specified then the data is forced through the handler for that
# type.
#
# win is a unique token for loading this document (typically the widget
# pathname that the document is to be loaded into).  From this token
# various state variables are generated.  By convention if win is not
# a pathname then array PS$win win will contain the window into which the
# document is being loaded.
#
# PRloadDocument returns 1 if a document was successfully loaded, 0 otherwise.
# NB. Errors are propagated normally, so a return value of 0 indicates that
# no document was found to be loaded, rather than an error occurring in loading
# the document.

proc PRloadDocument {url win renderState waitproc startproc {type {}}} {
    global PRtypes
    upvar #0 PR$win var
    upvar #0 $renderState ren

    # First, get the data
    if {[set result [PRloadData $url PR$win data]] == {}} {return 0};

    # Mutual pointers
    set var(renderState) $renderState
    set ren(dataState) PR$win

    if {$var(HDRcontent-type) == {} && $type == {} && $result == "PRfd"} {
	# Delay handing data to the content-type handler until the
	# network protocol handler can determine the content-type
	array set var "
	    handlerDelayed 1 cookie $win startproc $startproc waitproc $waitproc
	"
	$waitproc $var(fd) readable \
		"$var(read_handler) PR$win"
	return 0
    }
    set var(handlerDelayed) 0

    # Data is ready to be rendered, so find a content-type handler
    if {$type == {}} {set type $var(HDRcontent-type)}
    PRdispatch_handler $win $result $renderState $waitproc $startproc $type $data
    return 1
}

proc PRdispatch_handler {win dataSrc renderState waitproc startproc type data} {
    global PRtypes
    upvar #0 PR$win var

    catch {set read_handler $var(read_handler)}
    set read_handler_state PR$win

    if {[set handler [array get PRtypes $type]] == {}} {
	# Look for a more general handler
	if {[set handler [array get PRtypes \
		"[lindex [split $type /] 0]/*"]] == {}} {
	    # Look for the most general (catch-all) handler
	    set handler [array get PRtypes "*/*"]
	}
    }
    if {$handler == {}} {error "no handler found for content-type \"$type\""}

    # Find the data converter
    set hdlr [lindex $handler 1]
    set converters [lindex $hdlr 1]
    set data_converter [lindex [lindex $converters [lsearch -regexp $converters "^$dataSrc .*"]] 1]

    # Set start and continue values
    if {[lindex $hdlr 4] != {}} {
	set var(start) [lindex [lindex $hdlr 4] 0]
	set var(cont) [lindex [lindex $hdlr 4] 1]
    } else {
	set var(start) {}
	set var(cont) {}
    }

    # See if the content-type handler also has a close handler
    if {[set close_handler [lindex $hdlr 3]] != {}} {
	append close_handler " $win"
    }

    # Before dispatching the content-handler invoke the startproc, if any.
    # Pass most of the arguments given as parameters for context
    if {$startproc != {}} {
	eval $startproc \{$var(url)\} \{$win\} \{$var(HDRcontent-type)\} \{$renderState\}
    }

    # Dispatch the content-type handler.
    # If the handler returns any data then that data should be
    # prepended to any subsequent data.
    if {[array get var fd] != {}} {
	# Suspend the handler while the data is being processed
	$waitproc $var(fd) readable "
	    upvar #0 PR$win var
	    catch {set ehandler \[$waitproc \$var(fd) readable\]}
	    catch {$waitproc \$var(fd) readable {}}
	    if {\[catch {set var(putback_data) \[[lindex $hdlr 0] [subst -nocommands $data_converter] \"[lindex $hdlr 2] $win\" \$var(start)\]} result\]} {
		PRhandlerError $win \$result
	    } else {
		PRhandlerFinished $win \$var(readsize)
	    }
	    $close_handler
	    if {\[info exists var(fd)\]} {
		set var(start) \$var(cont)
		$waitproc \$var(fd) readable \$ehandler
	    }
	"
    } else {
	if {[catch {set var(putback_data) \
	    [[lindex $hdlr 0] [subst $data_converter] "[lindex $hdlr 2] $win" $var(start)]} result]} {
	    PRhandlerError $win $result
	} else {
	    PRhandlerFinished $win $var(readsize)
	}
	catch {eval $close_handler}
    }

    return 1
}

# Application callbacks for data loading, should be replaced by application

proc PRhandlerFinished {win size} {
    puts "==> handler for $win loaded $size bytes"
}
proc PRhandlerError {win result} {
    puts "==> handler error for $win - \"$result\""
}

# Currently supported protocols include file:, http:
#
# The "PRloadData" command has arguments {url state data}.
# "url" is the URL to be loaded, "State" is a global variable into which the
# library will store MIME attribute info, etc, and which must be passed to any
# handlers and "data" is the name of a variable to pass back the data.
#
# The return value is one of:
#	PRdata	the data itself has been returned
#	PRfd	the return value is a file descriptor from which the data may be read.  
#		The socket is set to non-blocking mode.  Handlers are provided for managing 
#		reading data (see below).
#	PRfile	the return value is the filename of a file from which the data may be read
#	PRimage	the return value is the name of an image type which has the requested 
#		image data loaded.
#
# The state variable also has elements set for the known MIME or HTTP attributes, if they
# are known at the time.  These values may be added at a later stage by the read handler.

set PRdocumentID 0
proc PRloadData {url state data} {
    global PRprotocols PRdocumentID
    upvar #0 $state var
    upvar $data data_return

    # Clean out any old values in the state array...
    catch {unset var}

    # Parse the URL, and stick the elements into the state array
    lassign [URL_parse $url] var(protocol) var(host) var(port) var(path) var(key) var(name)
    # Validity check: protocol must be specified
    if {$var(protocol) == {}} {
	error "invalid URL \"$url\""
    }

    # Initialise other elements of the state array
    array set var "
	url [URL_makeURI $var(protocol) $var(host) $var(port) $var(path) $var(key)]
	eof 0 HDRcontent-type {} HDRcontent-length 0 readsize 0 id [incr PRdocumentID]
    "

    # Check whether the URL has been cached
    if {[set cache_ret [Cache_get_document $state $data]] != {}} {
	return $cache_ret
    }

    # The URL was not cached, so call the protocol handler to fetch the data
    if {[array names PRprotocols $var(protocol)] != {}} {
	set handler_return [$PRprotocols($var(protocol)) $state $data]
    } else {
	error "unknown protocol \"$var(protocol)\""
    }

    # Post-process the results of the protocol handler.
    # If the document is an image, and we are in a Tk environment,
    # then create an image type for it
    if {[string match "image/*" $var(HDRcontent-type)] && \
	[info commands image] != {}} {
	# Create the image type
	switch $var(HDRcontent-type) {
	    image/x-xbitmap" {set img [image create bitmap]}
	    image/x-portable-pixmap {set img [image create photo -format ppm]}
	    image/gif {set img [image create photo -format gif]}
	    default {
		Cache_add_document $handler_return $state $data_return
		return $handler_return
	    }
	}

	# Handle getting the data into the image
	switch $handler_return {
	    PRfd {
		# Cache_add_document opens the cache file
		Cache_add_document $handler_return $state $data_return
		# We have images, therefore we also have fileevent
		fileevent readable $data_return "$var(read_handler) $state"
	    }
	    PRfile {
		# Read in the image data
		$img configure -file $data_return
		set var(eof) 1
	    }
	    PRdata {error "image handlers don't yet support -data switches"}
	    PRimage {error "recursive definition of image?"}
	    default {error "invalid data source \"$handler_return\""}
	}

	# Put the image into the cache
	Cache_add_image $url $img $var(HDRcontent-type)

	# Return the image details
	set data_return $img
	return PRimage
    } else {
	Cache_add_document $handler_return $state $data_return
	return $handler_return
    }
}

#
# present dispatches content-type handlers, which must be
# registered using register_type.
#

array set surfit {
    version "SurfIt! 0.4alpha"
}

proc PRregister_protocol {name handler} {
    global PRprotocols
    set PRprotocols($name) $handler
}

proc PRregister_encoding {name suffix handler} {
    global PRencodings
    set PRencodings($name) $handler
    foreach s $suffix {PRregister_suffix $s $name}
}

proc PRregister_type {name suffix handler data_presentation renderer \
	{close_handler {}} {start {}}} {
    global PRtypes
    set PRtypes($name) [list $handler $data_presentation $renderer $close_handler $start]
    foreach s $suffix {PRregister_suffix $s $name}
}

proc PRregister_suffix {suffix type} {
    global PRsuffixes
    foreach s $suffix {set PRsuffixes($suffix) $type}
}

### Private routines

# Encoding routines

# Try and work out the encoding from the given path
proc PRfind_encoding_from_path {path} {
    global PRencodings
    if {[set e [array get PRencodings [file extension $path]]] != {}} {
	return [lindex $e 1]
    } else {return {}}
}

# MIME type routines

# Try and work out the MIME-type from the given path
proc PRfind_type_from_path {path} {
    global PRsuffixes
    if {[set t [array get PRsuffixes [file extension $path]]] != {}} {
	return [lindex $t 1]
    } else {return {}}
}

###
### Protocol handlers
###
### These take {state data} as arguments and return information as per PRloadData.
### Prerequisite is that all necessary information has been set in the state variable.
###
### NB. That since Tcl is not 8-bit clean, binary data can only be dealt
### with by writing it to a file as it arrives from the network.
###

# The file: protocol handler reads from a file in the local filesystem.
# The path may be a script, in which case invoke it as a CGI script.

PRregister_protocol file PRprotocol_file

proc PRprotocol_file {state data} {
    upvar #0 $state var
    upvar 2 $data data_return

    # Determine data encoding
    if {[set var(HDRcontent-encoding) [PRfind_encoding_from_path $var(path)]] != {}} {
	# Strip encoding suffix from path
    }

    # Determine data type
    if {[set var(HDRcontent-type) [PRfind_type_from_path $var(path)]] == {}} {
	# Take a guess
	set var(HDRcontent-type) "text/plain"
    }

    set var(read_handler) PRfile_read

    # Reading from a file is the same as reading from a cache file
    set data_return $var(path)
    set var(file) $var(path)
    # Determining the size must be caught since the file may not exist
    if {[catch {set var(readsize) [file size $var(path)]}]} {set var(readsize) 0}
    set var(HDRcontent-length) $var(readsize)
    return PRfile
}

proc PRfile_read {state} {
    upvar #0 $state var

    set var(eof) 1
    if {[info exists var(putback_data)]} {
	set data $var(putback_data)
    } else {set data {}}
    append data [read_file $var(file)]
    # Replace the previous PRfile cache entry with the data
    Cache_add_document PRdata $state $data
    set var(readsize) [string length $data]
    return $data
}

# The wais: protocol handler uses a proxy server set up as a WWW-WAIS gateway

PRregister_protocol wais PRprotocol_wais

proc PRprotocol_wais {state data} {
    global surfit

    if {![info exists surfit(waisproxy)] || ![info exists surfit(waisproxy-port)]} {
	error "WAIS proxy server not specified"
    }
    return [PRprotocol_http $state $data [list $surfit(waisproxy) $surfit(waisproxy-port)]]
}

# The http: protocol handler sets up a HTTP request and a handler to
# deal with the response.

PRregister_protocol http PRprotocol_http

proc PRprotocol_http {state data {proxy {}}} {
    global surfit PRtypes PRencodings
    upvar #0 $state var
    upvar 2 $data data_return
    # Recover window from state
    if {[regexp {PR(\..*)} $state all win]} {
	set win [SurfIt_hyperpage $win]
	upvar #0 PS$win altState
    }

    # We'll setup the connection, send the HTTP headers, and then 
    # let a file event handler do the real work.

    # Default port for HTTP
    if {$var(port) == ""} {set var(port) 80}

    if {$proxy != {}} {
	upvar 3 $data data_return ;# Ugly!
	if {[catch {server_connect -buf [lindex $proxy 0] [lindex $proxy 1]} var(fd)]} {
	    error "cannot connect to proxy server [lindex $proxy 0]:[lindex $proxy 1]"
	}
	set requestURI $var(url)
    } elseif {[info exists surfit(proxy)]} {
	if {[catch {server_connect -buf $surfit(proxy) $surfit(proxy-port)} var(fd)]} {
	    error "cannot connect to proxy server [lindex $proxy 0]:[lindex $proxy 1]"
	}
	set requestURI $var(url)
    } else {
	if {[catch {server_connect -buf $var(host) $var(port)} var(fd)]} {
	    error "cannot connect to host $var(host):$var(port)"
	}
	set requestURI [URL_makeURIpath $var(path) $var(key)]
    }
    fcntl $var(fd) NONBLOCK yes

    # Send the HTTP request.
    # The Accept: header should be generated by registered handlers
    # User: ???

    puts $var(fd) "GET $requestURI HTTP/1.0\r
User-Agent: $surfit(version)\r
Accept: [join [array names PRtypes] ,]\r"
    if {[array names PRencodings] != {}} {
	puts $var(fd) "Accept-Encoding: [join [array names PRencodings] ,]\r"
    }
    if {[info exists altState(referer)]} {
	puts $var(fd) "Referer: $altState(referer)\r"
	unset altState(referer);	# No longer needed
    }
    puts $var(fd) "\r\n"
    flush $var(fd)

    array set var "
	read_handler PRhttp_read in_header 1 HDRcontent-type {}
	HDRcontent-encoding {} HDRcontent-length {} length 0
    "

    set data_return $var(fd)
    return PRfd
}

# File event handler for HTTP responses.  Data is also written into the cache.
# Pre-requisite is that all necessary information is in the state array variable.
# Return data is as for loadData, an empty string implies that the connection has
# closed.
# If var(putback_data) is set then prepend that data to the returned data,
# without causing it to be appended to the cache (since it has been already).

proc PRhttp_read {state} {
    upvar #0 $state var

    set var(readsize) 0

    if {[array names var fd] == {} || [eof $var(fd)]} {
	return [PRhandleEOF $state]
    }

    while {$var(in_header)} {
	if {[catch {gets $var(fd) hdr}]} {
	    # EOF or error occurred
	    if {[eof $var(fd)]} {
		# EOF really did occur
		return [PRhandleEOF $state]
	    }
	}
	# Process header
	if {[info exists hdr] && ($hdr == "\r" || $hdr == "")} {
	    set var(in_header) 0
	    # Check whether mandatory attributes have been set.
	    # Current attributes are: Content-Type Content-Length
	    if {$var(HDRcontent-type) == {}} {
		# We'll have to try and guess what the Content-Type is.
		set var(HDRcontent-type) [PRfind_type_from_path $var(path)]
	    }
	    # Content-Length is fixed at EOF
	    if {$var(handlerDelayed)} {
		# Now that we know what the content-type is, invoke a
		# Content-type handler for it and update the cache
		Cache_set_content_type $var(url) $var(HDRcontent-type)
		set var(handlerDelayed) 0
		PRdispatch_handler $var(cookie) PRfd $var(renderState) \
			$var(waitproc) $var(startproc) $var(HDRcontent-type) $var(fd)
		return {}
	    } else {
		break
	    }
	}

	if {![regexp -nocase "HTTP/(\[^ \]*) (\[^ \]+) (.*)\r?" $hdr all var(HTTPversion) var(HTTPstatus) var(HTTPreason)]} {
	    if {[regexp "(\[^:\]+): (\[^\r\]*)\r?" $hdr all key value]} {
		set var(HDR[string tolower $key]) $value
	    }
	}
    }

    if {!$var(in_header)} {
	# Tcl is NOT 8-bit clean, so if the MIME-type is not text/*
	# (or application/x-tcl) then we are forced to write the data into a file.
	if {!([string match "text/*" $var(HDRcontent-type)] || \
	      "application/x-tcl" == $var(HDRcontent-type))} {
	    #
	    # Copy the data into the cache file.  We ignore failures as far as
	    # the handler goes, but the cache will want to know that the file
	    # has failed to be downloaded.
	    #
	    if {[catch "copyfile $var(fd) $var(cachefd)" var(readsize)]} {
		set var(readsize) 0
		PRhandleEOF $state
		Cache_delete $state
		return {}
	    }
	    incr var(length) $var(readsize)

	    # Check for EOF
	    if {[eof $var(fd)]} {
		# Determine content-length if not specified in headers
		if {$var(HDRcontent-length) == {}} {
		    set var(HDRcontent-length) $var(length)
		    Cache_set_content_length $var(url) $var(HDRcontent-length)
		}
		return [PRhandleEOF $state]
	    }
	} else {
	    # Incoming data is plain text, so we can handle it in Tcl

	    # Ignore errors in reading... probably simply empty socket
	    if {[catch "read -nonewline $var(fd)" data]} {set data {}}

	    set var(readsize) [string length $data]
	    incr var(length) $var(readsize)

	    # Send to cache module
	    Cache_append $state $data

	    # Check for EOF
	    if {[eof $var(fd)]} {
		# Determine content-length if not specified in headers
		if {$var(HDRcontent-length) == {}} {
		    set var(HDRcontent-length) $var(length)
		    Cache_set_content_length $var(url) $var(HDRcontent-length)
		}
		PRhandleEOF $state
	    }

	    # Deal with data the content-type handler may have put back into the stream
	    if {[info exists var(putback_data)] && $var(putback_data) != {}} {
		set data "$var(putback_data)$data"
		unset var(putback_data)
	    }

	    return $data
	}
    }
}

# Remove this file event handler

proc PRhandleEOF {state} {
    upvar #0 $state var
    catch {close $var(fd)}
    catch {unset var(fd)}
    Cache_close $state
    set var(eof) 1
    return {}
}
