;;; TO DO:
;;; make redirection smarter (it assumes GET / now)

(require 'tcp)
(yreq "Scheduler/scheduler")
(yreq "Crypto/ssl")
(yreq "Utilities/yenta-utils")
(yreq "UI/httpd-reply-codes")

;;; NOTE: form data from POST method forms is provided in a generated header
;;; called "HTTPD Content".  It is still form-url garbled.  Also provided is the
;;; generated header "HTTPD Values" which is a list of (field-name . ungarbled
;;; field-value) pairs.  This header is also generated by GET method forms.  In
;;; addition, index-style GET methods create a header "HTTPD Index", which
;;; contains the ungarbled index.

(define httpd:httpd 'set!-below)
(define httpd:send-headers 'set!-below)

(define *httpd:priority* 5)

(def-yenta-var *httpd:private-key* #f)	; Set by init-keys.
(def-yenta-var *httpd:cert* #f)		; Set by init-keys.
(def-yenta-var *httpd:use-ssl* #t)
(def-yenta-var *httpd:accept-timeout* 30) ; Number of seconds to wait before sending a redirect.

(defvar *httpd:active-connections* 0)	; Keeps a count of active connections, so we can tell when it's safe to quit without leaving one open.
(define *httpd:taskname* "HTTPD")	; Taskname for the actual httpd.

;;; If Yenta has waited this many seconds and still claims to have open httpd connections, just die anyway.
;;; This can happen if an httpd child take a fatal error and fails to decrement *httpd;active-connections*.
(define *last-ditch-quit-timeout* 60)

;;; Ensures that we're done serving pages, and quits.  The way this
;;; should -really- work is to record whether the browser is fetching
;;; images (making the assumption that this won't change on the very
;;; last page), and, if it is, watch the low_src and normal shutdown
;;; icon go by (and deal w/browser caching somehow, I guess).  But the
;;; way this -does- work is to give the browser plenty of time to make
;;; its fetches, or at least to get them started, and to then wait
;;; until we don't have any open network connections.  At that point,
;;; we assume it's safe to shut down, even if the browser might not
;;; have fetched the non-low_src image.
(define (wait-for-no-pending-fetches-and-quit)
  (scheduler:add-timeout-task!		; This won't wind up running unless the task below takes way too long...
    "Last ditch quit" 10 *last-ditch-quit-timeout* scheduler:quit)  
  (scheduler:add-once-task!
    "Waiting for no fetches" 4
    (lambda ()
      (not (positive? *httpd:active-connections*))); zero? would be the obvious test, but just in case it somehow goes negative, don't hang forever.
    (lambda ()
      (scheduler:remove-task! *httpd:taskname*)	; Avoid a timing window by killing the task -> no more page fetches will happen!
      (scheduler:quit))))		; Tell the scheduler to terminate the entire process.

;;; This gives the browser time to at least request the icons, before we check to make sure we're done serving them.
(define (deferred-wait-for-no-pending-fetches-and-quit)
  (scheduler:add-timeout-task!
    "Deferred waiting for no fetches" 4 15 ; Give the browser 15 seconds to finish making its requests.
    wait-for-no-pending-fetches-and-quit))

;;; Holds the URL for each page fetched that httpd:url-is-html? deems worthy.
;;; The idea here is to record only pages that the user actually perceived as
;;; a page, and not inline image fetches and so forth.  The latest page is
;;; pushed onto the front.
;;;
;;; This is intended to be used by the help system, and perhaps later by some
;;; sort of meta-equiv refresh that arranges to put a refresh header only in
;;; those pages that are HTML -and- don't have a form on them, or somesuch.
;;; Final implementation of -that- idea probably needs to be combined with
;;; the use of frames to avoid reloading all the images as well.
;;;
;;; [See also *httpd:last-url-fetched*, which is defined in bombproofing.scm
;;;  so it's available to the Yenta servers, which have a scheduler, but not
;;   an httpd.]
;;;
;;; %%% Should this maybe be a yenta-var?  Not sure we want a huge history
;;; %%% accumulating, but it might be useful to dump upon a task error.
(defvar *httpd:html-history* '())

;;; "/", "*.html", and "*.html?stuff" are all valid.  GIFs and JPEGs are not.
;;; We quite deliberately just search for "/" or ".html" and don't get any
;;; fancier (e.g., if you stick ".html" into the name of a GIF, we'll be
;;; fooled), because it's easy enough just to assume no pathological naming.
(define *httpd:url-is-html-regexp* (regcomp "/$|.+\.html"))
(define (httpd:url-is-html? url)
  (regmatch? *httpd:url-is-html-regexp* url))

;;; A documentation page, by definition, is anything with the word "help"
;;; in its name.
(define *httpd:url-is-help-regexp* (regcomp "/$|.*[Hh]elp.*\.html"))
(define (httpd:url-is-help? url)
  (regmatch? *httpd:url-is-help-regexp* url))

;;; Does all the work of updating *httpd:last-html-fetched*, *httpd:html-history*, etc.
(define (httpd:update-last-urls url)
  (inc! *ctr:pages-fetched*)
  (set! *httpd:last-url-fetched* url)
  (when (httpd:url-is-help? url)
    (inc! *ctr:docs-fetched*))
  (when (httpd:url-is-html? url)
    (push! url *httpd:html-history*)))

(let ()
  (define (do-and-flush f ssl)
    (let ((x (f ssl)))
      (force-output ssl)
      x))

  (define (unread? con)
    (cond ((ssl? con)
	   (> (ssl:pending con) 0))
	  ((tcp:tcp? con)
	   (> (tcp:chars-unread con) 0))
	  (else (error "I don't know how to deal with" con)))) ; %%% Hmm.  I wonder if this should be something that won't kill Yenta dead...

  (define (crlf port)
    (write-char #\cr port)
    (write-char #\nl port))

  (define (line-terminator? char)
    (cond ((eq? char #\cr))
	  ((eq? char #\nl))
	  (else #f)))
    
  ;; Reads a string, terminated with whitespace, from a TCP connection.
  ;; Reads and throws away initial whitespace.  q.v. sch-read-char in nonblocking-io.scm.
  (define (sch-read-char con pfx use)
    (let ((pfxstr (caar pfx)))
      (if (and (string? pfxstr)
	       (> (string-length pfxstr) 0))
	  (let ((c (string-ref pfxstr 0)))
	    ;(disp c)
	    (set-car! (car pfx)
		      (substring pfxstr (string-length pfxstr)))
	    c)
	  (scheduler:when (unread? con)
	    (use (read-char con))))))

  (define (sch-peek-char con pfx use)
    (let ((pfxstr (caar pfx)))
      (if (and (string? pfxstr)
	       (> (string-length pfxstr) 0))
	  (let ((c (string-ref pfxstr 0)))
	    ;(disp c)
	    c)
	  (scheduler:when (unread? con)
	    (use (peek-char con))))))

  (define (sch-read-string con pfx use)
    (define ret (sr:make-string 1000))
    (scheduler:sequences
     ((scheduler:do-with ((char (sch-peek-char con pfx)
				(sch-peek-char con pfx)))
	 ((eof-or-char-not-whitespace? char) 'done)
	(scheduler:call (sch-read-char con pfx))))
     ((scheduler:do-with ((char (sch-peek-char con pfx)
				(sch-peek-char con pfx)))
	 ((eof-or-char-whitespace? char)
	  (use (sr:to-string ret)))
	(sr:add-char! char ret)
	(scheduler:call (sch-read-char con pfx))))))
  
  ;; Reads a line from a TCP connection; the line should end with a CRLF.
  ;; (Actually, the line can end with CR or LF, followed by any character.
  ;;  But CRLF is what's expected.)  q.v. sch-read-char in nonblocking-io.scm.
  (define (sch-read-line con pfx use)
    (define ret (sr:make-string 1000))
    (scheduler:do-with ((char (sch-peek-char con pfx)
			      (sch-peek-char con pfx)))
	((or (eof-object? char) (line-terminator? char))
	 (scheduler:sequences
	  ((scheduler:call (sch-read-char con pfx)))
	  ((scheduler:call (sch-read-char con pfx)))
	  ((use (sr:to-string ret)))))
      (sr:add-char! char ret)
      (scheduler:call (sch-read-char con pfx))))

  (define (non-empty-string str)
    (> (string-length str) 0))

  (define (strip-whitespace str)
    (set! str
	  (do ((first 0 (+ first 1)))
	      ((not (eof-or-char-whitespace? (string-ref str first)))
	       (substring str first (string-length str)))))
    (do ((last (- (string-length str) 1) (- last 1)))
	((not (eof-or-char-whitespace? (string-ref str last)))
	 (substring str 0 (+ last 1)))))

  (define (parse-list str)
    (filter non-empty-string
	    (map strip-whitespace
		 (infix-split "," str))))

  (define (split-header str)
    (let* ((colon (index str #\:))
	   (field-name (substring str 0 colon))
	   (value (parse-list (substring str (+ colon 1) 
					 (string-length str)))))
      (cons field-name value)))

  (define (ungarble str)
    (define pos 0)
    (define ret "")
    (do ()
	((= pos (string-length str)) ret)
      (cond ((char=? (string-ref str pos) #\%)
	     (set! ret (string-append ret (string 
					   (integer->char
					    (hex-string->integer
					     (substring str
							(+ pos 1)
							(+ pos 3)))))))
	     (set! pos (+ pos 3)))
	    (t
	     (set! ret (string-append ret (string (string-ref str pos))))
	     (inc! pos)))))

  (define (parse-form form-data)
    ;; The test in the lambda is to see if this is in fact a form response,
    ;; i.e., whether the data actually has a "=" in it.  Searchable index
    ;; results are silently ignored here, since they are not form data.
    (let ((values
	   (filter (lambda (item) (not (null? item)))
		   (map (lambda (item)
			  (let ((frags (infix-split "=" item)))
			    (if (not (null? (cdr frags)))
				(cons (ungarble (car frags))
				      (ungarble (cadr frags)))
				'())))
			(if (< 0 (string-length form-data))
			    (infix-split "&" (string-edit "\\+" " "
							  form-data #t))
			    '())))))
      (if (null? values)
	  '()
	  (list (cons "HTTPD Values" values)))))

  (define (parse-index form-data)
    (if (null? (cdr (infix-split "=" form-data)))
	(list (list "HTTPD Index" (ungarble form-data)))
	'()))

  (define (sch-read-request con pfx use)
    (scheduler:let* ((method (sch-read-string con pfx))
		     (uri (sch-read-string con pfx)))
      (define version "HTTP/0.9")	; default, for simple-requests.
      (define headers '())
      (format-debug 10 "~A~&" method)
      (scheduler:sequences
       ((scheduler:let* ((char (sch-peek-char con pfx)))
          (if (eq? char #\Space)	; Full request.
	      (scheduler:sequences
	       ((httpd:update-last-urls uri)
		(format-debug 10 "~A requested.~&" uri))
	       ((scheduler:let* ((version (sch-read-string con pfx))
				 (blank (sch-read-line con pfx)))
	          (scheduler:do-with ((header (sch-read-line con pfx) 
					      (sch-read-line con pfx)))
				     ((equal? header "") 'done)
		     (let* ((parsed (split-header header))
			    (add-to (assoc (car parsed) headers)))
		       (if add-to
			   (append! add-to (cdr parsed))
			   (set! headers (cons parsed headers)))))))
	       ((define content (assoc "Content-length" headers))
		(when content
		  (scheduler:do ((count (string->number (cadr content))
					(- count 1))
				 (ret "" ret))
		      ((= 0 count)
		       (set! headers (cons `("HTTPD Content" ,ret)
					   (cons (car (parse-form ret))
						 headers))))
		    (scheduler:let* ((char (sch-read-char con pfx)))
		      (set! ret (string-append ret
					       (string char))))))))
	      (scheduler:call (sch-read-line con pfx))))) ; Simp-Req's CRLF
       ((define uri-split (infix-split "\\?" uri))
	(when (and (not (null? (cdr uri-split)))
		   (not (assoc "HTTPD Values" headers))) ; POST overrides GET
	  (set! headers
		(append (parse-form (cadr uri-split)) headers))
	  (set! headers
		(append (parse-index (cadr uri-split)) headers))
	  ;; One or the other of the above will have no effect, with forms trumping indices.
	  (set! uri (car uri-split)))
	(use (cons method (cons uri (cons version headers))))))))

  (define (forbid con)
    (send-headers con http/forbidden)
    (format-debug 10 "rejected a connection from ~A~&" (tcp:remote-ip con)))

  (define (smart-redirect-to-ssl con ssl)
    (format-debug 10 "~S~&" (ssl:packet ssl))
    (scheduler:let* ((request (sch-read-request con `((,(ssl:packet ssl))))))
      (format-debug 10 "~S~&" request)
      (cond ((string-ci=? (car request) "get")
	     (let ((url (format #f "https://~A:~A/~A"
				(tcp:local-ip con)
				(tcp:local-port con)
				(cadr request))))
	       (send-headers con http/moved-temp `("Location" ,url))
	       (format con "Please point your browser at <a href=\"~A\">~A</a>"
		       url url)
	       (format-debug 10 "sent a redirect~&")))
	    (t
	     (send-headers con http/bad-request)
	     (format-debug 10 "bad request ~A~&" request)))
;;;   (close-port ssl)
      (close-port con)))

  (define (dumb-redirect-to-ssl con ssl)
    (let ((url (format #f "https://~A:~A/"
		       (tcp:local-ip con)
		       (tcp:local-port con))))
      (send-headers con http/moved-temp `("Location" ,url))
      (format con "Please point your browser at <a href=\"~A\">~A</a>"
	      url url)
      (crlf con)
      (format-debug 10 "sent a redirect~&"))
    (close-port ssl)
    (close-port con))

  ;; Smart-redirect-to-ssl seems to be broken. :-(
  (define (redirect-to-ssl con ssl) (dumb-redirect-to-ssl con ssl))

  (define (send-headers con code . headers)
    (display "HTTP/1.0 " con)
    (display code con)
    (write-char #\space con)
    (display (httpd:reply-code->text code) con)
    (crlf con)
    (for-each (lambda (header)
		(display (car header) con)
		(display ": " con)
		(when (not (null? (cdr header)))
		      (display (cadr header) con)
		      (for-each (lambda (value)
				  (display ", " con)
				  (display value con))
				(cddr header)))
		(crlf con))
	      headers)
    (crlf con))

  (define (httpd portnum check-host check-cert respond)
    (let ((lsnr (tcp:listen portnum))
	  (ctx #f))
      (when *httpd:use-ssl*
	(set! ctx (ssl:make-ctx (ssl:v23-server-method)))
	;;(ssl:set-verification! ctx #t)
	;;(ssl:trace-ctx! ctx)	; DEBUG.
	(when (not (valid-ssl-ctx? ctx))
	  (set! ctx #f)
	  (format-debug 10 "~A~&" (ssl:get-errors)))
	(unless (ssl:use-rsa-private-key!
		 ctx
		 (ssl:der-string->rsa-private-key *httpd:private-key*))
	  (format-debug 10 "~A~&" (ssl:get-errors)))
	(unless (ssl:use-certificate-from-asn1-string! ctx *httpd:cert*)
	  (format-debug 10 "~A~&" (ssl:get-errors))))
      (if (if *httpd:use-ssl*
	      (and lsnr ctx)
	      lsnr)
	  (let ()
	    (define (wait-fn)
	      (tcp:heard? lsnr))
	    (define (task)
	      (let* ((tcp (tcp:accept lsnr)) ; Actually accept an incoming request from the network.
		     (con tcp)
		     (failed #f)
		     (redirecting #f))
		;; NOTE:  Every path out of this function, from this point on, must call FAIL exactly once,
		;; or must exit successfully (see call to "respond" below) but NOT BOTH.  Otherwise, we'll miscount
		;; the number of open connections.  If we get an error and the scheduler kills us, we'll leave
		;; it permanently one high; this requires a scheduler fix (e.g., implementation of scheduler:dynwind)
		;; to do right.
		(inc! *httpd:active-connections*)
		(format-debug 10 "~S active connection~:P.~&" *httpd:active-connections*)
		(scheduler:split
		 (define (fail)		; This must be called ONCE and ONCE ONLY while rejecting a connection or recovering from an abort.
		   (set! failed #t)
		   (format-debug 10 "failed!~&")
		   (dec! *httpd:active-connections*)
		   (format-debug 10 "~S active connection~:P.~&" *httpd:active-connections*))
		 (define (maybe-read-request con pfx use)
		   (if (not failed)
		       (sch-read-request con pfx use)
		       (use #f)))
		 (define (close-ports)	; If the port is already closed, calling close-port again is a no-op, so this is safe to call a lot.
		   (close-port con)
		   (unless (eq? con tcp)
		     (close-port tcp)))
		 (define (maybe-accept ctx tcp timeout use)
		   (if *httpd:use-ssl*
		       (ssl:make-and-accept ctx tcp timeout use)
		       #f))
		 (cond ((not (tcp:connected? tcp))
			(fail))
		       (t
			(cond ((and (not failed)
				    (not (check-host (tcp:remote-ip tcp)
						     (tcp:remote-port tcp))))
			       (forbid tcp)
			       (close-ports)
			       (fail))
			      (t
			       (scheduler:sequences
				 ((scheduler:let* ((ssl (maybe-accept ctx tcp *httpd:accept-timeout*)))
				    (cond ((ssl? ssl)
					   (let ((c (ssl:current-cipher ssl)))
					     ;; Yenta's very first external user's very first use of Yenta
					     ;; got a C that blew up ssl:cipher-bits, but I don't know what
					     ;; it was.  So check instead, and tell us.  This is presumably
					     ;; some sort of SSLeay bug, since -presumably- ssl:current-cipher
					     ;; should always return something that ssl:cipher-bits can eat.
					     ;; It also appears to happen if you decline the offered certificate,
					     ;; at least in NS 3.x, though the user swears that's not what happened.
					     ;; This also seems to happen if the user clicks elsewhere while the
					     ;; browser is loading images; it appears to be a generalized abort.
					     (cond ((valid-ssl-cipher? c)
						    (format-debug 10 "SSL handshake successful; using ~A-bit ~A encryption.~&"
								  (ssl:cipher-bits c) (ssl:cipher-name c)))
						   (t
						    (format-debug 10 "(ssl:current-cipher ~S) returned ~S, which is not a valid cipher.~&"
								  ssl c)
						    (close-ports)
						    (fail))))
					   (unless failed ; Don't bother checking the cert if we've already lost.
					     (cond ((check-cert (ssl:peer-certificate ssl))
						    (set! con ssl))
						   (t
						    (forbid ssl)
						    (close-ports)
						    (fail)))))
					  ((pair? ssl)
					   (set! ssl (cdr ssl))
					   (redirect-to-ssl con ssl)
					   (set! redirecting #t)
					   (fail)) ; Don't close-ports here; we check for redirecting immediately below (next sequence).
					  (t
					   (close-ports)
					   (fail)))))
				 ((scheduler:let* ((req (maybe-read-request con '((""))))) ; If we've already failed, this won't try the read.
				    (cond (failed ; We already failed, so maybe-read-request didn't actually do anything much.
					   (unless redirecting
					     (close-ports))
					   #f)
					  (t
					   (respond req con close-ports)
					   (dec! *httpd:active-connections*) ; This is the one and only "successful" return.  All others must (fail).
					   (format-debug 10 "~S active connection~:P.~&" *httpd:active-connections*)
					   #t))))))))))))
	    (scheduler:add-task! *httpd:taskname* *httpd:priority* wait-fn task)
	    #t)
	  #f)))
  
  (set! httpd:send-headers send-headers)
  (set! httpd:httpd httpd))

;;; End of file.
