;;; Courtesy of Song Koh
;;; Harris Semiconductor
;;; Phone:		(407)-724-7085
;;; Internet:	swk@mlb.semi.harris.com
;;; ----------------------------------------
;;; transcript of session
;;; 
;;; (load "qprop1")
;;; FASLoading "/home/seymour/swk/projects/quickprop/qprop1.bin" -- done
;;; ;Value: output-results
;;; 
;;; (build-encoder 8 4)
;;; ;Value: #T
;;; 
;;; (show-time (lambda () (train-test 1 100)))
;;; Trial 0:  Learned after 19 epochs.  Running Avg:  19
;;; Eps 1*, Mu 1.75, WtRng 2, Decay -.0001, SigOff .1, Hyper #T, Sym ()
;;; process time: 55384; real time: 58600
;;; ;Value: #T
;;; 
;;; (/ 55.384 19)
;;; ;Value: 2.91494736842
;;; 
;;; I divide by the number of epochs because it takes a different number
;;; of epochs every time you run it.  There is a random initialization of
;;; the weights.
;;; ----------------------------------------
;;; -*- Mode:Scheme -*-
;;; ***************************************************************************
;;; Common Lisp implementation of "Quickprop", a variation on back-propagation.

;;; For a description of the Quickprop algorithm, see "Faster-Learning
;;; Variations on Back-Propagation: An Empirical Study" by Scott E. Fahlman
;;; in Proceedings of the 1988 Connectionist Models Summer School,
;;; Morgan-Kaufmann, 1988.

;;; This code was written and placed in the public domain by Scott E.
;;; Fahlman.  I would like to hear about anyone using this code and what
;;; kind of results are achieved.  I can be contacted on Arpanet as
;;; "fahlman@cs.cmu.edu" or by physical mail:

;;; Scott E. Fahlman,
;;; School of Computer Science
;;; Carnegie-Mellon University
;;; Pittsburgh, PA 15213
;;; ***************************************************************************
(declare (usual-integrations))
;;; This proclamation buys a certain amount of overall speed at the expense
;;; of runtime checking.  Comment it out when debugging new, bug-infested code.

;;; Portability note: This file is mostly portable Common Lisp.  A few CMU
;;; extensions are used:
;;; SYSTEM:SERVER with no argument reads one pending external event (from
;;; X, for example) and passes it off to a handler function.  If no event
;;; is pending, it proceeds immediately.  With an argument of N, it waits N
;;; seconds for an event to arrive, then proceeds.  SYSTEM:SERVE-ALL is
;;; similar, but does not return until all pending events have been served.
;;; These calls can be removed if your Lisp handles external events in some
;;; other way, or not at all.

;;; Stubs for the CMU Common Lisp extensions referenced below.  BEEP is
;;; called with no arguments, but CMU-SERVER is called with a timeout
;;; argument -- see TRAIN-ONE-EPOCH, which calls (cmu-server 1).  The
;;; original stubs took no arguments, so any pause would have signalled
;;; an arity error; accept (and ignore) any argument list instead.
;;; Replace these with real event handlers if your system has them.
(define beep (lambda args #t))
(define cmu-server (lambda args #t))
(define cmu-serve-all (lambda args #t))


;;; The EXTENSIONS:*IGNORE-FLOATING-POINT-UNDERFLOW* switch, if non-null,
;;; says that floating point underflows should quietly return zero rather
;;; than signalling an error.  If your Lisp does not have such a switch,
;;; you will either have to define an error handler for floating underflows
;;; or check for tiny values at various critical points of the code.

;;;; Assorted Parameters.

;;; These parameters and switches control the learning algorithm.

;;; Initial weights in the network get random values between plus and
;;; minus *WEIGHT-RANGE*.
(define *weight-range* 2.0)

;;; Add this to the sigmoid-prime value to eliminate the flat spots
;;; where the derivative goes to zero.
(define *sigmoid-prime-offset* 0.1)

;;; Controls the amount of linear gradient descent to use.
(define *epsilon* 1.0)

;;; Do not let the quadratic method take a step greater than this value
;;; times the previous step.  If this is too large, the learning becomes
;;; chaotic.
(define *mu* 1.75)

;;; This factor times the current weight is added to the slope at the
;;; start of each epoch.  Keeps weights from growing too big.
(define *decay* -0.0001)

;;; If true, use the hyperbolic arctan error function.
(define *hyper-err* #t)

;;; If true, divide epsilon by the unit fan-in before using it.
(define *split-epsilon* #t)

;;; If true, use a sigmoid activation function ranging from -0.5 to +0.5.
;;; If false, the range is 0.0 to 1.0.
(define *symmetric* #f)

;;; These variables and switches control the simulation and display.

;;; Count of the number of times the entire training set has been presented.
(define *epoch* 0)

;;; If set, restart whenever the maximum epoch limit is exceeded.
;;; Else, just quit.
(define *restart* #t)

;;; If false, skip all routine display updating.
(define *graphics* #f)

;;; When on, pause after each forward/backward cycle.
(define *single-pass* #f)

;;; When on, pause after each training epoch.
(define *single-epoch* #f)

;;; Turned briefly to #t in order to continue after a pause.
(define *step* #f)

;;; The layout structure for displaying the current network.
(define *layout* #f)

;;; If set when creating displays, create the debugging displays as well.
(define *debug-displays* #f)

;;; The real values to be used for logical one and zero values on input
;;; and output.  The routines that build various networks (e.g. BUILD-ENCODER)
;;; look at these.

;;; Value representing logical zero on inputs.
(define *input-zero-value* 0.0)

;;; Value representing logical one on inputs.
(define *input-one-value* 1.0)

;;; Value representing logical zero on outputs.
(define *output-zero-value* 0.0)

;;; Value representing logical one on outputs.
(define *output-one-value* 1.0)

;;; The sets of training inputs and outputs are stored in parallel vectors.
;;; Each set is a vector of short-float values.

;;; Vector of input patterns for training the net.
(define *training-inputs* '())

;;; Vector of output patterns for training the net.
(define *training-outputs* '())

;;; For some benchmarks, there is a separate set of values used for testing
;;; the network's ability to generalize.  These values are not used during
;;; training.

;;; Vector of input patterns for testing the net.
(define *test-inputs* '())

;;; Vector of output patterns for testing the net.
(define *test-outputs* '())


;;;; Fundamental data structures.

;;; Unit outputs and weights are short flonums.

;;; Instead of representing each unit by a structure, we represent the
;;; unit by a fixnum.  This is used to index into various vectors that hold
;;; per-unit information, such as the output state of each unit.

;;; The set of connections COMING INTO each unit is represented by a vector
;;; that is stored with the unit.  Per-connection info is stored in similar
;;; vectors.  The only constraint on network topology is that a unit's
;;; index must be greater than the index of any unit from which it receives
;;; an input.  Regular layers are not required.

;;; Unit 0 is always at a maximum-on value, and has a connection to every
;;; other unit.  The weight on this connection acts as a threshold.
;;; Next come some input units, then some hidden units, and finally some
;;; output units.

;;; The following parameters must be set up by the network-building routines.

;;; Total number of units in the network, including the bias unit 0.
(define *nunits* 0)

;;; Number of input units.
(define *ninputs* 0)

;;; Index of the first hidden unit.
(define *first-hidden* 0)

;;; Number of hidden units.
(define *nhidden* 0 )

;;; Index of the first output unit.
(define *first-output* 0)

;;; Number of output units.
(define *noutputs* 0)

;;; Vector holding the final output value of each unit.
(define *outputs* '())

;;; Vector holding the total error activation for each unit.
(define *error-sums* '())

;;; Vector holding the final error value for each unit.
(define *errors* '())

;;; Vector holding the number of incoming connections for each unit.
(define *nconnections* '())

;;; Vector that holds a connection vector for each unit I.
;;; Each entry in the connection vector holds a unit index J,
;;; indicating that this connection is from J to I.
(define *connections* '())

;;; Vector of vectors, with each entry giving the weight associated
;;; with connection IJ.
(define *weights* '())

;;; Vector of vectors, with each entry giving the change between the previous
;;; weight and the current one.
(define *delta-weights* '())

;;; Vector of vectors, with each entry giving the accumulated slope value
;;; at the current position.
(define *slopes* '())

;;; Vector of vectors, with each entry giving the slope value for the
;;; previous position.
(define *prev-slopes* '())


;;;; Network-building utilities.

;;; Create the global network data structures for a net with NINPUTS
;;; input units, NHIDDEN hidden units, and NOUTPUTS output units, plus
;;; the always-on bias unit 0.  The per-connection vectors are left
;;; unallocated; CONNECT-LAYERS fills those in.
(define build-data-structures
  (lambda (ninputs nhidden noutputs)
    (let ((nunits (+ 1 ninputs nhidden noutputs)))
      ;; Unit-index layout: 0 = bias, then inputs, hidden, outputs.
      (set! *nunits* nunits)
      (set! *ninputs* ninputs)
      (set! *first-hidden* (+ 1 ninputs))
      (set! *nhidden* nhidden)
      (set! *first-output* (+ 1 ninputs nhidden))
      (set! *noutputs* noutputs)
      ;; Per-unit state vectors.
      (set! *outputs* (make-vector nunits 0.0))
      (set! *error-sums* (make-vector nunits 0.0))
      (set! *errors* (make-vector nunits 0.0))
      (set! *nconnections* (make-vector nunits 0))
      ;; Per-unit vectors of per-connection vectors (filled in later).
      (set! *connections* (make-vector nunits))
      (set! *weights* (make-vector nunits))
      (set! *delta-weights* (make-vector nunits))
      (set! *slopes* (make-vector nunits))
      (set! *prev-slopes* (make-vector nunits))
      ;; The bias unit's output is permanently "logical one".
      (vector-set! *outputs* 0 *input-one-value*))))




;;; Select a random weight, uniformly distributed over the interval from
;;; minus RANGE up to (but not including) plus RANGE.
;;;
;;; Fix: the original comment promised an integer "inclusive" of +RANGE,
;;; but the code truncated (* 2.0 range) before calling RANDOM, which
;;; (a) could never yield values in the top part of the interval and
;;; (b) collapsed fractional ranges (e.g. range 0.7 drew from [0,1)
;;; instead of [0,1.4)).  Drawing directly from [0, 2*range) matches
;;; Fahlman's reference Common Lisp implementation.

(define random-weight
  (lambda (range)
    (- (random (* 2.0 range)) range)))




;;; Build connections from every unit in [START1, END1) to every unit in
;;; [START2, END2).  Also add a connection from the bias unit 0 to every
;;; unit in range 2.  For each connection, select a random initial weight
;;; between RANDOM-RANGE and its negative.  Allocates the per-connection
;;; vectors for each destination unit and resets *EPOCH*.

(define connect-layers 
  (lambda (start1 end1 start2 end2 . random-range-list)
    ;; RANDOM-RANGE is an optional trailing argument.
    (define random-range
      (if (null? random-range-list) 0 (car random-range-list)))
    (set! *epoch* 0)
    ;; N = incoming connections per destination: one per source unit in
    ;; [START1, END1) plus one for the bias unit 0.
    (let ((n (1+ (- end1 start1))))
      (do ((i start2 (1+ i)))
          ((>= i end2))
          (let ((c (make-vector n))
                (w (make-vector n))
                (d (make-vector n))
		(cs (make-vector n))
		(ps (make-vector n)))
	    (vector-set! *nconnections* i n)
	    (vector-set! *connections* i c)
	    (vector-set! *weights* i w)
	    (vector-set! *delta-weights* i d)
	    (vector-set! *slopes* i cs)
	    (vector-set! *prev-slopes* i ps)
	    ;; Slot 0 is the threshold connection from the bias unit.
	    (vector-set! c 0 0)
	    (vector-set! w 0 (random-weight random-range))
	    (vector-set! d 0 0.0)
	    (vector-set! cs 0 0.0)
	    (vector-set! ps 0 0.0)
	    ;; Slots 1..N-1 connect from source units START1..END1-1.
	    (do ((j start1 (1+ j))
		 (k 1 (1+ k)))
		((>= j end1))
	      (vector-set! c k j)
	      (vector-set! w k (random-weight random-range))
	      (vector-set! d k 0.0)
	      (vector-set! cs k 0.0)
	      (vector-set! ps k 0.0)))))))



;;; Give every connection weight a fresh random value between
;;; RANDOM-RANGE (default *WEIGHT-RANGE*) and its negative, and zero the
;;; delta, slope, and previous-slope entries.

(define init-weights
  (lambda rr
    (let ((random-range (if (pair? rr) (car rr) *weight-range*)))
      (do ((i 0 (1+ i)))
	  ((>= i *nunits*))
	(let ((weights (vector-ref *weights* i))
	      (deltas (vector-ref *delta-weights* i))
	      (slopes (vector-ref *slopes* i))
	      (prevs (vector-ref *prev-slopes* i))
	      (fan-in (vector-ref *nconnections* i)))
	  (do ((j 0 (1+ j)))
	      ((>= j fan-in))
	    (vector-set! weights j (random-weight random-range))
	    (vector-set! deltas j 0.0)
	    (vector-set! slopes j 0.0)
	    (vector-set! prevs j 0.0)))))))



;;; Save each current slope value as the previous slope, then
;;; re-initialize the slope to the weight-decay term *DECAY* * weight.
;;; Only units from *FIRST-HIDDEN* up have incoming connections.

(define clear-slopes
  (lambda ()
    (do ((i *first-hidden* (1+ i)))
	((= i *nunits*))
      (let* ((slopes (vector-ref *slopes* i))
	     (prevs (vector-ref *prev-slopes* i))
	     (weights (vector-ref *weights* i))
	     (fan-in (vector-ref *nconnections* i)))
	(do ((j 0 (1+ j)))
	    ((>= j fan-in))
	  (vector-set! prevs j (vector-ref slopes j))
	  (vector-set! slopes j (* *decay* (vector-ref weights j))))))))



;;;; Learning machinery.

;;; Some key utilities.

;;; Sigmoid and sigmoid prime live in the tightest inner loops, so we make
;;; them macros to save a lot of function calls.

;;; The basic sigmoid computation.  Maps a unit's summed input
;;; activation into an output value in the range 0.0 to 1.0.
;;; Activations beyond +/-15.0 are clamped to 0.0 or 1.0 so that EXP
;;; never underflows or overflows.

(define-integrable sigmoid
  (lambda (activation)
    (if (< activation -15.0)
	0.0
	(if (> activation 15.0)
	    1.0
	    (/ 1.0 (+ 1.0 (exp (- activation))))))))

;;; Derivative of the sigmoid output with respect to its activation, at
;;; the given OUTPUT value.  *SIGMOID-PRIME-OFFSET* is added so the
;;; derivative never reaches zero at the sigmoid's flat tails.

(define-integrable sigmoid-prime
  (lambda (output)
    (let ((slope (* output (- 1.0 output))))
      (+ slope *sigmoid-prime-offset*))))

;;; Accumulates the total squared output error for one epoch.
(define *total-error* 0.0)

;;; To count as correct, a bit's output must be this close to the
;;; desired value.
(define *score-threshold* .4)

;;; Count of bits in the current epoch that are wrong by
;;; *SCORE-THRESHOLD* or more.
(define *total-error-bits* 0)

;;; Compute the error for one output unit from DESIRED and ACTUAL outputs.
;;; Side effects: adds the squared difference to *TOTAL-ERROR*, and bumps
;;; *TOTAL-ERROR-BITS* when the bit is off by *SCORE-THRESHOLD* or more.
;;; Returns the error value to propagate backward:
;;;   - if *HYPER-ERR* is off, the plain difference, zeroed inside
;;;     +/-0.1 so near-correct bits contribute nothing;
;;;   - if *HYPER-ERR* is on, the hyperbolic arctan error
;;;     log((1+d)/(1-d)), clipped to +/-17.0 to stay finite as |d| -> 1.

(define-integrable  errfun
  (lambda (desired actual)
    (let* ((dif (- desired actual)))
      (set! *total-error* (+ *total-error* (* dif dif)))
      (if (not (< (abs dif) *score-threshold*))
	  (set! *total-error-bits* (1+ *total-error-bits*)))
      (cond ((not *hyper-err*)
	     (if (and (< -0.1 dif) (< dif 0.1)) 0.0 dif))
	    ((< dif -.9999999) -17.0)
	    ((> dif  .9999999)  17.0)
	    (else (log (/ (+ 1.0 dif) (- 1.0 dif))))))))
;;; The inner loops...

;;; INPUT is a vector of values that become the outputs of the input
;;; units.  Propagate activation forward through the network, leaving
;;; each unit's output in *OUTPUTS*.

(define forward-pass
  (lambda (input)
    (let ((symmetric-offset (if *symmetric* -0.5 0.0)))
      ;; Copy the input pattern onto input units 1..*NINPUTS*.
      ;; (Unit 0 is the bias unit and keeps its fixed output.)
      (do ((i 0 (1+ i)))
	  ((>= i *ninputs*))
	(vector-set! *outputs* (1+ i) (vector-ref input i)))
      ;; For each unit J, sum the weighted outputs of its incoming
      ;; units, then squash through the sigmoid to produce an output.
      (do ((j *first-hidden* (1+ j)))
	  ((= j *nunits*))
	(let ((c (vector-ref *connections* j))
	      (w (vector-ref *weights* j))
	      (nc (vector-ref *nconnections* j)))
	  (do ((i 0 (1+ i))
	       (sum 0.0
		    (+ sum (* (vector-ref *outputs* (vector-ref c i))
			      (vector-ref w i)))))
	      ((>= i nc)
	       (vector-set! *outputs* j
			    (+ symmetric-offset (sigmoid sum))))))))))




;;; GOAL is a vector of desired values for the output units.  Propagate
;;; the error back through the network, accumulating weight slopes.

(define backward-pass
  (lambda (goal)
    ;; Compare outputs to GOAL and compute the raw error for each output
    ;; unit (ERRFUN also updates the epoch error statistics).
    (do ((i *first-output* (1+ i))
	 (n 0 (1+ n)))
	((>= i *nunits*))
      (vector-set! *error-sums* i
		   (errfun (vector-ref goal n) (vector-ref *outputs* i))))
    ;; Zero the error sums for non-output units.
    (do ((i 0 (1+ i)))
	((>= i *first-output*))
      (vector-set! *error-sums* i 0.0))
  ;; Now propagate error back through net.  When this loop reaches unit J,
  ;; all error from later units has been collected.  Do the sigmoid-prime
  ;; calculation, and pass error back to earlier weights and units.
    (do ((j (-1+ *nunits*) (-1+ j))
	 (symmetric-offset (if *symmetric* 0.5 0.0)))
	((< j *first-hidden*))
      (let* ((c (vector-ref *connections* j))
	     (w (vector-ref *weights* j))
	     (cs (vector-ref *slopes* j))
	     (nc (vector-ref *nconnections* j))
	     (o (vector-ref *outputs* j))
	     ;; Scaled error for unit J; the offset maps a symmetric
	     ;; output back into 0..1 before SIGMOID-PRIME sees it.
	     (err-j (* (sigmoid-prime (+ symmetric-offset o))
		       (vector-ref *error-sums* j))))
	(vector-set! *errors* j err-j)
	(do ((i 0 (1+ i)))
	    ((>= i nc))
	  (let ((i-index (vector-ref c i)))
	    ;; Pass error back to the source unit...
	    (vector-set! *error-sums* i-index
			 (+ (vector-ref *error-sums* i-index)
			    (* err-j (vector-ref w i))))
	    ;; ...and accumulate the slope for this connection's weight.
	    (vector-set! cs i
			 (+ (vector-ref cs i)
			    (* err-j (vector-ref *outputs* i-index))))))))))




;;; Update all the weights in the network as a function of each weight's
;;; current slope, previous slope, and the size of the last step taken --
;;; the quickprop update rule.

(define update-weights
  (lambda ()
    ;; SHRINK-FACTOR is the slope ratio beyond which the quadratic
    ;; estimate would exceed *MU* times the previous step.
    (let ((shrink-factor (/ *mu* (+ 1.0 *mu*))))
      (do ((j *first-hidden* (1+ j)))
	  ((= j *nunits*))
	(let ((w (vector-ref *weights* j))
	      (nc (vector-ref *nconnections* j))
	      (d (vector-ref *delta-weights* j))
	      (cs (vector-ref *slopes* j))
	      (ps (vector-ref *prev-slopes* j)))
	  (do ((i 0 (1+ i)))
	      ((>= i nc))
	    (let* ((ps-i (vector-ref ps i))
		   (cs-i (vector-ref cs i))
		   (d-i (vector-ref d i))
		   (next-step 0.0))
	      (cond ((positive? d-i) ;; If last step was positive...
		     ;; Add in epsilon if current slope is positive.
		     (if (positive? cs-i)
			 (set! next-step
			       (+ next-step
				  (if *split-epsilon*
				      (/ (* *epsilon* cs-i) nc)
				      (* *epsilon* cs-i)))))
		     ;; If current slope is close to or larger than
		     ;; prev slope,take maximum size positive step.
		     (cond ((> cs-i (* shrink-factor ps-i))
			    (set! next-step (+ next-step (* *mu* d-i))))
			   ;; Else, use quadratic estimate.
			   (else (set! next-step
				       (+ next-step
					  (* (/ cs-i (- ps-i cs-i))
					     d-i))))))
		    ;; If last step was significantly negative...
		    ((negative? d-i)
		     ;; Add in epsilon if current slope is negative.
		     (if (negative? cs-i)
			 (set! next-step
			       (+ next-step
				  (if *split-epsilon*
				      (/ (* *epsilon* cs-i) nc)
				      (* *epsilon* cs-i)))))
		     ;; If current slope is close to or more neg than
		     ;; prev slope, take maximum size negative step.
		     (cond ((< cs-i (* shrink-factor ps-i))
			    (set! next-step (+ next-step (* *mu* d-i))))
			   ;; Else, use quadratic estimate.
			   (else (set! next-step
				       (+ next-step
					  (* (/ cs-i (- ps-i cs-i))
					     d-i))))))
		    ;; Last step was zero: plain gradient descent only.
		    (else (set! next-step
				(+ next-step
				   (if *split-epsilon*
				       (/ (* *epsilon* cs-i) nc)
				       (* *epsilon* cs-i))))))
	      ;; Record the step and take it.
	      (vector-set! d i next-step)
	      (vector-set! w i (+ (vector-ref w i) next-step)))))))))

;;; Perform forward and back propagation once for each pattern in the
;;; training vectors, collecting slopes.  Then burn in the weights.
;;;
;;; Bug fix: both pause loops tested the unbound name *STEP (missing the
;;; trailing asterisk) instead of the *STEP* switch defined above, so
;;; single-pass and single-epoch modes crashed with an unbound-variable
;;; error instead of pausing.  Also reset *STEP* to #f (its declared
;;; value) rather than '(), which standard Scheme treats as true.

(define train-one-epoch
  (lambda ()

    ;; Busy-wait, serving one event per second, until *STEP* is set or
    ;; single-pass mode is turned off.
    (define pause1
      (lambda ()
	(cond ((or (not *single-pass*) *step*) (set! *step* #f))
	      (else (cmu-server 1) (pause1)))))

    ;; Same, but gated on single-epoch mode.
    (define pause2
      (lambda ()
	(cond ((or (not *single-epoch*) *step*) (set! *step* #f))
	      (else (cmu-server 1) (pause2)))))

    (clear-slopes)
    (do ((i 0 (1+ i)))
	((>= i (vector-length *training-inputs*)))
      (forward-pass (vector-ref *training-inputs* i))
      (backward-pass (vector-ref *training-outputs* i))
      (if *single-pass* (pause1))
      (cond (*graphics* (update-pass-displays))))
    (update-weights)
    (set! *epoch* (1+ *epoch*))
    (cond (*graphics* (update-epoch-displays)))
    (if (and *single-epoch* (not *single-pass*)) (pause2))))




;;; Train the network until there are 0 bits wrong, then print a message.
;;; If any given trial reaches the epoch limit, restart or abort,
;;; depending on the *RESTART* switch.  Repeat all this TIMES times,
;;; printing per-trial parameters and (for multiple trials) aggregate
;;; statistics.  The optional third argument RPT is a reporting interval
;;; in epochs for progress messages.
;;;
;;; Bug fix: REPORT defaulted to '(), which is tested with
;;; (and report ...).  Standard Scheme treats '() as a true value, so
;;; the default crashed in (modulo ... '()).  Default to #f instead.

(define train-test
  (lambda (times max-epochs . rpt )
    (define report (if (null? rpt) #f (car rpt)))
    (let ((total-epochs 0)
	  (total-restarts 0)
	  (esquared 0)		; sum of squared epoch counts, for the SD
	  (maxepochs 0)
	  (minepochs max-epochs)
	  (newmax '()))		; current epoch limit for this trial
    (do ((i 0 (1+ i)))
	((>= i times))
      
      (set! *epoch* 0)
      (init-weights)
      (set! newmax max-epochs)
      (call-with-current-continuation
       (lambda (return)
	 (let loop ()
	   ;; Epoch limit reached: restart with fresh weights and a
	   ;; higher limit, or record the trial as an abort and give up.
	   (cond ((>= *epoch* newmax)
		  (cond (*restart*
			 (set! newmax (+ newmax max-epochs))
			 (init-weights)	
			 (format "Trial " i ": Restart after " *epoch*
				 " epochs.")
			 (set! total-restarts (1+ total-restarts)))
			(else
			 (format "Trial " i ":  Abort after " *epoch*
				 " epochs." )
			 (set! total-restarts (1+ total-restarts))
			 (set! total-epochs (+ total-epochs newmax))
			 (set! esquared (+ esquared (* newmax newmax)))
			 ;; NOTE(review): this overwrites MAXEPOCHS
			 ;; rather than taking a MAX, and MINEPOCHS is
			 ;; not updated here -- confirm this is the
			 ;; intended treatment of aborted trials.
			 (set! maxepochs newmax)
			 (return '())))))
	   (set! *total-error* 0.0)
	   (set! *total-error-bits* 0)
	   (train-one-epoch)
	   ;; Periodic progress report, every REPORT epochs.
	   (cond ((and report (zero? (modulo (-1+ *epoch*) report)))
		  (format "Trained " (-1+ *epoch*) " epochs, "
			  *total-error-bits* " bits wrong, error = "
			  *total-error*)))
	   ;; Success: every output bit within *SCORE-THRESHOLD*.
	   (cond ((zero? *total-error-bits*)
		  (set! *epoch* (-1+ *epoch*))
		  (set! total-epochs (+ total-epochs *epoch*))
		  (set! esquared (+ esquared (* *epoch* *epoch*)))
		  (set! maxepochs (max *epoch* maxepochs))
		  (set! minepochs (min *epoch* minepochs))
		  (format "Trial " i ":  Learned after " *epoch*
			  " epochs.  Running Avg:  "
			  (/ total-epochs (1+ i)))
	
		  (return '())))
	   (loop))))

      ;; Per-trial summary of the learning parameters in force.
      (format "Eps " *epsilon* (if *split-epsilon* "*" "")
	      ", Mu " *mu*
	      ", WtRng " *weight-range*
	      ", Decay " *decay*
	      ", SigOff " *sigmoid-prime-offset*
	      ", Hyper " *hyper-err*
	      ", Sym " *symmetric* )

      ;; Aggregate statistics; only meaningful across multiple trials.
      (cond ((> times 1)
	     (format "ReStrt " total-restarts
		     ", Max " maxepochs
		     ", Min " minepochs
		     ", Avg " (/  total-epochs times)
		     ", SD " (sqrt (/ (- (* times esquared)
					 (* total-epochs total-epochs))
				      (* times (-1+ times)))))))))))


;;; Print a newline, then DISPLAY each datum in D-LIST with no
;;; separators.  (Shadows any built-in FORMAT; callers in this file use
;;; it as a simple logger.)
;;;
;;; Bug fix: the loop's termination test was (cond (d-list ...)), which
;;; relies on the empty list counting as false.  That holds only in old
;;; MIT Scheme dialects; in standard Scheme '() is true, so the loop
;;; would fall through to (cdr '()) and signal an error.  Test
;;; list-emptiness explicitly with PAIR?.
(define format
  (lambda d-list
    (define loop
      (lambda (d-list)
	(cond ((pair? d-list)
	       (display (car d-list))
	       (loop (cdr d-list))))))
    (newline)
    (loop d-list)))
	


;;;; Setup modification utilities.

;;; In order to convert from the normal asymmetric activation function to
;;; a symmetric one, several values have to be altered and the network has
;;; to be rebuilt.  Use these functions so that you don't forget any of
;;; these things and get spurious results.

;;; Convert the network to use a symmetric activation function ranging
;;; from -0.5 to +0.5 instead of 0.0 to 1.0.  Beep if the network is
;;; already symmetric.
;;;
;;; Bug fixes relative to the original:
;;; 1. The (else ...) clause sat OUTSIDE the COND form -- a syntax error.
;;; 2. The test was inverted: it "converted" only when *SYMMETRIC* was
;;;    already true (and then redundantly re-set it to #t).  Mirror
;;;    MAKE-ASYMMETRIC: beep when there is nothing to do, otherwise
;;;    switch modes and remind the user to rebuild.

(define make-symmetric
  (lambda ()
    (cond (*symmetric* (beep))
	  (else
	   (set! *symmetric* #t)
	   (set! *input-zero-value* -0.5)
	   (set! *input-one-value* 0.5)
	   (set! *output-zero-value* -0.5)
	   (set! *output-one-value* 0.5)
	   "Remember to rebuild the current network."))))
    

;;; Convert the network to use an asymmetric activation function ranging
;;; from 0.0 to 1.0 instead of -0.5 to +0.5.  Beep if the network is
;;; already asymmetric.
;;;
;;; Bug fixes relative to the original:
;;; 1. The condition was inverted: it beeped when the network WAS
;;;    symmetric (i.e. exactly when conversion is needed) and "converted"
;;;    an already-asymmetric network.
;;; 2. *SYMMETRIC* is declared as #f and tested with (if *symmetric* ...);
;;;    clearing it to '() only works in old MIT Scheme dialects where '()
;;;    is false.  Use #f so the switch stays a proper boolean.

(define make-asymmetric
  (lambda ()
    (cond (*symmetric*
	   (set! *symmetric* #f)
	   (set! *input-zero-value*  0.0)
	   (set! *input-one-value* 1.0)
	   (set! *output-zero-value* 0.0)
	   (set! *output-one-value* 1.0)
	   "Remember to rebuild the current network.")
	  (else (beep)))))

;;; Use this to complement all the input and output values for the current
;;; training and testing patterns.  Flip each value around the midpoint
;;; between logical one and logical zero.

;;; For all the training and testing patterns, exchange logical one
;;; values and logical zero values.  Other values reflect around the
;;; midpoint between logical one and logical zero.
;;;
;;; Refactor: the original repeated the same double loop four times;
;;; the shared logic now lives in one internal helper.

(define complement-patterns
  (lambda ()
    ;; Destructively reflect the first WIDTH entries of every pattern
    ;; vector in PATTERNS: each entry X becomes SUM - X, where SUM is
    ;; zero-value + one-value for that side of the network.
    (define flip-patterns!
      (lambda (patterns width sum)
	(do ((i 0 (1+ i)))
	    ((>= i (vector-length patterns)))
	  (let ((v (vector-ref patterns i)))
	    (do ((j 0 (1+ j)))
		((>= j width))
	      (vector-set! v j (- sum (vector-ref v j))))))))
    (let ((ival (+ *input-zero-value* *input-one-value*))
	  (oval (+ *output-zero-value* *output-one-value*)))
      (flip-patterns! *training-inputs* *ninputs* ival)
      (flip-patterns! *training-outputs* *noutputs* oval)
      (flip-patterns! *test-inputs* *ninputs* ival)
      (flip-patterns! *test-outputs* *noutputs* oval))))

;;;; Example

;;; The code to build an X-Y-X encoder looks like this.
;;; Display code has been omitted.

;;;   "Build an encoder with X input units, X output units, and Y units in the
;;;   layer connecting them."

(define build-encoder
  (lambda (x y)
    ;; Unit layout after this call: 0 = bias, 1..x = inputs,
    ;; x+1..x+y = hidden, x+y+1..2x+y = outputs.
    (build-data-structures x y x)
    ;; Fully connect inputs -> hidden, then hidden -> outputs.
    (connect-layers 1 (+ x 1) (+ x 1) (+ x y 1) *weight-range*)
    (connect-layers (+ x 1) (+ x y 1) (+ x y 1) (+ x y x 1) *weight-range*)
    ;; Training set: the X one-hot patterns; each pattern serves as both
    ;; the input and the desired output.
    (set! *training-inputs* (make-vector x))
    (set! *training-outputs* (make-vector x))
    (do ((i 0 (1+ i)))
	((>= i x))
        (let ((v (make-vector x *input-zero-value*)))
	  (vector-set! v i *input-one-value*)
	  (vector-set! *training-inputs* i v))
        (let ((v (make-vector  x *output-zero-value*)))
	  (vector-set! v i *output-one-value*)
	  (vector-set! *training-outputs* i v)))))

;;; To run this, do something like (train-test 10 200).
;;; some of my stuff :Fri Oct 12 11:11:59 EDT 1990 - swk


;;; Round VAL to N-PLACES decimal places.  Uses ROUND, so ties go to the
;;; even neighbor; exact arguments yield exact (rational) results.
(define (roundf val n-places)
  (let ((scale (expt 10 n-places)))
    (/ (round (* val scale)) scale)))



;;; Run one forward pass on the pattern INPUTS and return the
;;; output-unit activations as a list, each rounded to two decimal
;;; places.  (The output units start right after the hidden block, at
;;; index *FIRST-HIDDEN* + *NHIDDEN*.)
(define (test-example inputs)
  (forward-pass inputs)
  (let ((out-units
	 (subvector *outputs* (+ *first-hidden* *nhidden*) *nunits*)))
    (map (lambda (x) (roundf x 2))
	 (vector->list out-units))))


;;; For every training pattern, print one line containing the input
;;; bits, the expected output bits, and the network's actual (rounded)
;;; outputs from TEST-EXAMPLE.
(define (output-results)
  (let* ((input-list (vector->list *training-inputs*))
	 (actual-list (map test-example input-list)))
    (for-each
     (lambda (in expected out)
       (write-line (append (vector->list in) (vector->list expected) out)))
     input-list
     (vector->list *training-outputs*)
     actual-list)))

;;; (build-encoder 8 4)
;;; (time (train-test 1 100))
