;;;; Finding other Yentas, on the local wire and at the central server.

(yreq "Scheduler/scheduler")
(yreq "Utilities/yenta-utils")
(yreq "Parameters/persistence")
(require 'common-list-functions)

;;; This file implements support for bootstrapping Yentas.
;;; Normal Yentas eavesdrop the local wire for broadcast pings.
;;; They respond with pongs listing, for any given Yenta, a collection
;;; of other Yentas that it's been talking to recently.
;;;
;;; The central server works differently.  It also listens for pings,
;;; although such pings must come to it via a normal connection, since
;;; broadcasts aren't routed.  (If it so happens that a Yenta boots
;;; on the same wire as the central server---something that can only
;;; happen on one subnet in the world---it will receive responses
;;; from any other Yentas on that wire, -and- the central server,
;;; without explicitly asking the central server.  This is okay.)
;;; The central server also listens for registrations, which look
;;; like a ping, but use a different opcode (register, not ping),
;;; which is its clue that it should not bother to send a response.
;;; (Yentas register every time they come up, unless turned off by
;;; the user, whereas they ping only the very -first- time they
;;; come up, when they know of no other Yentas in the world.)
;;; The central server responds to a ping with a random selection
;;; from a table it maintains of all registrations.
;;;
;;; Yentas do -not- record random other Yentas they hear pinging
;;; on their local wire.  Replying with such data to a Yenta coming
;;; up would tend to bias its initial selection of peers to only
;;; those on the local wire, whereas replying with other Yentas
;;; actually being talked to tends to get remote Yentas into the act.
;;; Obviously, if the local wire is firewalled, these two are equivalent.

;;; Things that MUST STAY IN SYNC WITH EACH OTHER:
;;; . boot:ping (transmission of ping and registrations)
;;; . boot:decode-udp (reception of ping, pong, and registrations)

;;;; Globals.

(define *boot:port* 14999)

(define *boot:pong-delay* 2)		; How many seconds, at most, we might wait before responding to a ping.  See boot:pong-delay.
(define *boot:ping-interval* 5)		; How many seconds between pings.  Should be > *boot:pong-delay*, at least!
(define *boot:ping-policy* '(b b b c c)) ; The order in which to do the pings (B = broadcast, C = central server).

(define *boot:show-status* #t)		; For debugging.

;;; Never make this variable a yenta-var.  Doing so would cause its value to be
;;; persistent, which means that changes to either the hostname (done in a newer
;;; release of Yenta) or the IP address (done via the DNS) would not be noticed.
;;; Since this is only used for a short window while coming up, it's okay that
;;; we don't force a new DNS lookup every time (as we'd do if we stored only
;;; the name, and not the IP address, in this variable).
(define *boot:server-address* "yenta-boot.media.mit.edu")

(define *boot:ping-protocol* 1)		; Protocol we're using in pings.
(define *boot:known-protocols* '(1))	; List of protocols we support hearing about in pongs.

(defvar *boot:response-list* '())	; All the IP addresses we've been able to discover, via pinging, of other Yentas.
(defvar *boot:responses-ready?* #f)	; Whether or not we think we're done pinging and collecting responses.
(define *boot:udp-listener-taskname* "UDP listener")   ; We need to refer to it in multiple places.
(define *boot:ping-periodic-taskname* "Ping periodically") ; Ditto.

(defvar *boot:registration-counter-reset* 3) ; What to set the registration counter to whenever we start to register.
(defvar *boot:registration-counter* *boot:registration-counter-reset*) ; When this hits zero, we stop.
(define *boot:registration-taskname* "Register periodically")

(define *boot:we-are-central-server* #f) ; True only on one machine in the world.  Enables different behavior.

(def-yenta-var *boot:we-know-somebody* #f) ; %%% This should probably be a procedure call that looks in the rumor & cluster caches, or something...

;;;; Central dispatch for responding to any UDP packet.

;;; Everything on this page is "server side".  The "server" is
;;; either a random Yenta listening for broadcasts from new Yentas,
;;; or the central server with its registration database.

(define *boot:udp-listener* #f)		; Set by boot:open-udp and boot:close-udp via various callers.  This is the UDP listener object.

(define (boot:open-udp)
  (set! *boot:udp-listener* (udp:create *boot:port*))
  (cond (*boot:udp-listener*
	 #t)
	(t
	 (logger:log-and-display 1 "Couldn't create a UDP listener.")
	 #f)))

(define (boot:close-udp)
  (when *boot:udp-listener*
    (udp:close *boot:udp-listener*)
    (set! *boot:udp-listener* #f)))	; Don't let us try to close it a second time; we'll err.

(define (boot:start-udp-listener)
  (when (boot:open-udp)			; If we can't create it somehow, at least don't bomb everything, I guess...
    (scheduler:add-task!
	*boot:udp-listener-taskname* 2
	(lambda () (udp:data-ready? *boot:udp-listener*))
	(lambda () (boot:receive-udp *boot:udp-listener*)))))

(define (boot:kill-udp-listener)
  (when (scheduler:task-exists? *boot:udp-listener-taskname*)
    (scheduler:remove-task! *boot:udp-listener-taskname*)) ; Must do this -before- destroying the UDP object!
  (boot:close-udp))

;;; The actual list of foreign Yenta IP addresses we return.
;;; This behaves differently if we're the central server.
(define (boot:foreign-yentas)
  (cond (*boot:we-are-central-server*
	 (boot:yt-random))
	(t
	 (identity:all-ips))))

;;; We don't want to respond instantly to any ping, because we might have a subnet full of idle Yentas
;;; on idle machines, and they'll -all- try to respond at the same time.  On an Ethernet, this would lead
;;; to a packet storm due to all the collisions.  It'll eventually dissipate due to random timing variances
;;; and exponential backoff of reasonable Ethernet implementations, but such storms can last ~ 20 seconds
;;; under bad circumstances.  (C.f., Project Athena's rwho-on-a-network-of-machines-that-thought-they-were-routers
;;; meltdown.)  So instead of dealing with that mess, just wait a random amount of time, uniformly distributed
;;; between approximately zero and *boot:pong-delay* seconds, before responding.
(define (boot:pong-delay)
  (let ((sec (random *boot:pong-delay*))
	(usec (random 1000000)))
    (sleep sec)				; We have to sleep by seconds and microseconds separately, ...
    (usleep usec)))			; ... due to the API presented by SCM.  (I was lazy.  So sue me.)

;;; This is where the decoded (e.g., by sch-read-exp, a safe version of read) data goes.
;;; It must be stored globally because it has several possible clients which are glued
;;; together only by the scheduler, and this is a lot easier than trying to make sure
;;; that they are all in a common scope.
(defvar *boot:decoded-data* nil)

;;; Decodes another Yenta's utterance.  This could be one of three things:
;;; 1 a ping, asking for other Yentas' IP addresses
;;; 2 a registration, informing the central server of a Yenta coming up
;;; 3 a pong, responding to another Yenta's ping.
;;; Case 1 is seen by all Yentas on the same wire as some Yenta which is
;;; coming up for the first time and broadcasting.  Case 2 should only be
;;; seen by the central server.  Case 3 should only be seen by a Yenta
;;; that has previously uttered a ping (barring pathological bugs like
;;; pongs to the broadcast address).  [Note that our use of "pong" here
;;; is solely because it's the dual of ping; it has nothing to do with
;;; the BSD source-routed-ping program called pong.]
;;;
;;; Once it's decoded the utterance, it serves as the central dispatch
;;; routine that calls any actions that take place when we here something
;;; via UDP.
;;;
;;; Formats:
;;;
;;; PING      (ping     1 ((yenta 0 9) (scm 5 2 3) (ssl 0 8 1) (savant 1 3 8) (<uname>)))
;;; REGISTER  (register 1 ((yenta 0 9) (scm 5 2 3) (ssl 0 8 1) (savant 1 3 8) (<uname>)))
;;; Both of these give an opcode, a version number for the protocol actually
;;; in use, and the complete Yenta version information, where (<uname>) is
;;; a singleton list (so far) containing the vector returned (uname) on the
;;; machine registering, transformed into a list (because safe-read-exp cannot
;;; read #(...) notation).  This allows me to figure out which architectures
;;; are most popular, so I know where to throw my support resources.
;;;
;;; PONG      (pong     1 "10.20.30.40" "192.200.81.45" ...)
;;; This gives an opcode, a protocol version number, and IP addresses
;;; (e.g., the ASCII representation in dotted-decimal notation).
;;; If one of the IP addresses is not a string, we'll ignore that
;;; address only, but we won't drop the entire packet.  If we ever rev
;;; the bootstrap protocol in the future, the logical thing to do
;;; would be to put something not a string in there (a list or number),
;;; and let new enough Yentas interpret that differently.
;;;
;;; In all of these formats,  the data -must- be a list, and the opcode
;;; must be recognized, or we'll drop the response as garbage.  
;;; The first number after the pong is a version number for the
;;; protocol; it's currently 1, and current Yentas will drop anything
;;; that doesn't claim to be version 1.  "Future expansion."
;;;
;;; Note that whether we will see our -own- broadcasts is highly dependent
;;; upon the network medium (thicknet, thinnet, 10|100baseT, ATM...), on
;;; whether we are on a routing switch, a hub, or something else, the type
;;; of networking card, the OS...  You cannot make any assumptions here.
;;;
;;; No useful value returned.
(define (boot:decode-udp addr str)	; Address of the guy sending the packet, and the packet as a string.
  (inc! *ctr:boot-udp-received*)
  (when *boot:show-status* (format-debug 20 "~A ~A Received ~S~&" (date-string) addr str))
  (cond ((equal? addr (local-host-ip))
	 (inc! *ctr:boot-dropped-own*))	; If this is our own damned broadcast, drop it immediately!
	(t
	 (scheduler:sequences
	   ((call-with-input-string str ; Go ahead and decode the packet.
	      (lambda (port)
		(sch-read-exp port (lambda (val) (set! *boot:decoded-data* val))))))
	   ((let ((d *boot:decoded-data*))
	      (cond ((and (list? d)	; Must be a list.
			  (> (length d) 2) ; Must have at least opcode, protocol number, and something else.
			  (number? (cadr d)))
		     (cond ((memq (cadr d) *boot:known-protocols*)
			    (boot:opcode-dispatch addr d))
			   (t
			    (inc! *ctr:boot-udp-bad-version*))))
		    (t
		     (inc! *ctr:boot-udp-other-invalid*)))))))))

(define (boot:opcode-dispatch addr d)	; Address of the guy sending the packet, and the decoded packet as a list.
  (define (remember)
    (boot:yt-add addr (caddr d)))
  (let ((opcode (car d))
	(our-addr (local-host-ip)))
    (case opcode
      ((ping)
       (when *boot:show-status* (format-debug 20 "~A Handling ping...~&" (date-string)))
       (when *boot:we-are-central-server* ; Log this Yenta, iff we're the central server
	 (remember))
       (let* ((ip-addrs (boot:foreign-yentas))
	      (ip-minus-them (remove addr ip-addrs)) ; Don't tell them about themselves.
	      (ip-all (pushnew! our-addr ip-minus-them equal?)) ; Mention ourselves, in case different from our outgoing interface (we're a router?).
	      (response (format nil "~S" `(pong ,*boot:ping-protocol* ,@ip-all))))
	 (unless *boot:we-are-central-server* ; 
	   ;; There's only one central server, so even if request was broadcast on its wire, it doesn't need to wait.
	   (boot:pong-delay))		; %%% We should really do this via scheduler:sequences & with a task, since we're hanging the UI here....
	 (when *boot:show-status* (format-debug 20 "~A Sending response ~S to ~A...~&"  (date-string) response addr))
	 (inc! *ctr:boot-pongs-sent*)
	 (udp:send *boot:udp-listener* addr *boot:port* response) ; Reply to ping with pong.
;;	 (format-debug 20 "~A Response away!~&" (date-string))
	 ))
      ((pong)
       (when *boot:show-status* (format-debug 20 "~A Handling pong...~&" (date-string)))
       (unless *boot:we-know-somebody*
	 (inc! *ctr:boot-pongs-received*)
	 (let ((cs-addr (inet:address->string (inet:string->address "yenta-boot.media.mit.edu")))) ; Central server addr (IP addr as a string).
;;	   (format-debug 20 "~&We've got ~S~&" (cddr d))
	   (for-each (lambda (elt)
		       (when (and (string? elt)	; If it's not a string, we drop it immediately.
				  (not (equal? elt cs-addr)) ; Don't let anyone tell us the central server is really another Yenta.
				  (not (equal? elt our-addr)))	; Don't let other people tell us about ourselves.  That's stupid.
			 (pushnew! elt *boot:response-list* equal?)))
		     (cddr d))
	   ;; We might as well add the address of the responder to the list, if it didn't already include itself.
	   ;; I'm a little dubious about this, since if (say) 5 guys respond w/useless lists, we'll still add those
	   ;; guys themselves, and thus fail to ask the bootserver for people not on the local wire.  It's not clear
	   ;; to me exactly what the right tradeoff is here.  (The clever tradeoff would be to make boot:responses-acceptable
	   ;; check for number of responses not on the local wire, instead of just number of total responses.  That requires
	   ;; access to the subnet mask, so we know what "same wire" really means.)
	   (unless (equal? addr cs-addr) ; Don't add the central server's address if it's the responder---it's not really another Yenta.
	     (pushnew! addr *boot:response-list* equal?))
	   (boot:responses-acceptable?)))	; Maybe set *boot:we-know-somebody* if we've got enough.
;;     (format-debug 20 "~A Pong handled!~&" (date-string))
       )
      ((register)
       (when *boot:show-status* (format-debug 20 "~A Handling register...~&" (date-string)))
       (cond (*boot:we-are-central-server*
	      (inc! *ctr:boot-registrations-received*)
	      (remember))
	     (t
	      (inc! *ctr:boot-misdirected-registrations*))))
      (else
       (inc! *ctr:boot-udp-bad-opcode*)))))

;;; This is the actual routine to be called when udp:data-ready? goes true.
(define (boot:receive-udp pingd)
  (let* ((incoming (udp:receive pingd))
	 (data (car incoming))
	 (addr (cadr incoming))
	 (port (caddr incoming)))
    (when *boot:show-status* (boot:display-udp addr port data))
    (boot:decode-udp addr data)))

(define (boot:display-udp addr port data) ; Debugging function.
  (format-debug 20 "~&~A ~A:~A: ~A~&" (date-string) addr port data))

;;;; Client side.  These find other Yentas, ping an address, and listen for responses.

;;; Figuring out the broadcast address.
;;;
;;; Note that we can't just use 255.255.255.255 or somesuch,
;;; because such address will be routed, not broadcast.  Bummer.
;;;
;;; We lack easy, machine-independent access to the netmask,
;;; so guess.  One of them will be right if we're on a network
;;; whose subnet mask is a multiple of 8 bits.  If not, we're
;;; just doomed...
(define (boot:plausible-broadcast-addresses)
  (let* ((local-addr (inet:string->address (local-ip-address)))
	 (num (inet:local-network-address local-addr)) ; This is the address minus its class (not the true netmask!), e.g., 0.2.3.4 for a class A.
	 (network (inet:network local-addr)) 
	 (masks (cond ((< num #x00000100) '(#x000000FF)) ; Class C.
		      ((< num #x00010000) '(#x000000FF #x0000FFFF)) ; Class B.
		      ((< num #x01000000) '(#x000000FF #x0000FFFF #x00FFFFFF))   ; Class A.
		      (t                  '(#x000000FF #x0000FFFF #x00FFFFFF)))) ; For now, make this a separate case, even though we treat it as A.
	 (addrs (map (lambda (mask)
		       (logior local-addr mask)) ; inet:make-address doesn't do what we want here...
		     masks)))
    (map inet:address->string addrs)))

(defvar *boot:pings-to-go* '())		    ; Set by boot:set-pings, called by boot:find-yentas.  List of ping host-tuples.

(define (boot:set-pings)
  (let ((broadcast-addrs (boot:plausible-broadcast-addresses)))
    (set! *boot:pings-to-go* '())
    (for-each (lambda (flag)
		(set! *boot:pings-to-go*
		      (append *boot:pings-to-go*
			      (list (case flag
				      ((b) broadcast-addrs)
				      ((c) (list *boot:server-address*))
				      (else (logger:log-and-display
					     1
					     "*boot:ping-policy* had illegal element ~S in ~S."
					     flag *boot:ping-policy*)
					    '()	; Don't append anything in this case.
					    ))))))
	      *boot:ping-policy*)))

;;; How we know when we've got enough data back.
;;; Also tells other callers that we're happy if so.
(define (boot:responses-acceptable?)
  (cond ((>= (length *boot:response-list*) 3)
	 ;; %%% NOTE!  At the moment, we -never- claim to know enough not to do bootstrap stuff!
	 ;; %%% I'm going to leave this this way until I'm sure that we really -are- contacting
	 ;; %%% people in the caches correctly on startup, etc.  I want to make damned sure that
	 ;; %%% we don't spuriously come up and then sit, doing nothing.  --- Foner 8 Mar 99.
;	 (set! *boot:we-know-somebody* #t) ; %%% Dump this if we wind up looking into the rumor/cluster caches.
	 #t)
	(t #f)))

;;; Yentas coming up should call this every time, unless disabled by the user.
;;; This pings the central server repeatedly, since UDP packets, especially the
;;; first one through, e.g., a learning bridge, can get lost, and since the server
;;; does not acknowledge registrations.  (It'll toss any duplicates it receives.)
(define (boot:register)
  (scheduler:add-periodic-task!
    *boot:registration-taskname* 2 *boot:ping-interval*
    (lambda ()
      (cond ((positive? *boot:registration-counter*)
	     (dec! *boot:registration-counter*)
	     (inc! *ctr:boot-registrations-sent*)
	     (boot:ping *boot:server-address* #t))
	    (t
	     (scheduler:remove-task! *boot:registration-taskname*))))))

;;; How often to reregister Yenta's, in seconds.  This keeps Yentas that have been up "forever"
;;; from apparently disappearing from the bootserver---such Yentas are perhaps the -most- valuable
;;; ones for others to know about, since they've seen lots of neighbors.
(define *boot:reregister-timeout* (* 7 24 3600)) ; One week.

(define *boot:reregister-taskname* "Reregister periodically")

;;; Actually start revisiting old Yentas.  Don't call this until everything else is initialized.
(define (boot:reregister-periodically)
  (scheduler:add-periodic-task!		; This first runs -after- the timeout expires, which is correct.
    *boot:reregister-taskname* 1 *boot:reregister-timeout*
    (lambda ()
      (set! *boot:registration-counter* *boot:registration-counter-reset*) ; Must do this or we'll assume we're already done...
      (boot:register))))

;;; The format of this packet MUST STAY IN SYNC with what's
;;; expected by boot:decode-udp.  We transmit:
;;;   'ping,
;;;   the ping protocol version we're using, and
;;;   the current Yenta version (a list of all component versions,
;;;     whose final element is a singleton list containing as its
;;;     sole element (so far...) the vector returned by (uname),
;;;     transformed into a list (because safe-read-exp cannot
;;;     read #(...) notation).
;;; Note that we do -not- transmit our YID!  The recipient
;;; doesn't need it (we can always talk to it ourselves later),
;;; and the boostrap server should -not- get it!  (It would be
;;; a potential security hole to be storing IP/YID correspondences,
;;; and an eavesdropper to this plaintext transmission could trivially
;;; build such a table even if the server did not.)
(define (boot:ping host registration?)
  (let* ((opcode (if registration? 'register 'ping)) ; So the central server knows whether to bother responding.
	 (data (format nil "~S"		; Sucky way to make a string of this...
		       (list opcode *boot:ping-protocol*
			     `(,@*yenta-component-versions*
			       ,(list (vector->list (uname))))))))
    (when *boot:show-status* (format-debug 20 "Sending request ~S to ~S...~&" data host))
    (udp:send-now host *boot:port* data)))

(define (boot:start-periodic-pings)
  (scheduler:add-periodic-task!
   *boot:ping-periodic-taskname* 2 *boot:ping-interval*
   boot:periodic-ping-task))

(define (boot:kill-periodic-pings)
  (set! *boot:responses-ready?* #t)
  (scheduler:remove-task! *boot:ping-periodic-taskname*))

;;; Sets up a periodic task to send out pings at regular intervals, and assumes
;;; that someone else has set up the UDP receiver task.  On each iteration, we
;;; decide whether enough data has been accumulated to stop.  If so, the task kills
;;; itself; if not, it tries the next address(es) to ping.  When it either decides
;;; it's done, or runs out of addresses, it sets *boot:responses-ready?* to #t to
;;; tell consumers that no more data is forthcoming.  *boot:response-list* is a
;;; list of whatever it got.
;;;
;;; *boot:pings-to-go* determines how we actually do the pings.  It's a list of tuples.
;;; When we do a ping, we take the car of this list and ping every address in the
;;; tuples simultaneously, then wait for *boot:ping-interval* seconds before trying
;;; again (having set *boot:pings-to-go* to its cdr in the meantime).  When the list
;;; runs out, we're done.
;;;
;;; Note that we don't set *boot:we-know-somebody* here, even if we finishing pinging
;;; without uncovering enough respondants, 'cause we -don't- know (enough) somebodies.
;;; This has the interesting side-effect that, if somebody -does- give us a pong
;;; (late response? bug?), we'll go ahead and listen to it---though we won't wind up
;;; calling boot:contact-pongs on it, which is unfortunate.  Of course, to be such a
;;; late responder, the responder must not have responded by the -next- iteration of
;;; the pinging task, which is several seconds from when the ping went out.
;;; 
;;; We do everything with global variables so we don't have to pass continuations
;;; around to the scheduler.
(define (boot:periodic-ping-task)
  (cond ((or (boot:responses-acceptable?) ; We're happy with the set we got.  Don't bother pinging any more.
	     (null? *boot:pings-to-go*))  ; We ran out of candidates without satisfaction.  Tough luck.
	 (boot:kill-periodic-pings)
	 (boot:contact-pongs))		; Go try to contact everybody in the set.  Later responders won't be contacted, alas...
	(t
	 (let* ((pos (length *boot:pings-to-go*))	; Figure out whether this is a broadcast or central ping ...
		(what (car (last *boot:ping-policy* pos)))) ; ... by seeing where in *boot:ping-policy* we must be.
	   (case what		; Now bump the appropriate counters.
	     ((b) (inc! *ctr:boot-broadcast-pings-sent*))
	     ((c) (inc! *ctr:boot-central-pings-sent*))
	     (else (logger:log-and-display ; Theoretically impossible even if *boot:ping-policy* mis-set, 'cause of boot:set-pings' check.
		    1 "*boot:ping-policy* had illegal element ~S in ~S." flag *boot:ping-policy*))))
	 (for-each (lambda (addr) ; Ping the next set of candidates.
		     (boot:ping addr #f))
		   (car *boot:pings-to-go*))
	 (set! *boot:pings-to-go* (cdr *boot:pings-to-go*)))))

(define (boot:maybe-find-yentas)
  (when (and (not *boot:we-know-somebody*) ; If we already know people, don't bother asking again.
	     *boot:udp-listener*)	; If we couldn't bind the listener, then we're already broken; don't bother pinging.     
    (boot:set-pings)			; Figure out the total list of addresses to ping.
    (boot:start-periodic-pings)))

;;; Actually try to contact the Yentas who've responded to our pings.
(define (boot:contact-pongs)
  (for-each iy:contact-host *boot:response-list*))

;;;; Code for the central server.  Nothing on this page is used anywhere but there.

(define *boot:response-size* 10)	; The maximum number of Yentas we'll return in a response.
					; [17 bytes (including space and quotes) per IP -> 170 bytes.  Even w/headers, << 576 octet WAN MTU.]

;;; +++ The table of Yentas we know about.  It's called "yt".

;;; Known either because they registered with us, or because they pinged.
;;; [One could argue that any sane Yenta will register shortly after pinging,
;;;  but we might as well record the information redundantly, just in case.]
;;; Note that this variable will be saved by -all- Yentas, not just the central server.
;;; However, it will be empty everywhere by there.  [Set #f, not '(), to provoke a
;;; blowout if this doesn't get properly initialized before use.]
(def-yenta-var *boot:yt* #f)		; Tuples of (ip-address-as-string timestamp version).
(def-yenta-var *boot:ytn* 0)		; Current number of entries.
(define *boot:ytmax* 500)		; Maximum size of the table.

;;; We manage a table in the vector *boot:yt*.  We always
;;; insert at the end, defined as the location pointed at by
;;; *boot:ytn*.  If the table is already at its maximum size, a
;;; random element is deleted, and the new element takes its place.
;;; We do this, rather than implementing a true ringbuffer, because
;;; we also delete entries that are too old as a separate operation,
;;; which takes care of expiring really ancient stuff without the
;;; complexity of a ringbuffer.  When an entry is deleted and the
;;; table is -not- full (e.g., because it has been expired), the
;;; last entry in the table takes its place, and ytn is decremented.
;;; We do things this way to keep the table compact---it always
;;; contains valid elements in the interval [0..ytn], and contains no
;;; valid elements after ytn.  This makes it a constant-time operation
;;; to extract r random elements from the table to answer a ping, and
;;; also makes both insertion and deletion very fast.  Since we don't
;;; really care what the true order of the table is (almost all operations
;;; are content to handle effectively random elements), we're fine.
;;; The only case where this can be slow is that we have to linearly
;;; scan the table during insertion to avoid duplicate IP addresses;
;;; speeding that up would require sorting the table, or a secondary
;;; key, or hashing, none of which seem worth it.

(define (boot:yt-make)
  (set! *boot:ytn* 0)
  (set! *boot:yt* (make-array '() *boot:ytmax*)))

(define (boot:yt-get n)
  (array-ref *boot:yt* n))

(define (boot:yt-delete n)
  (unless (zero? *boot:ytn*)
    (dec! *boot:ytn*)
    (unless (zero? *boot:ytn*)		; If the table is now empty, there is nothing to move.
      (array-set! *boot:yt* (boot:yt-get *boot:ytn*) n))))

(define (boot:yt-add ip version)
  (define (set n)
    (array-set! *boot:yt* (list ip (current-time) version) n))
  (define (add n)
    (cond ((= n *boot:ytn*)		; Didn't find the IP address already, so add entry at the end.
	   (when (= *boot:ytn* *boot:ytmax*)
	     (boot:yt-delete (random *boot:ytmax*))) ; Table is full, so delete an element at random, and put last elt where it was.
	   (set *boot:ytn*)
	   (inc! *boot:ytn*))
	  (t
	   (if (equal? ip (car (boot:yt-get n)))
	       (set n)			; Found the IP address.  Bash the old entry with the new one.
	       (add (1+ n))))))		; Haven't found it yet; keep searching.
  (add 0))				; Start searching from element 0.

;;; This could get reasonably inefficient if we have 11 entries and the
;;; caller wants 10 of them, because we use pushnew and don't stop until
;;; we've got the number requested.  It gets better as the table grows,
;;; of course.  We're saved by the relative shortness of the resulting
;;; list (otherwise, the pushnew would be pretty expensive, too).  We
;;; -could- simply return a random list w/duplicates and let the pinger
;;; sort it out, since it's using pushnew as well...
(define (boot:yt-random)
  (define (ran sofar)
    (if (= (length sofar) *boot:response-size*)
	sofar
	(ran (pushnew! (car (boot:yt-get (random *boot:ytn*))) sofar 'equal?))))
  (define (all sofar n)
    (if (= n *boot:ytn*)
	sofar
	(all (push! (car (boot:yt-get n)) sofar) (1+ n))))
  (cond ((zero? *boot:ytn*)		; We haven't heard any registrations yet.
	 '())				; Hope the caller can cope...
	(t
	 (if (< *boot:response-size* *boot:ytn*)
	     (ran '())			; Caller wants a subset of the table.
	     (all '() 0)))))		; Caller wants at least as much as we have.  Give it everything we've got.

;;; +++ Things that use the table above.

(define *boot:prune-staleness* (* 3600 24 31)) ; How many seconds old a Yenta's registration should be before purging it.  Currently a month.
(define *boot:prune-interval* (* 3600 24)) ; How often to prune old entries from the table, in seconds.  Currently daily.
(define *boot:checkpoint-interval* 3600)  ; How often to save our table.  Currently hourly.

;;; We have to delete from the top down, since the act of deleting something will decrement ytn
;;; and move the last element into the deleted slot; this means that we'll miss entries if we
;;; index upwards from 0 instead of downwards from (1- ytn).
(define (boot:delete-old-yentas older-than)
  (define (del? n)
    (when (< (cadr (boot:yt-get n)) older-than)
      (boot:yt-delete n)))
  (define (next n)
    (del? n)
    (unless (zero? n)
      (next (1- n))))
  (unless (zero? *boot:ytn*)		; If it's negative, we're doomed---we never check for that anywhere...
    (next (1- *boot:ytn*))))

(define (boot:prune-old-yentas)
  (scheduler:add-periodic-task!
    "Prune old Yentas" 1 *boot:prune-interval*
    (lambda ()
      (boot:delete-old-yentas
       (- (current-time) *boot:prune-staleness*)))))

(define (boot:checkpoint)		; Note that the table can't be in the middle of a prune, because pruning...
  (scheduler:add-periodic-task!		; ... takes place all in one scheduler task timeslice (it doesn't yield).
    "Checkpoint registrations" 1 *boot:checkpoint-interval*
    vars:save-vars))			; [This is okay---it's the central bootserver saving this, not a user's Yenta.]

;;; Only run this if this is a standalone bootstrap server.
(define (boot:central-server-initialize)
  (unless *boot:we-are-central-server*
    (error "We are not the central server!")) ; Just blow out.  No customer world will ever have this set.
  (unless (directory? (yenta-name ""))	; We assume that the wobbling script has set *yenta-name-override* appropriately!
    (mkdir (yenta-name "") #o700))
  (vars:load-vars)			; [This is okay---it's the central bootserver loading this, not a user's Yenta.]
  (manage-persistent-yenta-versions)
  (vars:save-vars)			; Do an immediate resave, so we'll know right now if the permissions become wrong or somesuch.
  (unless *boot:yt*			; If we've already loaded a table, don't zero it out!
    (boot:yt-make))
  (boot:prune-old-yentas)
  (boot:checkpoint)
  (boot:start-udp-listener)
  (scheduler:initialize!))

;;; End of file.
