(require 'random)
(yreq "Utilities/yenta-utils")

;;; The intent here is to have Yentas do -something- if they've just generated
;;; interests, without forcing the user to check things in order for any interesting
;;; behavior to happen.  We may want to reset this later, perhaps.
(define *interests:new-interests-are-interesting-by-default?* #t)

(define log2 (log 2))

(define (randist ave max)
  (do ((result (* ave (/ (- (log (/ (+ (random 1000000) 1) 1000000))) log2))
	       (* ave (/ (- (log (/ (+ (random 1000000) 1) 1000000))) log2))))
      ((> max result) (inexact->exact (floor result)))))

(define (chance) (/ (random 100) 100))

(define (toward to by from)
  (+ from (* by (- to from))))

(define *interests:find-interests-taskname* "Find interests")

;;; (randist ave max) returns a random number; the average value is ave, and
;;; it will not return anything higher than or equal to max.  The distribution is
;;; a reverse exponential.  Actually, the average is a bit high, but that's
;;; because the average of (random n) is a bit low.  *shrug*
;;; Twenty times the average is where the granularity of the random function
;;; kills the probability of getting a number entirely.

;;; Procedures to check on the progress of the interest-finder.

(define (interests:progress-check)	; Called only by run-checker, a debugging function.
  (display (map (lambda (i) (cons i (compare:centroid i)))
		(filter (lambda (i) (> (compare:cluster-size i) 10))
			(compare:indices))))
  (newline))

(define (interests:run-checker)		; Debuggery.
  (scheduler:add-periodic-task! "Interest check" 3 60 interests:progress-check))

(def-yenta-var *interests:clustering?* #f) ; %%% Why is this a yenta-var?

;;; This is used by ui:front-page-contents to know that we have successfully completed
;;; a first scan, whether or not we actually found any documents, so at least it knows
;;; which page to put up.  Adding a state to *ui:startup-state* isn't as clean, because
;;; various other things use that and make various assumptions about it.
(def-yenta-var *interests:at-least-one-scan-completed* #f)

;;; Set by interests:declare-clustering-status.  This has to be a yvar 'cause otherwise
;;; a shutdown after gathering no interests, followed by a startup, doesn't alert!
(def-yenta-var *interests:alert-about-no-interests?* #f)

(define (interests:user-has-interests?)	; Did we find -anything-, ever, that we deem to be an interest?
  (and *interests:information*
       (not (null? *interests:information*))))

;;; Declare whether or not we're clustering, and coincidentally whether or not to be
;;; running the animation.  The animation can't -only- look at *interests:clustering?*,
;;; because it should also run during document scanning, if that winds up taking a while.
(define (interests:declare-clustering-status flag) ; #t if we are currently clustering, else #f.
  (format-debug 0 "~%interests:declare-clustering-status entered.  flag = ~S, old *interests:clustering?* = ~S, old *ui:animation-enable* = ~S~&"
		flag *interests:clustering?* *ui:animation-enable*)
  (set! *interests:clustering?* flag)
  (set! *ui:animation-enable* flag)
  (when (and (not flag)			; If we're not clustering, ...
	     (eq? *ui:startup-state* 'scanning)) ; ... and we used to be scanning, ...
    (set! *ui:startup-state* 'idle)	; ... then we aren't any more...
    (set! *interests:at-least-one-scan-completed* #t))	; ...and we have completed at least one scan.
  (unless flag				; Don't bother checking unless we think we're done...
    (set! *interests:alert-about-no-interests?*
	  (not (interests:user-has-interests?))))) ; If the user didn't give us anything to work with, complain.

(define *interests:badness-threshold* 0.1)
(define *interests:size-threshold* 20) 

;;; Should probably be set to a function of the undone size or something.
(def-yenta-var *interests:min-size* 10)

(def-yenta-param *interests:undone-threshold* 0.25
  "Undone-document similarity threshold"
  "Similarity required to put a document in an interest from the list of documents being processed"
  vars:->[0-1]
  (settable))

(def-yenta-param *interests:spec-factor* 30
  "Single-document interest weight"
  "Weight for an interest specified with a single document."
  vars:->nonneg-number
  (settable))

(define (compare:indices)
  (define (loop count)
    (cond ((> count (compare:index-limit)) '())
	  ((compare:index-valid? count) (cons count (loop (+ 1 count))))
	  (else (loop (+ 1 count)))))
  (loop 0))

(define (interests:find-interests!) #f)	; See find-interests! below for details.

;(let ()

(defvar extras '())

;;;; Utils for database.

(define (rand-cluster)
  (do ((try (random (compare:index-limit)) (random (compare:index-limit))))
      ((compare:index-valid? try) try)))

(define (rand-document coll)
  (let* ((len (compare:collection-length coll))
	 (index (random len))
	 (doc (compare:collection-ref coll index)))
    (compare:remove-document coll index)
    doc))

;;;; Codelets...

(define (analyze stats threshold)
  (let ((best #f)
	(value threshold))
    (for-each (lambda (index)
		(when (> (array-ref stats index) value)
		  (set! value (array-ref stats index))
		  (set! best index)))
	      (compare:indices))
    best))

(define (badness index)
  (let ((bad 0)
	(total 2))
    (do ((docs (compare:documents index) (cdr docs)))
	((null? docs) (if (> total 2)
			  (/ bad total)
			  0)) ; this means the cluster is sealed already
      (let* ((stats (compare:match (car docs)))
	     (value (array-ref stats index))
	     (prob (/ (- 1 value) 4)))
	(for-each (lambda (i)
		    (when (> (array-ref stats i)
			     value)
		      (set! prob 1)))
		  (compare:indices))
	(set! bad (+ bad prob)))
      (inc! total))))

;;;; The main procedure:

(defvar num-added 0)

(define (remove-interest index)
  (for-each (lambda (doc) (compare:add-document doc removed))
	    (compare:documents index))
  (compare:delete-document index))

(define (incoherent index)
  (< (chance) (- (badness index) *interests:badness-threshold*)))

(define undone-threshold *interests:undone-threshold*)

(define (grow-interest interest doc)
  (cond ((and (or (<= (length extras) interest)
		  (null? (list-ref extras interest)))
	      (< (compare:cluster-size interest) *interests:size-threshold*))
	 (compare:increment-document interest doc)
	 (set! undone-threshold (toward *interests:undone-threshold* 0.1
					undone-threshold)))
	(t
	 (set! extras (set-list-ref extras interest 
				    (cons doc (if (<= (length extras) interest)
						  '()
						  (list-ref extras interest))))))))

(define (add-undone-document)
  (let* ((doc (rand-document *interests:undone*))
	 (stats (compare:match doc))
	 (result (analyze stats undone-threshold)))
    (cond (result
	   (grow-interest result doc))
	  (t
	   (compare:insert-document doc)
	   (inc! num-added)
	   (set! undone-threshold (toward 0 0.1 undone-threshold))))))

(define (add-removed-document)
  (let* ((doc (rand-document removed))
	 (stats (compare:match doc))
	 (result (analyze stats undone-threshold)))
    (cond (result
	   (grow-interest result doc))
	  (t
	   (inc! num-added)
	   (compare:insert-document doc)))))

(define (test-interest)
  (let ((index (rand-cluster)))
    (set! num-added (- num-added 1))
    (when (incoherent index)
      (remove-interest index))))

(define (move-documents from to)
  (do ((i (- (compare:collection-length from) 1) (- i 1)))
      ((< i 0) #t)
    (compare:add-document (compare:collection-ref from i) to)
    (compare:remove-document from i)))

(define (do-find-interests!)
  (format-debug 0 "~&do-find-interests!~&")
  (scheduler:add-once-task!
    *interests:find-interests-taskname* 1 scheduler:always
    (lambda ()
      (scheduler:when (not *interests:in-use*)
       (set! *interests:available* #f)
       (interests:declare-clustering-status #t)
       (scheduler:simple-do 
	((= (compare:collection-length *interests:undone*) 0)
	 (scheduler:do ((indices (compare:indices) (cdr indices)))
	   ((null? indices)
	    (move-documents removed *interests:undone*)
	    (format-debug 0 "~%Clustering done; disabling animation...~&")
	    (interests:declare-clustering-status #f)
	    (vars:save-encrypted)	; Checkpoint immediately, so we can't lose this work.
	    (set! *interests:available* #t)
	    (wb:start-interyenta-tasks) ; Actually start up networking, if it hasn't already been started.  Useless to do before clustering.
	    (set! *ui:interest-new* #t)	; Open the door.
	    ;; %%% more stuff...
	    )
	   (let ((ind (car indices)))
	     (cond ((or (< (compare:cluster-size ind)
			   *interests:min-size*)
			(incoherent ind))
		    (remove-interest ind))
		   (t
		    (format-debug 50 "~&ind = ~S~&" ind)
		    (when (> (length extras) ind)
			  (for-each (lambda (doc)
				      (compare:increment-document ind doc))
				    (list-ref extras ind)))
		    (when (null? (safe-list-ref *interests:information* ind))
			  (set! *interests:information*
				(set-list-ref *interests:information*
					      ind 
					      (vector
					       ""
					       0
					       *interests:new-interests-are-interesting-by-default?*
					       (ssl:sha1-fingerprint
						(compare:show-document
						 (compare:centroid ind)
						 5))))))
		    (format-debug 50 "~&*interests:information* = ~S~&" *interests:information*)
		    (when (null? (safe-list-ref *iy:cluster-cache* ind))
			  (set! *iy:cluster-cache*
				(set-list-ref *iy:cluster-cache* ind (list *local-yenta-id*)))))))))
	(let* ((undone-weight 
		(inexact->exact 
		 (floor 
		  (sqrt (compare:collection-length *interests:undone*)))))
	       (test-weight num-added)
	       (removed-weight
		(inexact->exact 
		 (floor (sqrt (compare:collection-length removed)))))
	       (choice (random (+ test-weight removed-weight undone-weight))))
	  (if (> undone-weight choice)
	      (add-undone-document)
	      (if (> (+ undone-weight test-weight) choice)
		  (test-interest)
		  (add-removed-document)))))))))

(define (find-interests!)
  (cond ((scheduler:task-exists? *interests:find-interests-taskname*)
	 #f)
	(t
	 (do-find-interests!)
	 #t)))

(set! interests:find-interests! find-interests!)
;)
