#!/usr/local/bin/clisp -C

;;; Remove duplicates in message catalogs.
;;; Bruno Haible 28.3.1997

;; This could roughly be implemented as
;;   cp INPUT temp1
;;   cp INPUT temp2
;;   msgcomm --more-than=1 -o OUTPUT temp1 temp2
;; but this has the drawbacks that
;;  - msgcomm doesn't seem to be made for this,
;;  - it removes the header,
;;  - it ``normalizes'' the messages its own way.

;; This could also be roughly implemented as
;;   xgettext -d - --omit-header -n -w 1000 INPUT > OUTPUT
;; but this has the drawbacks that
;;  - it sometimes reverses the list of lines belonging to the hunk,
;;  - it removes the header,
;;  - it ``normalizes'' the messages its own way.

(defstruct message
  lines        ; list of all lines belonging to the hunk
  msgid        ; nil or a string
  msgstr       ; nil or a string
  occurs       ; list of strings "file:nn" where the message occurs
)

(defun main (infilename outfilename)
 (declare (type string infilename outfilename))
 #+UNICODE (setq *default-file-encoding* charset:iso-8859-1)
 (with-open-file (istream infilename :direction :input)
  (with-open-file (ostream outfilename :direction :output)
   (let ((hunk-list nil) ; list of all hunks
         (hunk-table (make-hash-table :test #'equal))
           ; (gethash msgid hunk-table) is the hunk who has the given msgid
         (eof "EOF")
        )
    (flet ((read-hunk () ; reads a hunk, returns nil on eof
             (let ((line nil) (lines nil) (occurs nil))
               (loop
                 (setq line (read-line istream nil eof))
                 (when (eql line eof) (return))
                 (if (equal line "")
                   (when lines (return))
                   (progn
                     (push line lines)
                     (when (and (>= (length line) 3) (string= line "#: " :end1 3))
                       (push (subseq line 3) occurs)
                 ) ) )
               )
               (when lines
                 (setq lines (nreverse lines))
                 (setq occurs (nreverse occurs))
                 (flet ((line-group (id &aux (idlen (length id)))
                          (let ((l (member-if
                                     #'(lambda (line)
                                         (and (>= (length line) idlen)
                                              (string= line id :end1 idlen)
                                       ) )
                                     lines
                               ))  )
                            (when l
                              (setq l (cons (subseq (car l) idlen) (cdr l)))
                              (let ((i (position-if-not
                                         #'(lambda (line)
                                             (and (plusp (length line))
                                                  (eql (char line 0) #\")
                                           ) )
                                         l
                                   ))  )
                                (subseq l 0 i)
                       )) ) ) )
                   (let ((msgid (line-group "msgid "))
                         (msgstr (line-group "msgstr ")))
                     (make-message :lines lines
                                   :msgid msgid
                                   :msgstr msgstr
                                   :occurs occurs
               ) ) ) )
          )) )
      (loop
        (let ((hunk (read-hunk)))
          (unless hunk (return))
          (if (null (message-msgid hunk))
            (push hunk hunk-list)
            (let ((other-hunk (gethash (message-msgid hunk) hunk-table)))
              (if (not other-hunk)
                (progn
                  (push hunk hunk-list)
                  (setf (gethash (message-msgid hunk) hunk-table) hunk)
                )
                (progn
                  (unless (equal (message-msgstr hunk)
                                 (message-msgstr other-hunk)
                          )
                    (warn "Same message, different translations: ~A and ~A"
                          (message-occurs hunk) (message-occurs other-hunk)
                  ) )
                  (setf (message-occurs other-hunk)
                        (append (message-occurs other-hunk)
                                (message-occurs hunk)
                  )     )
      ) ) ) ) ) )
      (setq hunk-list (nreverse hunk-list))
    )
    (flet ((print-hunk (hunk)
             (let ((lines (message-lines hunk))
                   (msgid (message-msgid hunk))
                   (msgstr (message-msgstr hunk))
                   (occurs (message-occurs hunk)))
               (dolist (line lines)
                 (cond ((and (>= (length line) 3) (string= line "#: " :end1 3))
                        (when occurs
                          (format ostream "#: ~{~A~^ ~}~%" occurs)
                          (setq occurs nil)
                       ))
                       ((and (>= (length line) 1) (string= line "#" :end1 1))
                        (format ostream "~A~%" line)
                       )
                       ((and (>= (length line) 6) (string= line "msgid " :end1 6))
                        (format ostream "msgid ~{~A~%~}" msgid)
                       )
                       ((and (>= (length line) 7) (string= line "msgstr " :end1 7))
                        (format ostream "msgstr ~{~A~%~}" msgstr)
                       )
               ) )
               (format ostream "~%")
          )) )
      (mapc #'print-hunk hunk-list)
    )
))))

(main (first *args*) (second *args*))
