/*
gender_order - a LifeLines gender order statistics program
        by Jim Eggert (eggertj@ll.mit.edu)
        Version 1,  5 August 1993
                listsort code by John Chandler (JCHBN@CUVMB.CC.COLUMBIA.EDU)
        Version 2, 10 August 1993
                added family examples, modified output format slightly

This program computes gender order statistics for children in all the
families in a database.  Genders are considered to be ternary: male
(M), female (F), or unknown (U).  Children in a family form a pattern
of genders by birth order, e.g. MFFM for a family consisting of a boy,
two girls, then a boy.  The frequency of these patterns is calculated.
In addition to the complete gender pattern for a family, initial
gender patterns are computed.  For the above example, the initial
patterns are . (no children), M, MF, MFF, and MFFM.  The frequency of
these initial patterns can be used to answer questions such as how
many families with a boy then two girls go on to have another boy.

For example, suppose you want to know what fraction of families with a
child gender pattern P (e.g., P=MFFM) have no more children, have a
boy next (PM), have a girl next (PF), and have a child of unknown (to
the database!) gender next (PU).  You can find these fractions as
#complete(P)/#initial(P), #initial(PM)/#initial(P),
#initial(PF)/#initial(P), and #initial(PU)/#initial(P), respectively.
Note that these fractions should add up to 1.  Also note that the
pattern "." denotes no children at all.  As an initial pattern it gives
the total number of families in the database; as a complete pattern, the
number of childless families in the database.

You can use either of two compare procedures to sort the results
differently.  Rename the one you want to use as compare, the other one
something else (like compare1).

*/

/* Shared result slot for string comparisons: the compare procedures
   store their verdict here (negative, zero or positive) and listsort
   reads it after each "call compare(...)". */
global(comparison)

/* Alternative ordering: plain strcmp order of the two strings,
   regardless of their lengths.  The verdict is left in the global
   "comparison". */
proc compare1(astring,bstring) {
    set(verdict,strcmp(astring,bstring))
    set(comparison,verdict)
}

/* Length-first ordering: the shorter string sorts first; strings of
   equal length fall back to strcmp order.  The verdict is left in the
   global "comparison". */
proc compare(astring,bstring) {
    set(lendiff,sub(strlen(astring),strlen(bstring)))
    if (gt(lendiff,0)) {
        set(comparison,1)
    }
    elsif (lt(lendiff,0)) {
        set(comparison,neg(1))
    }
    else {
        set(comparison,strcmp(astring,bstring))
    }
}

proc listsort(alist,ilist) {
/*
   Index sort: orders the elements of "alist" without moving them.

   Input:  alist  - list of strings (could use numbers, instead)
   Output: ilist  - list of index pointers into "alist" in sorted order
                    (presumably expected to be empty on entry; its first
                    length(alist) elements are overwritten -- confirm
                    with callers)
   Needed: compare- external "function" of two arguments to set global
                    variable "comparison" to -1,0,+1 according to relative
                    order of the two arguments (only the sign of
                    "comparison" is tested here)

   Uses an internal list (wlist) as a work area holding an ascending run
   of indices.  Each pass pulls length(alist) indices from ilist and
   grows the run at either end; indices that do not fit are put back on
   ilist for a later pass.  Sorting is finished when one pass leaves
   every index in the run.  The original author notes this is
   significantly faster than a bubble sort.

   NOTE(review): the logic assumes enqueue adds at one end of a list,
   dequeue removes from the other end, requeue adds at the end dequeue
   removes from, and getel(list,1) is the element dequeue would return
   next -- confirm against the LifeLines report manual.
*/
        list(wlist)

        set(list_size,length(alist))
        set(completion,0)
        /* start with the identity permutation 1..list_size in ilist */
        set(index,0)
        while (lt(index,list_size)) {
                set(index,add(index,1))
                setel(ilist,index,index)
        }
        /* repeat passes until the run in wlist covered the whole list */
        while (lt(completion,list_size)) {
                set(count,0)
                /* one pass: exactly list_size indices are taken from ilist */
                while (lt(count,list_size)) {
                        set(count,add(count,1))
                        set(index,dequeue(ilist))
                        set(test,getel(alist,index))
                        /* the first element of a pass seeds both ends of the run */
                        if(eq(count,1)) {
                                set(work_min,test)
                                set(work_max,test)
                        }
                        call compare(test,work_max)
                        /* not below the current maximum: extend the run at its top */
                        if(ge(comparison,0)) {
                                enqueue(wlist,index)
                                set(work_max,test)
                        } else { call compare(test,work_min)
                                /* test lies inside the run: hand the smallest run
                                   entries back to ilist until test is no longer
                                   above the run's minimum.  test is below work_max
                                   here, so the run's last entry always stops the
                                   loop. */
                                while(gt(comparison,0)) {
                                        enqueue(ilist,dequeue(wlist))
                                        set(work_min,getel(alist,getel(wlist,1)))
                                        call compare(test,work_min)
                                }
                                /* test becomes the new minimum of the run */
                                requeue(wlist,index)
                                set(work_min,test)
                        }
                }
                /* record the run length, then return the run to ilist */
                set(completion,length(wlist))
                while (not(empty(wlist))) { enqueue(ilist,dequeue(wlist)) }
        }
}

/* Entry point: tallies initial and complete child-gender patterns over
   every family in the database, orders the patterns with listsort, and
   writes a table of the counts with one example family key per
   complete pattern. */
proc main() {
    table(slot_of)
    list(pattern_names)
    list(initial_tally)
    list(complete_tally)
    list(example_keys)
    list(print_order)

/* slot 1 of every list belongs to ".", the pattern with no children */

    insert(slot_of,".",1)
    enqueue(pattern_names,".")
    enqueue(initial_tally,0)
    enqueue(complete_tally,0)
    enqueue(example_keys,"no example")
    set(slot_count,1)
    set(most_kids,0)
    set(report_at,0)

/* gather the statistics; unknown genders are counted as well */

    print("Processing families ")
    forfam(thisfam,famnum) {
        /* the initial count of "." just follows the family counter */
        setel(initial_tally,1,famnum)
        set(genders,"")
        set(slot,1)
        if (kids,nchildren(thisfam)) {
            if (lt(most_kids,kids)) { set(most_kids,kids) }
            children(thisfam,kid,kidnum) {
                /* anything that is neither F nor M is recorded as U */
                set(letter,"U")
                if (eq(strcmp(sex(kid),"F"),0)) {
                    set(letter,"F")
                }
                elsif (eq(strcmp(sex(kid),"M"),0)) {
                    set(letter,"M")
                }
                set(genders,save(concat(genders,letter)))
                set(slot,lookup(slot_of,genders))
                if (slot) {
                    /* known pattern: one more family starts this way */
                    set(seen,getel(initial_tally,slot))
                    setel(initial_tally,slot,add(seen,1))
                }
                else {
                    /* new pattern: give it the next free slot */
                    set(slot_count,add(slot_count,1))
                    set(slot,slot_count)
                    insert(slot_of,genders,slot)
                    enqueue(pattern_names,save(genders))
                    enqueue(initial_tally,1)
                    enqueue(complete_tally,0)
                }
            }
        }
        else {
            /* first childless family replaces the placeholder example */
            if (eq(strcmp(getel(example_keys,1),"no example"),0)) {
                setel(example_keys,1,save(key(thisfam)))
            }
        }
        /* slot now identifies this family's complete pattern */
        if (not(getel(example_keys,slot))) {
            setel(example_keys,slot,save(key(thisfam)))
        }
        set(finished,getel(complete_tally,slot))
        setel(complete_tally,slot,add(finished,1))
        /* progress display, advancing the threshold by 100 each time */
        if (le(report_at,famnum)) {
            print(d(famnum))
            print(" ")
            set(report_at,add(report_at,100))
        }
    }

    print("\nSorting results...")

    call listsort(pattern_names,print_order)

/* write the table in the order produced by the compare procedure;
   the two count columns are right-aligned */

    print("done\nPrinting results...")

    set(initial_edge,add(most_kids,16))
    set(complete_edge,add(initial_edge,12))
    set(example_start,add(complete_edge,12))

    "Gender pattern"
    col(sub(initial_edge,7)) "initial"
    col(sub(complete_edge,8)) "complete"
    col(example_start) "example\n"

    forlist(print_order,slot,rank) {
        getel(pattern_names,slot)
        set(n_initial,getel(initial_tally,slot))
        col(sub(initial_edge,strlen(d(n_initial))))
        d(n_initial)
        set(n_complete,getel(complete_tally,slot))
        col(sub(complete_edge,strlen(d(n_complete))))
        d(n_complete)
        col(example_start)
        getel(example_keys,slot)
        "\n"
    }
    print("done")
}
