.\" Global switches and month-name strings (m1..m9, ma, mb, mc).
.\" Rb is tested by T], T[ and ]< before they call tm.
.\" NOTE(review): removing the tm request makes every .tm call in this file
.\" a no-op, including the PG:, KW: and ]<: messages -- confirm intended.
.rm tm	\" no warnings
.nr Rb 1
.ds m1 Jan.
.ds m2 Feb.
.ds m3 Mar.
.ds m4 Apr.
.ds m5 May
.ds m6 June
.ds m7 July
.ds m8 Aug.
.ds m9 Sep.
.ds ma Oct.
.ds mb Nov.
.ds mc Dec.
.\"		Robbert's Dynamite Troff Macros
.\"
.\" Use at your own risk.  These will normally be used next to -ms.  It
.\" redefines LP, PP, IP, SH, NH, FS, KS, KF, KE, bp (!), refer macros,
.\" and page format.  Lines are aligned on vertical spacing for a perfect
.\" page mirror.  It attempts to remove widows and to balance the pages.
.\" Figure macros are available through .F1 <figure> .F2 <trailer> .F3.
.\" There's no extra spacing between paragraphs, so you can use .LP any-
.\" time to align on vertical spacing or to reset the formatting parameters
.\" (point size, ...).  .KW keyword specifies a keyword, .KW flushes them.
.\" Use my refb if you want this to work.  If you look through this file,
.\" you may find some handy definitions that you can use as well.  By the
.\" way, if there's no .TL, .NH begins a new chapter.
.\"		Good luck, brave person.
.\"
.\"
.\"	=====> Ds is like ds, but then accepts real arguments
.\"
.\" Ds name text: define string `name' as `text'.  Because it is a macro
.\" call, `text' can be passed as one quoted argument.
.de Ds	\"	--- define string ---
.ds \\$1 "\\$2
..
.\" As name text: append `text' to string `name'.
.de As	\"	--- append to string ---
.as \\$1 "\\$2
..
.\"	=====> page formatting macros <=====
.\"
.\" Al: break, then, only when PD (interparagraph spacing) is zero, space
.\" down by an amount computed from the distance to the end of the page
.\" header (register T[) modulo the vertical spacing VS.
.\" NOTE(review): the VS/11 slack presumably tolerates rounding -- confirm.
.de Al	\"	--- alignment macro ---
.br
.if !\\n(PD \{\
.   nr VV \\n(VS-(\\n(VS/11)	\" vertical spacing minus a little bit
.   sp \\n(VVu-((\\n(.du-\\n(T[u+\\n(VVu)%\\n(VSu)
.\}
..
.\" T]: bottom-of-page trap.  When the column counter C% is odd it moves to
.\" the right-hand column of the same page.  Otherwise it finishes the page:
.\" removes the trap, records or checks the page depth for balancing (Bl),
.\" outputs buffered footnotes (Fd), prints the header or footer title line
.\" unless string Pf is `no', and calls b[ (the renamed bp request).
.\" Runs in environment 1 so the partially filled line is not disturbed.
.de T]	\"	--- bottom of page trap macro ---
.ev 1				\" switch environment to save line buffer
.ie \\n(C%%2 \{\
.   nr C% +1			\" increase column counter
.   po \\n(POu+\\n(LLu+1c	\" new page offset
.   sp |\\n(Tpu			\" to top of right column
.   ev
.\}
.el \{\
.   ch T]			\" remove trap immediately
.   if \\n(C% .nr C% +1		\" if counting columns, count columns
.   po \\n(POu			\" set page offset
.   ie e .nr Bl \\n(nl		\" save position of left page
.   el .if \\n(Rb&\\n(Tc&((\\n(nl-\\n(Bl>0.5v):(\\n(Bl-\\n(nl>0.5v)) \
.      tm WN:balancing problem (\\n(nl != \\n(Bl)
.   if \\n(Fd .\{
.      sp |\\n(Plu-\\n(Fdu	\" to bottom of page
.      Fd			\" output footnotes
.      rm Fd			\" remove footnotes
.      nr Fd 0			\" clear footnote size
.   \}
.   nr Tl 0
.   if e .if \\n(nl+1v<=\\n(Pl .nr Tl 1	\" left page was shortened
.   if !'\\*(Pf'no' \{\
.      ie \\n(Tc \{\
.         sp |2.4c		\" some room at the top of the page
.         ie \\n(Pp  .tl ''\\s-1- \\n% -\\s+1''		\" paper header
.         el .ie o   .tl '\\*(S2'\\*(T2'\\f3\\n%\\fP'	\" right page header
.         el	     .tl '\\f3\\n%\\fP'\\*(T1'\\*(S1'	\" left page header
.      \}
.      el \{\
.         sp |\\n(.pu-2c	\" bottom of page
.         if !\\n(Pp .tl ''\\s-1- \\n% -\\s+1''		\" page number at foot (not a paper)
.      \}
.   \}
.   nr Tc 1			\" page number at top of page
.   ev				\" restore environment
'   b[				\" skip to next page, springing T[
.\}
..
.\" E]: end-of-input macro (installed with .em below).  Ends the last
.\" paragraph and sets Kf so that Kr springs T] after each keep it releases.
.\" NOTE(review): the continuation escape emitted when keeps are still
.\" queued (Kr) presumably keeps troff processing so they get output --
.\" confirm.
.de E]	\"	--- end of input ---
.P]				\" end of last paragraph
.nr Kf 1			\" flush floating keeps
.if \\n(Kr \c
..
.\" Bt pos: set register Bt from `pos' (a leading sign makes it relative,
.\" as with any .nr) and move the T] trap there; if the new position is not
.\" below the current one (nl), T] is invoked immediately instead.
.de Bt	\"	--- change bottom of page trap ---
.nr Bt \\$1			\" calculate new page trap
.ie \\n(Bt<=\\n(nl .T]		\" if before current pos, spring now
.el .ch T] \\n(Btu		\" set new page trap
..
.\" T[: top-of-page trap (planted with `.wh 0 T[' in the initialization).
.\" Reports the page number through tm when Rb is set, plants the bottom
.\" trap T] at Pl-1v+1, resets the page offset and the footnote counter Fc,
.\" re-queues a footnote left over from the previous page (Fe), skips the
.\" header area (its size is kept in register T[), releases floating keeps,
.\" aligns, and records the start of the text area in Tp.
.\" NOTE(review): T| is initialised but not referenced in the visible part
.\" of this file.
.nr T| 0			\" busy flag
.de T[	\"	--- top of page macro ---
.if \\n(Rb .tm PG:\\n%
.nr Bt \\n(Pl-1v+1		\" bottom of page trap position
.wh \\n(Btu T]			\" set bottom of page trap
.po \\n(POu			\" page offset
.nr Fc 0 1			\" reset footnote count
.if \\n(Fe .Fa			\" append leftover footnote
.ev 1				\" switch environment to save line buffer
.nr T[ 2.4c+1v+0.7c		\" size of page header
.sp |\\n(T[u			\" end of header
.if \\n(Kr .Kr			\" release some floating keeps
.Al				\" align in case of figures
.ev				\" restore environment
.nr Tp \\n(.d			\" page start
..
.\" 2C: switch to two-column output.  Starts the column counter C% at 1
.\" (T] uses its parity to pick the column), sets the line length to half
.\" of (LT - 1c), and records the current position as the column top Tp.
.de 2C	\"	--- 2 column output ---
.P]
.nr C% 1 1			\" start column counter
.ll (\\n(LTu-1c)/2u		\" calculate line length
.nr LL \\n(.l			\" -ms compatibility
.Al				\" align
.nr Tp \\n(.d			\" new top of page
.P[
..
.\" 1C: return to one column: line length LT, page offset PO, C% zero.
.de 1C	\"	--- back to 1 column output ---
.P]
.ll \\n(LTu			\" restore line length
.nr LL \\n(.l			\" -ms compatibility
.po \\n(POu			\" restore margin
.nr C% 0			\" stop column count
.P[
..
.\"
.\"	=====> paragraph macros <=====
.\"
.\" P[: start collecting a paragraph in diversion Pd, unless a keep is
.\" open (Ks nonzero), in which case the keep's own diversion is used.
.de P[	\"	--- begin paragraph ---
.if !\\n(Ks .di Pd		\" divert
..
.\" P]: end the paragraph.  Outside keeps it closes the diversion, may
.\" shorten the page by one line (Bt -1v) to avoid a widow, and replays
.\" Pd in no-fill mode; the previous fill mode is restored afterwards.
.de P]	\"	--- end paragraph ---
.ce 0				\" break, turn off centering
.in 0				\" turn off indent
.if !\\n(Ks \{\
.   nr Pm \\n(.u		\" save fill mode
.   nf				\" stop filling
.   di
.   \" diversion ended.  If paragraph doesn't fit, do something special
.   \" if left page was decreased, decrease right page too, else if
.   \" paragraph doesn't fit for but one line, decrease page length
.   if \\n(.t+1v<\\n(dn .if \\n(Tl:(\\n(.t+2v>=\\n(dn) .Bt -1v
.   Pd				\" flush paragraph
.   if \\n(Pm .fi		\" restore fill mode, but don't break
.\}
..
.\"
.\"	=====> footnote macros <=====
.\"
.\" Footnotes are collected in two stages: FS/FE divert one footnote into
.\" Fe (height in register Fe); Fa moves it into the page buffer Fd.
.rm FS FE FJ FK			\" remove -ms footnote stuff
.\" FS: start a footnote; text is appended to Fe in environment 1.
.de FS	\"	--- start footnote ---
.ev 1				\" switch environments
.da Fe				\" divert footnote to Fe
.fi
..
.\" FE: end a footnote.  Only when Fe held nothing before (Fe = dn) is a
.\" decision made: if the footnote would not fit above the bottom trap, Bt
.\" is called with nl+.d and the footnote stays in Fe (T[ hands it to Fa
.\" on the next page); otherwise Fa adds it to this page's buffer.
.de FE	\"	--- end of footnote ---
.nf				\" break and stop filling
.da
.ev				\" restore environment
.\" If footnote doesn't fit, break here and now.  If it does, append it to
.\" the other macro and move end of page trap up.  If buffering already,
.\" continue buffering.
.nr Fe +\\n(dn			\" calculate new footnote size
.if \\n(Fe=\\n(dn .ie \\n(nl+\\n(.d+1v+\\n(Fd+\\n(Fe>=\\n(Bt .Bt \\n(nl+\\n(.d
.el .Fa				\" footnote still fits
..
.\" Fa: append the pending footnote Fe to the page footnote buffer Fd.
.\" The first footnote on a page (Fc becomes 1) is preceded by a 1 inch
.\" rule.  Register Fd accumulates the buffer height and the bottom trap is
.\" moved up to Pl-Fd-1v; Fe (macro and register) is then cleared.
.de Fa	\"	--- add footnote to buffer ---
.ev 1				\" switch environments again
.da Fd				\" add footnote to Fd
.if \\n+(Fc=1 \l'1i'		\" footnote separator on first footnote
.Fe				\" ditto
.br				\" ditto
.da
.ev				\" restore environment
.nr Fd +\\n(dn			\" calculate new footnote size
.Bt \\n(Pl-\\n(Fd-1v		\" calculate new page trap
.rm Fe				\" remove old footnote
.nr Fe 0			\" clear footnote size
..
.\"
.\"	=====> keep macros <=====
.\"
.\" Floating keeps wait in a linked queue of ten slots (0..9).  Register
.\" nK holds the index of the slot that follows slot K; -1 ends a list.
.\" Kl heads the free list.  KE appends at Ki, Kr outputs from Ko.
.nr Kl 0			\" free list
.nr n0 1
.nr n1 2
.nr n2 3
.nr n3 4
.nr n4 5
.nr n5 6
.nr n6 7
.nr n7 8
.nr n8 9
.nr n9 (-1)			\" end of free list
.nr Ko (-1)			\" head of queue: next keep to output
.nr Ki (-1)			\" tail of queue: last keep queued
.\" Bp: go to the bottom of the page (invoke T]) unless the current
.\" position equals the end of the page header (register T[).
.de Bp	\"	--- begin a new page ---
.if !(\\n(.d=(\\n(T[) .T]	\" if not top of page, go to bottom
..
.\" Kg name height: replay diversion `name' in no-fill mode, restore the
.\" fill mode, and begin a new page when less than 2v remains.  The second
.\" argument is not used by the body.
.de Kg	\"	--- output keep \\$1 with height \\$2
.nr Pm \\n(.u			\" save fill mode
.nf				\" don't fill
.\\$1				\" output keep
.if \\n(Pm .fi			\" restore fill mode
.if \\n(.t<2v .Bp		\" if little room left, begin new page
..
.\" KS / KF: open a static or floating keep.  Both end the current
.\" paragraph, bump the nesting counter Ks (which makes P[ and P] leave
.\" the diversion alone), and divert into Ks or Kf respectively.
.de KS	\"	--- begin static keep ---
.P]				\" end paragraph
.nr Ks +1			\" mark keep
.di Ks				\" divert keep to Ks
.P[
..
.de KF	\"	--- begin floating keep --
.P]				\" end paragraph
.nr Ks +1			\" mark keep
.di Kf				\" divert keep to Kf
.P[
..
.\" KE: close the keep whose diversion is current.
.\" Static keep (diversion Ks): begin a new page if it does not fit, then
.\" output it.
.\" Floating keep: output it at once only if nothing is queued (Ki<0) and
.\" it fits; otherwise take a slot from the free list, rename the
.\" diversion to d<slot>, store its height in h<slot>, link the slot at
.\" the tail of the queue and set Kr so that T[ / Kr release it later.
.de KE	\"	--- end keep --
.P]				\" break
.ie '\\n(.z'Ks' \{\
.   di
.   if \\n(dn>=\\n(.t .Bp	\" if it doesn't fit, begin a new page
.   Kg Ks \\n(dn		\" release static keep
.\}
.el \{\
.   di
.   ie (\\n(Ki<0)&(\\n(dn<\\n(.t) .Kg Kf \\n(dn
.   el \{\
.      if \\n(Kl<0 .Kr		\" free list exhausted, flush some entries
.      if \\n(Ki>=0 .nr n\\n(Ki (\\n(Kl)	\" if (Ki != NIL) n[Ki] = Kl
.      nr Ki (\\n(Kl)		\" Ki = Kl
.      nr Kl (\\n(n\\n(Kl)	\" Kl = n[Kl]
.      rn Kf d\\n(Ki		\" d[Ki] = Kf	diversion
.      nr h\\n(Ki (\\n(dn)	\" h[Ki] = dn	height
.      nr n\\n(Ki (-1)		\" n[Ki] = -1	(end of list)
.      if \\n(Ko<0 .nr Ko (\\n(Ki)	\" if (Ko < 0) Ko = Ki
.      nr Kr 1			\" entries to release
.   \}
.\}
.nr Ks -1
.P[				\" start a new paragraph
..
.\" Kr: output the keep at the head of the queue (slot Ko), unlink the slot
.\" and push it on the free list, then set Kr to 1 if more keeps remain.
.\" If the next keep fits in the remaining space (.t) it is released
.\" recursively.  When flushing (Kf set) T] is invoked after each release.
.de Kr	\"	--- release floating keep ---
.in 0				\" no indentation
.nf				\" no filling
.nr Kr 0			\" don't release while releasing
.Kg d\\n(Ko \\n(h\\n(Ko		\" output it
.fi				\" restore filling
.in				\" restore indentation
.nr Kt \\n(Ko			\" Kt = Ko
.nr Ko (\\n(n\\n(Kt)		\" Ko = n[Kt]	remove from queue
.nr n\\n(Kt (\\n(Kl)		\" n[Kt] = Kl	put on free list
.nr Kl (\\n(Kt)			\" Kl = Kt
.nr Kr (\\n(Ko>=0)		\" Kr = (Ko >= 0)
.ie !\\n(Kr .nr Ki (-1)		\" if Ko < 0 then Ki = end of list
.el .if \\n(h\\n(Ko<\\n(.t .Kr	\" release another one
.if \\n(Kf .T]			\" if flushing, begin new page
..
.\" KK: force out queued floating keeps.  With Kf set, Kr invokes T] after
.\" each keep it releases; Bp starts the page break that triggers this.
.de KK	\"	--- flush floating keeps ---
.nr Kf 1			\" flush floating keeps
.Bp				\" begin a new page
.nr Kf 0			\" don't flush anymore
..
.\"
.\"	=====> user macros <=====
.\"
.\" The built-in bp request is renamed to b[ (T] calls it); the user-level
.\" bp defined here goes through the paragraph and page-trap machinery.
.rn bp b[			\" rename begin page request
.de bp	\"	--- begin page for users ---
.P]				\" end paragraph
.T]				\" to bottom of page
.P[				\" begin new paragraph
..
.\" B[ name wreg hreg: start diverting a block into `name' in no-fill
.\" mode; the register names given as 2nd and 3rd arguments are remembered
.\" in strings Bw and Bh.
.de B[	\"	--- begin block ---
.br
.ds Bw \\$2
.ds Bh \\$3
.di \\$1
.nf
..
.\" B]: end the block; store its width (dl) and height (dn) in the
.\" registers named at B[.
.de B]	\"	--- end block ---
.fi
.di
.nr \\*(Bw \\n(dl
.nr \\*(Bh \\n(dn
..
.\" B| name x y: output block `name' at horizontal offset x and vertical
.\" offset y relative to the current position, then return to that
.\" position.
.de B|	\"	--- position block ---
.nf
.mk B|				\" remember vertical position
.nr Bw \\$2			\" copy x argument
.nr Bh \\$3			\" copy y argument
.in +\\n(Bwu			\" go to horizontal position
.sp \\n(Bhu			\" go to vertical position
.\\$1				\" output block
.in				\" return to horizontal position
.sp |\\n(B|u			\" return to vertical position
.fi
..
.\" C[ ... C]: centered block.  The text is diverted into Cd as a keep;
.\" C] indents by half the difference between line length and diversion
.\" width (dl) when the block is narrower than the line, then replays it.
.de C[	\"	--- begin centered block ---
.P]				\" end paragraph
.nr Ks +1			\" mark keep
.di Cd				\" divert to Cd
.P[
..
.de C]	\"	--- end centered block ---
.P]				\" break
.di
.if \\n(dl<\\n(.l .in (\\n(.lu-\\n(dlu)/2u	\" indent to center
.Kg Cd \\n(dl			\" get diverted text
.in 0				\" no indentation
.nr Ks -1			\" end of keep
.P[				\" begin normal paragraph
..
.\" Q[ ... Q]: quote, i.e. a centered block set at 3/4 of the current
.\" line length; the previous length is saved in Ql and restored by Q].
.de Q[	\"	--- begin quote ---
.C[				\" begin centered block
.nr Ql \\n(.l			\" save line length
.ll \\n(.lu*3u/4u		\" set line length to 3/4 of current ll
..
.de Q]	\"	--- end quote ---
.ll \\n(Qlu			\" restore line length
.C]				\" end centered block
..
.
.\" SZ n: break, then apply `n' to both the point size and the vertical
.\" spacing.  The document calls it in pairs (.SZ -2 ... .SZ +2).
.de SZ	\"	--- size change ---
.br				\" first break
.ps \\$1			\" change point size
.vs \\$1			\" change vertical spacing
..
.\" JR: forget all indentation state.  Every level's indent size J0..J5
.\" goes back to 5n, and the accumulated indent Jn and the level index Ji
.\" go back to zero.
.de JR
.nr J5 5n
.nr J4 5n
.nr J3 5n
.nr J2 5n
.nr J1 5n
.nr J0 5n
.nr Ji 0
.nr Jn 0
..
.\" RT: restore the formatting parameters from the user registers: point
.\" size PS, vertical spacing VS, line length LL, page length Pl
.\" (27c+0.5v), font 1; environment 1 gets point size 12, line length LL
.\" and title length LT.
.\" NOTE(review): the line length is set twice in the main environment;
.\" the second .ll looks redundant.
.de RT	\"	--- reset fonts and such ---
.ps \\n(PS			\" point size
.vs \\n(VS			\" vertical spacing
.ll \\n(LLu			\" line length
.nr Pl 27c+0.5v			\" length of page
.ll \\n(LLu			\" line length
.ev 1				\" parameters in environment 1 (title)
.ps 12				\" point size
.ll \\n(LLu			\" line length
.lt \\n(LTu			\" title length in environment 1
.ev
.ft 1				\" reset font
..
.\" RS: enter one more indentation level: add the current level's indent
.\" size J<Ji> to the accumulated indent Jn, then increment Ji.
.de RS	\"	--- increase indent ---
.nr Jn +\\n(J\\n(Ji
.nr Ji +1
..
.\" RE: leave one indentation level (inverse of RS): decrement Ji, then
.\" subtract that level's indent size J<Ji> from the accumulated indent Jn.
.\" An RE without a matching RS is ignored; otherwise Ji would go negative
.\" and J<Ji> would no longer name one of the registers J0..J5.
.de RE	\"	--- decrease indent ---
.if \\n(Ji>0 \{\
.   nr Ji -1
.   nr Jn -\\n(J\\n(Ji
.\}
..
.\" JP: begin an indented paragraph at the current nesting level.  Ends the
.\" previous paragraph, begins a page if less than 1v remains, skips 0.3v
.\" unless at the top of the page or column (Tp), then indents by Jn plus
.\" this level's size J<Ji>, with a tab stop and a negative temporary
.\" indent of that size so that a label can hang to the left.
.de JP	\"	--- begin unlabeled, indented paragraph ---
.P]				\" end paragraph
.if \\n(.t<1v .Bp		\" if not enough room, begin page
.if !(\\n(.d=\\n(Tp) .sp 0.3v 	\" if not top of page, skip some space
.nr Jj \\n(J\\n(Ji		\" increase in indent
.fi				\" start filling
.in \\n(Jnu+\\n(Jju		\" set new indent
.ta \\n(Jju			\" set tab
.ti -\\n(Jju			\" set temporary indent
.P[
..
.\" IP label [indent]: labeled indented paragraph.  An optional second
.\" argument sets this level's indent size in ens.  The label is followed
.\" by a tab and joined to the paragraph text with a continuation escape.
.de IP	\"	--- begin labeled, indented paragraph ---
.if \\n(.$>1 .nr J\\n(Ji \\$2n	\" set indent if specified in ens
.JP				\" do indented paragraph
.RT				\" restore -ms variables
\&\\$1	\c
..
.\" QP label [indent]: quotation paragraph: like IP, but the line length
.\" is also shortened by this level's indent size, so that the text is
.\" indented on both sides.  An optional second argument sets the indent
.\" size in ens.
.\" The line length has to be changed after RT, because RT resets it to LL
.\" unconditionally; changing it before JP would also apply it to the
.\" break that ends the previous paragraph.
.de QP	\"	--- begin quotation ---
.if \\n(.$>1 .nr J\\n(Ji \\$2n	\" set indent if specified in ens
.JP				\" do indented paragraph
.RT				\" restore -ms variables
.ll -\\n(J\\n(Jiu		\" decrease line length
\&\\$1	\c
..
.\" LP: begin a flush-left paragraph.  Ends the previous one, aligns (Al),
.\" adds the interparagraph space PD, and resets indentation (JR) and the
.\" formatting parameters (RT).
.de LP	\"	--- begin paragraph ---
.P]				\" end last paragraph
.Al				\" align
.sp \\n(PDu			\" interparagraph spacing
.JR				\" reset indentation
.RT				\" restore -ms variables
.fi				\" start filling
.P[				\" begin next
..
.\" PP: same as LP, plus a 5n temporary indent on the first line.
.de PP	\"	--- begin paragraph with temporary indent ---
.P]				\" end last paragraph
.Al				\" align
.sp \\n(PDu			\" interparagraph spacing
.JR				\" reset indentation
.RT				\" restore -ms variables
.fi				\" start filling
.ti +5n				\" temporary indent
.P[				\" begin next
..
.\" CH: start a chapter.  Flushes floating keeps, begins a page, makes the
.\" page number go at the bottom for this page (Tc 0; T] sets it back to
.\" 1), resets the figure counter Fi, and sets up centered 18/24 point
.\" text for the heading lines that follow.
.\" NOTE(review): `.rm Rc' removes the Rc macro that ][ calls for the
.\" `Cited on page' line, so that line is not printed afterwards --
.\" confirm this is the intent of `do not count pages'.
.de CH	\"	--- chapter heading ---
.P]				\" break, start chapter
.KK				\" flush floating keeps
.Bp				\" begin page
.nr Tc 0			\" page number at bottom of page
.nr Fi 1 1			\" current figure
.RT				\" restore -ms variables
.ps 18				\" set point size
.vs 24				\" set vertical spacing
.ce 1000			\" center all lines
.nr Hi 0			\" header index 0
.rm HS				\" remove header string
.rm Rc				\" do not count pages
.fi				\" filling
.P[				\" start a new paragraph
..
.\" SH: unnumbered section heading.  Begins a new page if less than 5v
.\" remains, otherwise skips a line unless at the top of the page or
.\" column (Tp); the heading text that follows is set in font 3.
.de SH	\"	--- section heading ---
.P]				\" end last paragraph
.Al				\" align
.if \\n(.t<5v .Bp		\" if not enough room, begin new page
.if !(\\n(.d=\\n(Tp) .sp 	\" if not top of page, skip some space
.RT				\" restore -ms variables
.ft 3				\" bold font
.nr Hi 0			\" header index 0
.rm HS				\" remove header string
.fi				\" start filling
.P[				\" start a new paragraph
..
.\" NH [level]: numbered heading.  Level defaults to 1; counters below the
.\" given level are reset and H<level> is incremented.  `.NH 0' resets H1
.\" as well and is then treated as level 1.
.\" When the document is not a paper (Pp = 0) a level-1 heading starts a
.\" chapter through CH and prints the chapter number enlarged; in every
.\" other case SH is used and the dotted section number is printed.
.\" The number is left in string HS (and, with a trailing dot, in H0) and
.\" the level in register Hi, which IX reads.
.de NH	\"	--- numbered section header ---
.ie \\n(.$=0 .nr Ha 1		\" if no argument, Ha = 1
.el .nr Ha \\$1			\" Ha is argument
.if \\n(Ha<1 .nr H1 0		\" reset subsection numbers
.if \\n(Ha<2 .nr H2 0
.if \\n(Ha<3 .nr H3 0
.if \\n(Ha<4 .nr H4 0
.if \\n(Ha<5 .nr H5 0
.if \\n(Ha=0 .nr Ha 1		\" .NH 0 is like .NH 1, but then resets
.nr H\\n(Ha +1			\" H[Ha]++
.ie (\\n(Pp=0)&(\\n(Ha=1) \{\
.   CH
.   if !\\n(Pp .Ds Fn "\\n(H1\\*(Fs1"	\" reset next figure string
.   ds HS \\n(H1
\\s+6\\*(HS\\s-6
.   sp 0.5
.\}
.el \{\
.   SH
.   ds HS \\n(H1
.   if \\n(Ha>1 .as HS .\\n(H2
.   if \\n(Ha>2 .as HS .\\n(H3
.   if \\n(Ha>3 .as HS .\\n(H4
.   if \\n(Ha>4 .as HS .\\n(H5
\\*(HS.
.\}
.ds H0 \\*(HS.
.nr Hi \\n(Ha			\" header index
..
.\" TL: title of a paper.  Sets Pp, which NH, F1 and T] test to choose the
.\" paper layout, starts the title page through CH, sets the next figure
.\" string to 1 and reduces the heading size by 2 points.
.de TL	\"	--- title of paper ---
.nr Pp 1			\" mark it's a paper
.CH
.Ds Fn "1"			\" next figure string
.ps -2
..
.\" AU: author lines: one line of space, font 2, normal size and spacing.
.de AU	\"	--- authors ---
.sp
.ft 2
.ps \\n(PS
.vs \\n(VS
..
.\" AI: institution lines: one line of space, font 1, normal size and
.\" spacing.
.de AI	\"	--- author's institution
.sp
.ft 1
.ps \\n(PS
.vs \\n(VS
..
.\" AB [arg]: begin the abstract.  Prints the word ABSTRACT unless an
.\" argument is given, turns centering off and opens a quote block (Q[).
.de AB
.AI
.if !\\n(.$ ABSTRACT
.sp
.ce 0
.Q[
..
.\" AE: end the abstract: close the quote block and leave a blank line.
.de AE
.Q]
.sp
..
.\" PS / PE: picture delimiters.  Both are defined empty here, so the macro
.\" layer adds no spacing of its own around a picture.
.de PS	\"	--- start picture ---
.\" $1 is height, $2 is width in units
..
.de PE	\"	--- end of picture ---
..
.\" UX [after [before]]: print UNIX one point smaller, with the second
.\" argument before it and the first after it.  The first call also adds
.\" a dagger and a trademark footnote; register U1 records that this has
.\" been done.
.de UX	\"	--- UNIX macro ---
.ie \\n(U1 \\$2\s-1UNIX\s0\\$1
.el \{\
\\$2\s-1UNIX\\s0\\$1\(dg
.   FS
\(dg UNIX is a Registered Trademark of AT&T in the USA and other countries.
.   FE
.nr U1 1
.\}
..
.\" IX title: end the heading and register it.  Depending on the heading
.\" level Hi left by NH, CH or SH, the running-header strings are set:
.\" T1/S1 are printed on left pages and T2/S2 on right pages (see T]).
.\" A transparent `.In level number title page' line is then appended to
.\" diversion IO, to be executed when IO is output.
.de IX	\"	--- add to index, update page headers ---
.LP				\" end header, define page headers
.if \\n(Hi=0 \{\
.   ds T1 \\$1
.   ds T2 \\$1
.   rm S1 S2			\" no chapter or section number
.\}
.if \\n(Hi=1 \{\
.   ds T1 \\$1
.   ds S1 \s-2CHAP.\& \\*(HS\s+2
.   ds T2 \\$1
.   ds S2 \\*(S1
.\}
.if \\n(Hi=2 \{\
.   ds T2 \\$1
.   ds S2 \s-2SEC.\& \\*(HS\s+2
.\}
.da IO				\" divert to index
\\!.In \\n(Hi "\\*(HS" "\\$1" \\n%
.da
..
.\" In level number title page: format one index entry (the calls are
.\" stored in diversion IO by IX).  Level-0 and level-1 entries are
.\" preceded by 1.7 lines of space and level-2 entries by 0.3, except at
.\" the top of a page.  For level > 0 the entry is indented under its
.\" parent by the width of the number plus 3.5n and the number hangs in
.\" that space; levels 0 and 1 are set larger in font 3.  The page number
.\" is placed at the right end of the line.
.de In	\"	--- output index ---
.P]				\" end of paragraph
.if !(\\n(.d=\\n(Tp) .ie \\$1<2 .sp 1.7
.el .if \\$1=2 .sp 0.3
.in 0
.ad l				\" adjust only left side
.ll -5n				\" decrease line length
.nr J0 0
.P[
.ie \\$1 \{\
.   nr In \\$1-1
.   nr J\\$1 \\n(J\\n(In+\\w'\\$2'+3.5n
.   in \\n(J\\$1u		\" set indent
.   ta \\w'\\$2'u+3.5n
.   ti -\\w'\\$2'u+3.5n
.   ie \\$1<2 \\s+3\\f3\\$2	\\$3\\f1\\s-3\&\c
.   el \\$2	\\$3\&\c
.\}
.el \\s+3\\f3\\$3\\f1\\s-3\&\c
.ll +5n				\" reset line length
.nr In \\n(.l-\w'\\$4'
\\\\h'|\\n(Inu'\\$4
.in 0				\" break, reset indent
.ad b				\" adjust both sides, end of diversion
..
.\" IH level title: heading that is also entered in the index.  A nonzero
.\" level gives a numbered heading (NH), zero an unnumbered chapter (CH).
.de IH	\"	--- index header ---
.ie \\$1 .NH \\$1		\" start a new header
.el .CH				\" start a new, unindexed, chapter
\\$2
.IX "\\$2"			\" add header to index
..
.\" Figures: .F1 <figure> .F2 <caption> .F3
.\" String Fp holds the number of the figure being set and Fn the number
.\" of the one after it, so running text can refer to the next figure with
.\" Fn before its F1 and with Fp afterwards.  In a paper (Pp) figures are
.\" numbered 1, 2, ...; otherwise the chapter number H1 and the separator
.\" Fs are put in front.
.ds Fs .
.de F1	\"	--- begin figure ---
.ds Fp \\*(Fn
.ie \\n(Pp .ds Fn \\n+(Fi
.el .ds Fn \\n(H1\\*(Fs\\n+(Fi
.KF				\" floating keep
.sp 0.5c
.C[				\" begin centered block
..
.\" F2: end of the figure body; the caption follows as a quote block two
.\" points smaller, introduced by `Fig. <Fp>.' in bold.
.de F2	\"	--- end of figure, begin label ---
.C]				\" end centered block
.sp 0.5c
.Q[
.fi
.ps -2
.vs -2
\\fBFig.\ \\*(Fp.\\fP
.ft 1
..
.\" F3: end of the caption; restores font, spacing and size and closes the
.\" floating keep opened by F1.
.de F3	\"	--- end of figure label ---
.br
.ft
.vs
.ps
.Q]
.sp 0.8				\" leave some room under the figure
.KE				\" end floating keep
..
.\" KW [keyword]: with an argument, report the keyword through tm at top
.\" level, or, inside a diversion, store a transparent .KW call so that it
.\" is reported when the diversion is output.  Without an argument, end
.\" the paragraph and send a bare KW message (the flush marker).
.de KW	\"	--- keyword ---
.ie \\n(.$ \{\
.   ie '\\n(.z'' .tm KW:\\$1
.   el \\!.KW "\\$1"
.\}
.el \{\
.   P]
.   tm KW
.   P[
.\}
..
.\" Kx: start a keyword list: skip a line unless at the top of the page.
.de Kx	\"	--- start list of keywords ---
.P]
.if !(\\n(.d=\\n(Tp) .sp	\" if not top of page, skip some space
.P[
..
.\" Kw a b: one keyword entry, set with a 1c hanging indent.
.de Kw	\"	--- output keyword ---
.LP
.in 1c
.ti -1c
\&\\$1 \\$2
..
.\" Definitions are numbered like figures: string Dp holds the number of
.\" the definition being set and Dn the number of the next one; both are
.\" prefixed by string Dx, which is not defined in this part of the file.
.nr Di 1 1			\" current definition
.Ds Dn "\\*(Dx\\n(Di"
.\" D[ title: begin a definition with a bold `Definition <Dp>. title' lead.
.de D[	\"	--- begin definition ---
.sp 0.5c
.Ds Dp "\\*(Dx\\n(Di"
.Ds Dn "\\*(Dx\\n+(Di"
\\fBDefinition\ \\*(Dp.\ \ \\$1\\fP\ \ \c
..
.\" D]: end of a definition: 0.3c of space.
.de D]	\"	--- end of definition ---
.sp 0.3c
..
.\"
.\"	=====> refer macros <=====
.\"
.\" Strings used around citations in the text.  <. and <, are removed, so
.\" they expand to nothing; >. and >, supply the punctuation that follows
.\" the closing bracket.
.rm <. <,
.Ds >. "."		\" reference ends with period
.Ds >, ","		\" reference ends with comma
.Ds [. " \\f1["		\" start of reference
.Ds .] "]\\fP"		\" end of reference
.\" ]< [n]: citation marker, active only when Rb is set.  It works like KW:
.\" with an argument the number is reported through tm (or deferred with a
.\" transparent line inside a diversion); without one, the paragraph is
.\" ended and a bare marker is sent.
.de ]<	\"	--- references ---
.if \\n(Rb \{\
.   ie \\n(.$ \{\
.      ie '\\n(.z'' .tm ]<:\\$1
.      el \\!.]< "\\$1"
.   \}
.   el \{\
.      P]
.      tm ]<
.      P[
.   \}
.\}
..
.\" ]>: defined empty.
.de ]>
..
.\" ]-: remove the field strings and the pending punctuation strings
.\" before the next reference is defined.
.de ]-	\"	--- remove garbage before next definition ---
.rm [A [B [C [D [E [G [H [I [J [M [N [O [P [Q [R [S [T [V ]. ],
..
.\" RR text: output the pending separator (string `],'), then `text', and
.\" set the pending terminator `].' to a period and the separator to a
.\" comma.  The first field of a reference therefore gets no separator in
.\" front of it, because ]- removed both strings.
.de RR	\"	--- add comma + argument to reference ---
\\*(],\\$1\c
.ds ]. .
.ds ], , \&
..
.\" Rc all first ...: print the `Cited on page(s)' line.  ][ passes the
.\" page list once quoted and once unquoted, so more than two arguments
.\" means that the list holds more than one page.
.de Rc	\"	--- cited on pages ($1: all; $2: first; ...) ---
.ie \\n(.$>2 Cited on pages \\$1.
.el Cited on page \\$1.
..
.\" ][: format one bibliography entry as a static keep.  The label string
.\" [F is set in brackets in the hanging indent; each field string is then
.\" added through RR, which supplies the separating commas.  The title
.\" [T is put in double quotes, with the closing quote folded into the
.\" pending punctuation strings.  The entry ends with the terminator `].',
.\" the optional `Cited on page' line (Rc) and the optional [O text.
.\" NOTE(review): the `.if !<string>' tests depend on how the companion
.\" refb program defines the field strings, which is not visible here --
.\" verify against refb before changing them.
.de ][	\"	--- new reference ---
.KS				\" keep together
.JP [\\*([F]			\" start indented paragraph
.if !\\*([H .RR "\\*([H"
.if !\\*([A .RR "\\*([A"
.if !\\*([Q .RR "\\*([Q"
.if !\\*([T \{\
\\*(],\(l"\\*([T\c
.ds ]. .\(r"
.ds ], ,\(r" \&
.\}
.if !\\*([R .RR "\\*([R"
.if !\\*([M .RR "\\*([M"
.if !\\*([J .RR "\\f2\\*([J\\fP"
.if !\\*([V .RR "Vol.\& \\*([V"
.if !\\*([N .RR "No.\& \\*([N"
.if !\\*([P .ie \\n([P>0 .RR "pp.\& \\*([P"
.el .RR "p.\& \\*([P"
.if !\\*([B .RR "in \\f2\\*([B\\fP"
.if !\\*([E .RR "ed.\& \\*([E"
.if !\\*([S .RR "\\*([S"
.if !\\*([I .RR "\\*([I"
.if !\\*([C .RR "\\*([C"
.if !\\*([G .RR "Gov't.\& ordering no.\& \\*([G"
.if !\\*([D .RR "\\*([D"
\&\\*(].
.if !\\*([L .Rc "\\*([L" \\*([L
.if !\\*([O \&\\*([O
.KE
..
.\"
.\"	=====> accents <=====
.\"
.\" Accent strings, written before the letter they modify.  Each one
.\" moves right by half the difference between the width of a sample
.\" letter and the width of the accent (plus .06m), prints the accent with
.\" zero width, and moves back by the same amount.
.\" NOTE(review): C and v have identical definitions.
.ds - \(em
.ds ' \h'\w'e'u-\w'\(aa'u/2+.06m'\z\(aa\h'-\w'e'u+\w'\(aa'u/2-.06m'
.ds ` \h'\w'e'u-\w'\(ga'u/2+.06m'\z\(ga\h'-\w'e'u+\w'\(ga'u/2-.06m'
.ds : \h'\w'u'u-\w'\(ad'u/2+.06m'\z\(ad\h'-\w'u'u+\w'\(ad'u/2-.06m'
.ds ^ \h'\w'a'u-\w'^'u/2+.06m'\z^\h'-\w'a'u+\w'^'u/2-.06m'
.ds ~ \h'\w'a'u-\w'~'u/2+.06m'\z~\h'-\w'a'u+\w'~'u/2-.06m'
.ds C \h'\w'e'u-\w'\(ah'u/2+.06m'\z\(ah\h'-\w'e'u+\w'\(ah'u/2-.06m'
.ds v \h'\w'e'u-\w'\(ah'u/2+.06m'\z\(ah\h'-\w'e'u+\w'\(ah'u/2-.06m'
.ds , \h'\w'c'u-\w'\(ac'u/2'\z\(ac\h'-\w'c'u+\w'\(ac'u/2'
.ds -- \*-
.ds q[ \(l"
.ds ]q \(r"
.\"
.\"	=====> user settable definitions <=====
.\"
.\" Defaults that a document may override.  RT copies PS, VS, LL and LT
.\" into the formatter; 2C and 1C derive the line length from LT.
.\" NOTE(review): LL (6.5i) and LT (15c) differ, so the line length
.\" changes after a 2C/1C pair -- confirm this is intended.
.cs 5 20u			\" font 5, constant width
.nr PS 12			\" point size
.nr VS 14			\" vertical spacing
.nr LL 6.5i			\" line length
.nr FL 15c			\" footnote length (no effect currently)
.nr LT 15c			\" title length
.nr PO \n(.o			\" page offset
.nr PD 0			\" interparagraph spacing
.\"
.\"	=====> -ms init <=====
.\"
.\" Disable the page and footnote traps of -ms, then install this
.\" package's own: T[ at the top of every page and E] at end of input.
.\" The final P[ opens the first paragraph diversion.
.nr FM 1			\" ms hack: remove page traps
.ch FO				\" remove bottom of page trap
.ch FX				\" remove footnote trap
.rm PT BT			\" remove other traps
.nr YE 1			\" causes break in .EQ
.\"
.\"	=====> initialization <=====
.\"
.RT				\" set these variables
.JR				\" reset indentation
.hw packet re-start trans-par-ent trans-par-ently trans-par-ency work-station trans-action time-stamp
.wh 0 T[			\" top of page macro
.em E]				\" end of text macro
.P[				\" begin paragraph
.TL
IMPLEMENTING DISTRIBUTED ALGORITHMS
.br
USING 
.br
REMOTE PROCEDURE CALLS
.AU
H.E. Bal *
R. van Renesse
A.S. Tanenbaum
.AI
Vrije Universiteit
Amsterdam, The Netherlands
.FS
* This research was sponsored in part by the
Netherlands Organization for Pure Scientific Research (Z.W.O.)
under project number 125-30-10
.FE
.AB
.PP
Remote Procedure Call (RPC) is a simple yet powerful primitive
for communication and synchronization between distributed processes.
A problem with RPC is the fact that it tends
to decrease the amount of parallelism in the application,
due to its synchronous nature.
This paper shows how light-weight processes can be used to
circumvent this problem.
The combination of blocking RPC calls and
light-weight processes provides both simple semantics
and efficient exploitation of parallelism.
.PP
The communication primitive of the Amoeba Distributed Operating System
is based on this combination.
We will describe how two important classes
of algorithms, branch
and bound and alpha-beta search, can be run in a parallel
way using this primitive.
The results of some experiments comparing these algorithms
on a single processor and on Amoeba are also discussed.
.AE
.NH 1
INTRODUCTION
.PP
As computing technology advances, it becomes increasingly difficult
and expensive to
make computers faster by just increasing the speed of the chips.
Electrical signals in copper wire travel at 2/3 the speed of
light, or about 20 cm/nanosecond, so very fast computers must be very small,
which leads to severe heat dissipation problems among other things.
The obvious solution is to harness together a large number of moderately
fast computers to achieve the same computing power as one very fast
computer, but at a fraction of the cost.
.PP
Many ways of organizing multiple processors into distributed systems have
been proposed.
At one end of the spectrum are the
.I
loosely-coupled systems
.R
consisting of a number of independent computers,
each with its own operating system and
users, exchanging files and mail over a public data network.
At the other end of the spectrum are
.I
tightly-coupled systems
.R
with multiple processors on the same bus and sharing a common memory.
In between are systems consisting of mini- or microcomputers
communicating over a fast local network and all running a single,
system-wide operating system.
We have used a system in the latter category as a testbed
for the implementation of some distributed algorithms.
.PP
In this paper we will briefly describe this system,
called Amoeba, and its communication primitive, which is
essentially a Remote Procedure Call (RPC).
The main intent of the paper is to describe how some fairly
complex distributed algorithms can be implemented on such a system
using RPC.
Measurements of the performance of these algorithms are
presented in the last section.
.NH 1
THE AMOEBA SYSTEM
.PP
The Amoeba Distributed Operating System\*([.Mullender and Tanenbaum 1985; Tanenbaum and Mullender 1981; Mullender and Tanenbaum 1984, 1986; Tanenbaum et al. 1986\*(.]
.]< 0
.]< 1
.]< 2
.]< 3
.]< 4
consists of a collection of (possibly different) processors,
each with its own local memory, which communicate over a local network.
Currently, we use mainly Motorola 68010 processors connected by a
10 Mbps token ring (Pronet),
although Amoeba also runs on the VAX, NS16032, PDP-11 and IBM-PC.
Amoeba is based on the client-server model\*(<.\*([.Tanenbaum and Van Renesse 1985\*(.]\*(>.
.]< 5
The system is composed of four basic components.
First, each user has a personal workstation, to be used for editing on a
bit-map graphics terminal and other activities that require dedicated
computing power for interactive work.
Second, there is a pool of processors that can be dynamically allocated to
users as needed. For example, a user who wants to run a 5-pass compiler
might be allocated 5 pool processors for the duration of the compilation,
to allow the passes to run largely in parallel.
Third, there are specialized servers: file servers, directory servers,
process servers, bank servers (for accounting) etc.
Fourth, there are gateways that connect the system to similar systems
elsewhere.
.PP
The Amoeba communication primitive is based on Remote Procedure Call (RPC)\*(<.\*([.Birrell and Nelson 1984; Nelson 1981\*(.]\*(>.
.]< 6
.]< 7
RPC is a mechanism for communication across a network.
It resembles a normal procedure call.
Amoeba uses a simple form of RPC: the
client sends a request to any server that is willing to offer a
certain service and some server sends a response back.
RPC has the advantage of simple semantics,
similar to the procedure calls with which every programmer is familiar.
It is a higher level construct than asynchronous message passing,
so it is potentially easier to use.
.PP
One problem with RPC is the fact that the caller (client) is
blocked during the call,
so a separate mechanism is needed to obtain parallelism.
In Amoeba, a process (or \fIcluster\fR)
consists of one or more
light-weight processes called \fItasks\fR.
Tasks share a common address space and run in parallel.
While a task is blocked in an RPC
other tasks in its cluster may run if they have work
to do.
The combination of blocking RPC calls and
light-weight processes provides both simple semantics
and efficient exploitation of parallelism.
In the following sections we will describe how they
can be used together to implement parallel algorithms for
branch-and-bound and alpha-beta search.
.NH 1
PARALLEL BRANCH AND BOUND USING RPC
.PP
The branch-and-bound method is a technique for solving a
large class of combinatorial optimization problems.
It has been applied to Integer Programming, Machine Scheduling problems,
the Traveling Salesman Problem, and many others\*(<.\*([.Lawler and Wood 1966\*(.]\*(>.
.]< 8
We have chosen to implement the Traveling Salesman Problem (TSP),
in which it is
desired to find the shortest route for a salesman to visit each
of the
.I n
cities in
his territory exactly once.
.F1
.SZ -2
.PS
# Search tree of the 4-city Traveling Salesman Problem.
# Cities: L = London, A = Amsterdam, P = Paris, W = Washington.
# B1 is the root (London); each further row adds one city to the tour,
# so every node's children are exactly the cities not yet on its path.
boxwid = 0.25
boxht = 0.25
B1: box "L"
# second city on the tour
B2: [
	movewid = 2
	B21: box "A";
	move ;
	B22: box "P";
	move ;
	B23: box "W"
] with .n at B1.s-(0,.5)
# third city: children of A are P,W; of P are A,W; of W are A,P
B3: [
	movewid = .75
	B31: box "P";
	move ;
	B32: box "W";
	move ;
	B33: box "A";
	move;
	B34: box "W";
	move;
	B35: box "A";
	move;
	B36: box "P"
] with .n at B2.s-(0,.5)
# fourth city: the single city left over on each path
B4: [
	movewid=.75
	B41: box "W";
	move ;
	B42: box "P";
	move ;
	B43: box "W";
	move;
	B44: box "A";
	move;
	B45: box "P";
	move;
	B46: box "A"
] with .n at B3.s-(0,.5)
line from B2.B21.n to B1.w
line from B2.B22.n to B1.s
line from B2.B23.n to B1.e
line from B3.B31.n to B2.B21.w
line from B3.B32.n to B2.B21.e
line from B3.B33.n to B2.B22.w
line from B3.B34.n to B2.B22.e
line from B3.B35.n to B2.B23.w
line from B3.B36.n to B2.B23.e
line from B4.B41.n to B3.B31.s
line from B4.B42.n to B3.B32.s
line from B4.B43.n to B3.B33.s
line from B4.B44.n to B3.B34.s
line from B4.B45.n to B3.B35.s
line from B4.B46.n to B3.B36.s
.PE
.SZ +2
.F2
Tree of 4-city Traveling Salesman Problem for London, Amsterdam, Paris, and
Washington.
.F3
.PP
Abstractly, the branch-and-bound method uses a \fItree\fR to structure the
space of possible solutions.
A \fIbranching rule\fR tells how the tree is built.
For the TSP, a node of the tree represents a partial tour.
Each node has a branch for every city that is not on this partial tour.
Fig. \|\*(Fp shows a tree for a 4-city problem.
Note that a leaf represents a full tour (a solution).
For example, the leftmost branch represents the
tour London - Amsterdam - Paris - Washington.
.PP
A \fIbounding rule\fR avoids searching the whole tree.
For TSP, the bounding rule is simple.
If the length of a partial tour exceeds the length of any already known
solution, the partial tour will never lead to a solution better
than what is already known.
.PP
Parallelism in a branch-and-bound algorithm is obtained by searching
parts of the tree in parallel.
If enough processors were available, a new processor could be
allocated to every node of the tree.
Every processor would select the best partial path from its children
and report the result back to its parent.
If there are N cities, this approach would require O(N!) processors.
More realistically, the work has to be divided among the available processors.
In our model, each processor starts at the node given to it
and generates the complete partial tree reachable from that node
down to \fIdepth\fR levels.
Each time the processor generates a node at level \fIdepth\fR
it hands out this node to a subcontractor for further evaluation.
These evaluations and the generation of the partial tree occur in parallel.
Figure \|\*(Fn shows how the tree of Figure \|\*(Fp can be searched,
using a 2-level processor hierarchy (i.e., a subcontractor has
no subcontractors itself).
.F1
.SZ -2
.PS
# The TSP search tree split over a 2-level processor hierarchy.
# Cities: L = London, A = Amsterdam, P = Paris, W = Washington.
# B1 and B2 are searched by the root processor; row B5 repeats the nodes
# of B2 as the roots of the subtrees handed to the subcontractors (the
# arrows), and B3 and B4 are searched by those subcontractors.
boxwid = 0.25
boxht = 0.25
B1: box "L"
B2: [
	movewid = 2
	B21: box "A";
	move ;
	B22: box "P";
	move ;
	B23: box "W"
] with .n at B1.s-(0,.5)
B5: [
	movewid = 2
	B51: box "A";
	move ;
	B52: box "P";
	move ;
	B53: box "W"
] with .n at B2.s-(0,.5)
# third city: children of A are P,W; of P are A,W; of W are A,P
B3: [
	movewid = .75
	B31: box "P";
	move ;
	B32: box "W";
	move ;
	B33: box "A";
	move;
	B34: box "W";
	move;
	B35: box "A";
	move;
	B36: box "P"
] with .n at B5.s-(0,.5)
# fourth city: the single city left over on each path
B4: [
	movewid=.75
	B41: box "W";
	move ;
	B42: box "P";
	move ;
	B43: box "W";
	move;
	B44: box "A";
	move;
	B45: box "P";
	move;
	B46: box "A"
] with .n at B3.s-(0,.5)
line from B2.B21.n to B1.w
line from B2.B22.n to B1.s
line from B2.B23.n to B1.e

arrow from B2.B21.s - (0,.1) to B5.B51.n + (0,.1)
arrow from B2.B22.s - (0,.1) to B5.B52.n + (0,.1)
arrow from B2.B23.s - (0,.1) to B5.B53.n + (0,.1)

line from B3.B31.n to B5.B51.w
line from B3.B32.n to B5.B51.e
line from B3.B33.n to B5.B52.w
line from B3.B34.n to B5.B52.e
line from B3.B35.n to B5.B53.w
line from B3.B36.n to B5.B53.e

line from B4.B41.n to B3.B31.s
line from B4.B42.n to B3.B32.s
line from B4.B43.n to B3.B33.s
line from B4.B44.n to B3.B34.s
line from B4.B45.n to B3.B35.s
line from B4.B46.n to B3.B36.s
.PE
.SZ +2
.F2
Example of a distributed tree search
.F3
.PP
In Figure \|\*(Fp, the processor that traverses the top part of the tree (the root processor)
searches one level.
It splits off three subtrees, each of depth two, which are traversed
in parallel by the subcontractors.
This algorithm is shown in Figure \|\*(Fn.
The algorithm sets the global variable 'minimum' to the length of
the shortest path.
This variable is initialized with a very high value.
.F1
.DS
\fBprocedure\fR traverse(node,depth,length);
\fBbegin\fR
     { `node' is a node of the search tree. It contains
       a list of the cities on the current partial tour.
       `length' is the length of the partial path so far.
       `depth' is the number of levels to be searched
       before the rest of the tree should be handed
       out to a subcontractor }
     \fBif\fR length < minimum \fBthen\fR
     \fBbegin\fR  { if length >= minimum skip this node }
          \fBif\fR `node' is a leaf \fBthen\fR
               minimum := length;
          \fBelse if\fR depth = 0 \fBthen\fR
               hand out subtree rooted at `node'
               to a subcontractor;
          \fBelse\fR
               \fBfor\fR each child c of `node' \fBdo\fR
                     traverse(c,depth\(mi1,length+dist(node,c));
     \fBend\fR
\fBend\fR
.DE
.F2
Tree traversal algorithm
.F3
.PP
A processor only blocks if it tries to hand out a subtree while there
are no free subcontractors.
Each subcontractor executes the same traversal process,
with a different initial node and
probably with a different initial depth.
In general, a subcontractor may split up the work over even more
processors,
so a subcontractor may also play the role of a root processor.
.PP
The Traveling Salesman Problem has been implemented under Amoeba
using the algorithm described above.
A processor playing the role of a
subcontractor can be viewed as an Amoeba \fIserver\fR.
The service it offers is the evaluation of a TSP subtree.
Each server repeatedly waits for some work,
performs the work, and returns the result.
A processor playing the role of a root processor is a \fIclient\fR.
.PP
.ds cp C\s-2\v'0.4m'p\v'-0.4m'\s+2
.ds mp M\s-2\v'0.4m'p\v'-0.4m'\s+2
.ds aj A\s-2\v'0.4m'p,j\v'-0.4m'\s+2
The 'handing out of work' is implemented using
Remote Procedure Calls.
As stated before, a problem with RPC is the fact that the caller
(client) is blocked during the call.
Therefore, the client cluster is split up into several tasks
(see Figure \|\*(Fn).
A cluster \*(cp running on processor p contains one \fImanager\fR task \*(mp
that performs the tree traversal.
If the cluster has N subcontractors, it also contains N \fIagent\fR
tasks A\s-2\v'0.4m'p,1\v'-0.4m'\s+2 .. A\s-2\v'0.4m'p,N\v'-0.4m'\s+2.
An agent \*(aj controls the communication with subcontractor j.
.F1
.SZ -2
.PS
# Process structure: block B1 holds the manager and agent boxes and is
# framed by an outer box 0.2 larger in each dimension; one server box is
# placed 0.4 below each agent and joined to it by a two-headed arrow.
B1: [
	boxht = 1
	boxwid = .75
	movewid = .1
	B11: box "manager"
	move
	B12: box "agent 1"
	move 
	B13: box "agent 2"
	move 
	B14: box invis "..." wid 2*boxwid
	move
	B15: box "agent n"
]
box ht B1.ht+.2 wid B1.wid+0.2 at B1
boxht = 1
boxwid = .75
movewid = .1
B2: box "server 1" with .n at B1.B12.s - (0,.4)
move 
B3: box "server 2" with .n at B1.B13.s - (0,.4)
move
B4: box invis "..." wid 2*boxwid with .n at B1.B14.s - (0,.4)
move
B5: box "server n" with .n at B1.B15.s - (0,.4)
arrow <-> from B1.B12.s - (0,.01) to B2.n + (0,.01)
arrow <-> from B1.B13.s - (0,.01) to B3.n + (0,.01)
arrow <-> from B1.B15.s - (0,.01) to B5.n + (0,.01)
.PE
.SZ +2
.F2
Process structure of the TSP program
.F3
.PP
After the manager task \*(mp receives a subtree T to evaluate, it starts
the tree traversal of Figure \|\*(Fp.
When it finds a subtree that has to be subcontracted out,
it tries to find a free agent, say \*(aj.
The agent \*(aj sends the work to be done to the
manager M\s-2\v'0.4m'j\v'-0.4m'\s+2
of subcontractor j,
using an RPC with a partial path and the current best solution as parameters.
This manager M\s-2\v'0.4m'j\v'-0.4m'\s+2
starts executing the process we describe here on processor j.
When M\s-2\v'0.4m'j\v'-0.4m'\s+2 finishes the evaluation of the subtree,
it returns the result to \*(aj.
This agent checks if the current best solution has to be updated,
and then becomes available again for the next request from \*(mp.
In the meantime, the manager \*(mp continues its tree traversal
and eagerly tries to find new work to distribute.
The entire client cluster only blocks if the manager tries to
deal out work while all agents (and thus all subcontractors) are engaged.
.PP
This implementation fully utilizes the parallelism present in
the algorithm.
Furthermore, the implementation is highly flexible.
It uses depth-first search, but it
can easily be adapted to other strategies, such as breadth-first
or best-first.
.NH 1
PARALLEL ALPHA-BETA SEARCH USING RPC
.PP
Alpha-beta search is an efficient method for searching game trees for
two-person, zero-sum games.
A node in such a game tree corresponds to a position in the game.
Each node has one branch for every possible move in that position.
A value associated with the node indicates how good that position is
for the player who is about to move (let's assume this player is 'white').
At even levels of the tree, this value is the \fImaximum\fR of the
values of its children; at odd levels it is the \fIminimum\fR, as
the search algorithm assumes black will choose the move that is
least profitable for white.
Most implementations negate the values of the odd level nodes,
so the values are maximized at all levels.
.PP
The alpha-beta algorithm finds the best move in the current position,
searching only part of the tree.
It uses a \fIsearch window\fR (alpha,beta)
and prunes positions whose values fall outside this window.
The algorithm is shown in Figure \|\*(Fn.
.F1
.DS
\fBfunction\fR AlphaBeta(node,depth,alpha,beta): integer;
\fBbegin\fR
     \fBif\fR depth = 0 \fBthen\fR
          alpha := evaluation(node)
     \fBelse\fR
        \fBfor\fR each child c of `node' \fBdo\fR
        \fBbegin\fR
              r := \(miAlphaBeta(c,depth\(mi1,\(mibeta,\(mialpha)
              \fBif\fR r > alpha \fBthen\fR
              \fBbegin\fR
                   alpha := r;
                   \fBif\fR alpha >= beta \fBthen\fR
                        \fBexit\fR loop;   { pruning }
              \fBend\fR
        \fBend\fR
     AlphaBeta := alpha
\fBend\fR
.DE
.F2
Sequential alpha-beta algorithm
.F3
.PP
Alpha-beta search differs significantly from branch-and-bound in
the way the best solution is constructed.
A branch-and-bound program (potentially) updates
its solution every time a processor
visits a leaf node (see Figure 3).
That processor only needs to know the current best solution and
the value associated with the leaf.
An alpha-beta program, on the other hand, has to \fIcombine\fR
the values of the leaves and the interior nodes, using the
structure of the tree.
Some parallel alpha-beta programs realize this
by having a dedicated processor for
every node (up to a certain level) that collects the results of the
child processors\*(<.\*([.Finkel and Fishburn 1982\*(.]\*(>.
.]< 9
As a disadvantage of this approach, processors associated with
high level interior nodes spend most of their time waiting for
their children to finish.
.PP
Our solution avoids this problem by working the other way round.
The child processors compute the values for their parent nodes,
so there is no need for their parent processors to wait.
To do this, an \fIexplicit\fR tree structure is built,
containing the alpha and beta bounds at each node.
The search tree is no longer just a concept, but it is
actually built as a data structure.
This tree is distributed over all processors,
each processor containing that part of the tree it is working on.
.PP
The process structure of alpha-beta is somewhat simpler than
that of TSP, because the shared tree can be used for synchronization
within the client cluster.
Hence there is no need for a manager task.
The client cluster contains as many tasks as there are subcontractors
(see Figure \|\*(Fn).
.F1
.SZ -2
.PS
B1: [
	boxht = 1
	boxwid = .75
	movewid = .1
	B12: box "task 1"
	move 
	B13: box "task 2"
	move 
	B14: box invis "..." wid 2*boxwid
	move
	B15: box "task n"
	line from B13.n + (0,1) to B13.n + (-.2,.75)
	line from B13.n + (0,1) to B13.n + (.2,.75)
	line from B13.n + (-.2,.75) to B13.n + (-.3,.5)
	line from B13.n + (-.2,.75) to B13.n + (-.05,.5)
	line from B13.n + (.2,.75) to B13.n + (.05,.5)
	line from B13.n + (.2,.75) to B13.n + (.3,.5)
	"shared tree" at B14.n + (-0.2,0.5)
]
box ht B1.ht+.2 wid B1.wid+0.2 at B1
boxht = 1
boxwid = .75
movewid = .1
B2: box "server 1" with .n at B1.B12.s - (0,.4)
move 
B3: box "server 2" with .n at B1.B13.s - (0,.4)
move
B4: box invis "..." wid 2*boxwid with .n at B1.B14.s - (0,.4)
move
B5: box "server n" with .n at B1.B15.s - (0,.4)
arrow <-> from B1.B12.s - (0,.01) to B2.n + (0,.01)
arrow <-> from B1.B13.s - (0,.01) to B3.n + (0,.01)
arrow <-> from B1.B15.s - (0,.01) to B5.n + (0,.01)
.PE
.SZ +2
.F2
Process structure of the alpha-beta program
.F3
.PP
Each task essentially executes the sequential alpha-beta algorithm of
Figure 5.
To keep other tasks from evaluating the same positions,
each task leaves a trace of what it has done already
by building the tree.
Each task does a depth-first search in the tree until it
either finds an unvisited node
or it decides that the subtree rooted at the current node
should be evaluated by another processor.
In the first case it generates all children of the unvisited node
and continues with the first child node.
In the second case it sends the node to a subcontractor using RPC
and waits for the result.
.PP
After a subtree has been evaluated (whether local or remote) its
result should be used to update the alpha and beta values of
other nodes in the tree.
This is illustrated in Figure 7.
.F1
.SZ -2
.PS
boxwid = 0.25
boxht = 0.25
B1: box "1"
"38" at B1.ne + (.03,.03)
move 3
B2: box "1"
"28" at B2.ne + (.03,.03)
B3: [
	B31: box "2"
	"-23" at B31.ne + (.03,.03)
	move 1
	B32: box "5"
	"-15" at B32.ne + (.03,.03)
] with .n at B1.s - (0,.5)
B4: [
	B41: box "3"
	"25" at B41.ne + (.03,.03)
	move .4
	B42: box "4"
	"38" at B42.ne + (.03,.03)
] with .n at B3.B31.s - (0,.5)
B5: [
	B51: box "6"
	"15" at B51.ne + (.03,.03)
	move .2
	B52: box "7"
	"9" at B52.ne + (.03,.03)
	move .2
	B53: box "8"
] with .n at B3.B32.s - (0,.5)
B6: [
	B61: box "2"
	"-38" at B61.ne + (.03,.03)
	move 1
	B62: box "5"
	"-28" at B62.ne + (.03,.03)
] with .n at B2.s - (0,.5)
B7: [
	B71: box "3"
	"25" at B71.ne + (.03,.03)
	move .4
	B72: box "4"
	"38" at B72.ne + (.03,.03)
] with .n at B6.B61.s - (0,.5)
B8: [
	B81: box "6"
	"15" at B81.ne + (.03,.03)
	move .2
	B82: box "7"
	"9" at B82.ne + (.03,.03)
	move .2
	B83: box "8"
	"28" at B83.ne + (.03,.03)
] with .n at B6.B62.s - (0,.5)
line from B1.w to B3.B31.n
line from B1.e to B3.B32.n

line from B2.w to B6.B61.n
line from B2.e to B6.B62.n

line from B3.B31.w to B4.B41.n
line from B3.B31.e to B4.B42.n

line from B3.B32.w to B5.B51.n
line from B3.B32.s to B5.B52.n
line from B3.B32.e to B5.B53.n

line from B6.B61.w to B7.B71.n
line from B6.B61.e to B7.B72.n

line from B6.B62.w to B8.B81.n
line from B6.B62.s to B8.B82.n
line from B6.B62.e to B8.B83.n

"7(a)" at B5.B51.s + (0, -.5)
"7(b)" at B8.B81.s + (0, -.5)
.PE
.SZ +2
.F2
Example of alpha-beta search
.F3
In Figure 7(a), the subtrees rooted at nodes 3, 4, 6, and 7 have
been evaluated.
After the subtree rooted at node 8 has been evaluated
the value of the parent of node 8 (node 5) is updated (as 28 > 15).
This is shown in Figure 7(b).
Furthermore, the evaluation of the subtree rooted at 5 has now been
completed.
As its final value (-28) is the highest value of level 1,
the value of node 1 is updated too.
.PP
After the value of a node has been improved
this new value can be used as a tighter alpha bound for its children.
Each child can use this new alpha value as a tighter beta bound for
its own children, and so on.
So new values are propagated down the tree,
to ensure each node uses the smallest possible alpha-beta window.
In principle, new bounds can even be propagated across processor
boundaries.
However, this would also increase the communication overhead.
We have not yet experimented with this kind of propagation.
.NH 1
DISCUSSION
.PP
We have done some measurements on the TSP and the alpha-beta programs.
The hardware used was
a collection of 10 MHz 68010 CPUs connected by a 10 Mbps token ring.
For each program, we ran both a sequential (single processor) version
and a parallel (multi-processor) version.
For simplicity, the parallel versions use only a 2-level processor hierarchy.
They use one processor for the client process and
a varying number of processors for the servers.
.PP
The depths of the subtrees are important parameters of the TSP algorithm.
If the client processor distributes work at too high a level, the effectiveness
of pruning will be severely weakened.
For example, if it traverses just one level,
then the best solution in the leftmost branch of the tree
cannot be used as a bound in its neighbor branch,
as these branches are searched simultaneously.
Increasing the depth of the root subtree will decrease this effect, at
the cost of more communication between the root processor and its
subcontractors.
To achieve high performance, a good compromise has to be found.
For an 11-city problem we found the optimal search depth of the
client to be three levels.
The results for an 11-city problem using this search depth are shown in
Fig. \|\*(Fn.
.F1
.TS
box, tab(:);
c | c | c
l | n | n.
version:time(secs):speedup
_
sequential:637.2:
1 server:548.1:1
2 servers:309.7:1.77
3 servers:218.2:2.51
4 servers:171.7:3.19
5 servers:141.5:3.87
6 servers:124.2:4.41
.TE
.F2
Table I: results for 11-city traveling salesman problem.
.F3
The last column in the table shows the speedup over the 1-server version.
With 7 processors (1 client and 6 servers) a 5-fold speedup over the
sequential program is achieved.
Note that with only one server, there is still some parallelism,
as the client can find the next subtree to hand out, while
the server is working on the previous subtree.
.PP
To measure the performance of the alpha-beta algorithm,
we implemented the game of \fIOthello\fR, using this algorithm.
Fig. \|\*(Fn shows the time to evaluate a position, averaged over
five different positions with a fan-out (number of moves) of
approximately fifteen.
The depth of the search tree was four plies.
As for TSP, the division of labour between the client and the servers
is important.
For the parallel versions the client searched three plies,
the servers searched one ply.
.F1
.TS
box, tab(:);
c | c | c | c | c
l | n | n | n | n.
version:time(secs):speedup:#evaluations:search overhead
_
sequential:266.9::2670:1
1 server:324.6:1:2670:1
2 servers:196.2:1.65:3925:1.47
3 servers:153.3:2.12:4732:1.77
4 servers:125.1:2.59:5676:2.13
5 servers:114.0:2.84:6424:2.40
6 servers:111.5:2.91:6719:2.51
.TE
.F2
Table II: results for Othello implementation of alpha-beta search
.F3
.PP
The results show that the speedup achieved is significantly worse
for alpha-beta search than for TSP.
The main reason is that alpha-beta search suffers more from the decrease
in pruning efficiency than TSP.
The #evaluations column of Table II shows the number of leaves visited by
alpha-beta (i.e., the number of static evaluations).
This number is a yardstick for the total amount of work done.
The last column shows the search overhead relative to the sequential version.
.PP
Our implementations of TSP and alpha-beta search have been deliberately
kept simple initially,
as we implemented them just to gain some experience with programming
using RPC and light-weight processes.
However, our results indicate that
the primitives offered by Amoeba are sufficiently general for
more advanced implementations.
.NH 1
REFERENCES
.nr [W \w'10'
.LP
.]<
.ds [F Birrell and Nelson 1984
.]-
.ds [T Implementing Remote Procedure Calls
.ds [A \*([(B\*()]irrell, A. D.
.as [A " and \*([(N\*()]elson, B. J.
.ds [J ACM Transactions on Computer Systems
.ds [V 2
.ds [N 1
.ds [P 39-59
.nr [P 1
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m2 1984
.][ 1 journal-article
.ds [F Finkel and Fishburn 1982
.]-
.ds [T Parallelism in Alpha-Beta Search
.ds [A \*([(F\*()]inkel, R. A.
.as [A " and \*([(F\*()]ishburn, J. P.
.ds [J Artificial Intelligence
.ds [V 19
.ds [P 89-106
.nr [P 1
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D 1982
.][ 1 journal-article
.ds [F Lawler and Wood 1966
.]-
.ds [T Branch-and-bound Methods: a survey
.ds [A \*([(L\*()]awler, E. L.
.as [A " and \*([(W\*()]ood, D. E.
.ds [J Operations Research
.ds [V 14
.ds [N 4
.ds [P 699-719
.nr [P 1
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m7 1966
.][ 1 journal-article
.ds [F Mullender and Tanenbaum 1984
.]-
.ds [T Protection and Resource Control in Distributed Operating Systems
.ds [A \*([(M\*()]ullender, S. J.
.as [A " and \*([(T\*()]anenbaum, A. S.
.ds [J Computer Networks
.ds [V 8
.ds [N 5
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D 1984
.][ 1 journal-article
.ds [F Mullender and Tanenbaum 1985
.]-
.ds [T A Distributed File Service Based on Optimistic Concurrency Control
.ds [A \*([(M\*()]ullender, S. J.
.as [A " and \*([(T\*()]anenbaum, A. S.
.ds [J Proc. 10th ACM Symposium on Operating Systems Principles
.ds [C Rosario Resort, Orcas Island, Washington
.ds [P 51-62
.nr [P 1
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(mc 1985
.][ 1 journal-article
.ds [F Mullender and Tanenbaum 1986
.]-
.ds [T Design of a Capability-Based Distributed Operating System
.ds [A \*([(M\*()]ullender, S. J.
.as [A " and \*([(T\*()]anenbaum, A. S.
.ds [J Computer Journal
.ds [V 29
.ds [N 4
.ds [P 289-299
.nr [P 1
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m8 1986
.][ 1 journal-article
.ds [F Nelson 1981
.]-
.ds [T Remote Procedure Call
.ds [A \*([(N\*()]elson, B. J.
.ds [I Carnegie-Mellon University
.ds [R CMU-CS-81-119
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m5 1981
.][ 4 tech-report
.ds [F Tanenbaum and Mullender 1981
.]-
.ds [T An Overview of the Amoeba Distributed Operating System
.ds [A \*([(T\*()]anenbaum, A. S.
.as [A " and \*([(M\*()]ullender, S. J.
.ds [J Operating Syst. Rev.
.ds [V 15
.ds [N 3
.ds [P 51-64
.nr [P 1
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m7 1981
.][ 1 journal-article
.ds [F Tanenbaum et al. 1986
.]-
.ds [T Using Sparse Capabilities in a Distributed Operating System
.ds [A \*([(T\*()]anenbaum, A. S.
.as [A ", \*([(M\*()]ullender, S. J.
.as [A ", and \*([(V\*()]an Renesse, R.
.ds [J Proc. 6th Int. Conf. on Distributed Computing Systems
.ds [C Cambridge, Massachusetts
.ds [P 558-563
.nr [P 1
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m5 1986
.][ 1 journal-article
.ds [F Tanenbaum and Van Renesse 1985
.]-
.ds [T Distributed Operating Systems
.ds [A \*([(T\*()]anenbaum, A. S.
.as [A " and \*([(V\*()]an Renesse, R.
.ds [J Computing Surveys
.ds [V 17
.ds [N 4
.ds [P 419-470
.nr [P 1
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(mc 1985
.][ 1 journal-article
.nr [W \w'10'
.]>
.nr [W \w'10'
