.rm tm	\" no warnings: deleting "tm" silences every .tm diagnostic below
.nr Rb 1	\" debug flag: page-number trace and balance checks (see T], ]<)
.\" Month-name strings m1..mc (index 1-9, a-c) -- presumably interpolated
.\" by the refer/bibliography macros for dates; confirm against refb.
.ds m1 Jan.
.ds m2 Feb.
.ds m3 Mar.
.ds m4 Apr.
.ds m5 May
.ds m6 June
.ds m7 July
.ds m8 Aug.
.ds m9 Sep.
.ds ma Oct.
.ds mb Nov.
.ds mc Dec.
.\"		Robbert's Dynamite Troff Macros
.\"
.\" Use at your own risk.  These will normally be used next to -ms.  It
.\" redefines LP, PP, IP, SH, NH, FS, KS, KF, KE, bp (!), refer macros,
.\" and page format.  Lines are aligned on vertical spacing for a perfect
.\" page mirror.  It attempts to remove widows and to balance the pages.
.\" Figure macros are available through .F1 <figure> .F2 <trailer> .F3.
.\" There's no extra spacing between paragraphs, so you can use .LP any-
.\" time to align on vertical spacing or to reset the formatting parameters
.\" (point size, ...).  .KW keyword specifies a keyword, .KW flushes them.
.\" Use my refb if you want this to work.  If you look through this file,
.\" you may find some handy definitions that you can use as well.  By the
.\" way, if there's no .TL, .NH begins a new chapter.
.\"		Good luck, brave person.
.\"
.\"
.\"	=====> Ds is like ds, but then accepts real arguments
.\"
.de Ds	\"	--- define string ---
.\" Like .ds, but the value is passed as a macro argument, so callers can
.\" build it from other arguments and registers: .Ds name "value".
.ds \\$1 "\\$2
..
.de As	\"	--- append to string ---
.\" Argument-taking counterpart of .as: .As name "text to append".
.as \\$1 "\\$2
..
.\"	=====> page formatting macros <=====
.\"
.de Al	\"	--- alignment macro ---
.\" Advance to the next multiple of the vertical spacing, measured from
.\" the top of the page body (register T[), so lines in both columns and
.\" on facing pages sit on the same vertical grid.  Skipped when PD
.\" (interparagraph spacing) is non-zero, since the grid is broken then.
.br
.if !\\n(PD \{\
.   nr VV \\n(VS-(\\n(VS/11)	\" vertical spacing minus a little bit
.   sp \\n(VVu-((\\n(.du-\\n(T[u+\\n(VVu)%\\n(VSu)
.\}
..
.de T]	\"	--- bottom of page trap macro ---
.\" Sprung by the trap planted in T[ (see also Bt).  In two-column mode
.\" (C% odd) it just moves to the top of the right column.  Otherwise it
.\" emits pending footnotes (Fd), the header/footer title line, and
.\" starts the next page via the renamed bp request (b[), which in turn
.\" springs T[ at the top of the new page.
.ev 1				\" switch environment to save line buffer
.ie \\n(C%%2 \{\
.   nr C% +1			\" increase column counter
.   po \\n(POu+\\n(LLu+1c	\" new page offset
.   sp |\\n(Tpu			\" to top of right column
.   ev
.\}
.el \{\
.   ch T]			\" remove trap immediately
.   if \\n(C% .nr C% +1		\" if counting columns, count columns
.   po \\n(POu			\" set page offset
.   ie e .nr Bl \\n(nl		\" save position of left page
.   el .if \\n(Rb&\\n(Tc&((\\n(nl-\\n(Bl>0.5v):(\\n(Bl-\\n(nl>0.5v)) \
.      tm WN:balancing problem (\\n(nl != \\n(Bl)
.\" NOTE(review): ".\{" on the next line differs from the "\{\" form used
.\" everywhere else in this file -- confirm the target troff accepts it.
.   if \\n(Fd .\{
.      sp |\\n(Plu-\\n(Fdu	\" to bottom of page
.      Fd			\" output footnotes
.      rm Fd			\" remove footnotes
.      nr Fd 0			\" clear footnote size
.   \}
.   nr Tl 0
.   if e .if \\n(nl+1v<=\\n(Pl .nr Tl 1	\" left page was shortened
.   if !'\\*(Pf'no' \{\
.      ie \\n(Tc \{\
.         sp |2.4c		\" some room at the top of the page
.         ie \\n(Pp  .tl ''\\s-1- \\n% -\\s+1''		\" paper header
.         el .ie o   .tl '\\*(S2'\\*(T2'\\f3\\n%\\fP'	\" right page header
.         el	     .tl '\\f3\\n%\\fP'\\*(T1'\\*(S1'	\" left page header
.      \}
.      el \{\
.         sp |\\n(.pu-2c	\" bottom of page
.         if !\\n(Pp .tl ''\\s-1- \\n% -\\s+1''		\" paper header
.      \}
.   \}
.   nr Tc 1			\" page number at top of page
.   ev				\" restore environment
'   b[				\" skip to next page, springing T[
.\}
..
.de E]	\"	--- end of input ---
.\" End-of-text macro (installed with .em below): flush the last
.\" paragraph and force any queued floating keeps out.
.P]				\" end of last paragraph
.nr Kf 1			\" flush floating keeps
.if \\n(Kr \c
..
.de Bt	\"	--- change bottom of page trap ---
.\" Move the bottom-of-page trap to position $1; if the current position
.\" is already past it, spring the trap right now.
.nr Bt \\$1			\" calculate new page trap
.ie \\n(Bt<=\\n(nl .T]		\" if before current pos, spring now
.el .ch T] \\n(Btu		\" set new page trap
..
.nr T| 0			\" busy flag (NOTE: not referenced elsewhere in this file)
.de T[	\"	--- top of page macro ---
.\" Planted at the very top of every page (.wh 0 T[): installs the bottom
.\" trap, appends a leftover footnote, spaces past the page header
.\" (size kept in register T[), releases queued floating keeps, and
.\" records the start of the page body in Tp.
.if \\n(Rb .tm PG:\\n%
.nr Bt \\n(Pl-1v+1		\" bottom of page trap position
.wh \\n(Btu T]			\" set bottom of page trap
.po \\n(POu			\" page offset
.nr Fc 0 1			\" reset footnote count
.if \\n(Fe .Fa			\" append leftover footnote
.ev 1				\" switch environment to save line buffer
.nr T[ 2.4c+1v+0.7c		\" size of page header
.sp |\\n(T[u			\" end of header
.if \\n(Kr .Kr			\" release some floating keeps
.Al				\" align in case of figures
.ev				\" restore environment
.nr Tp \\n(.d			\" page start
..
.de 2C	\"	--- 2 column output ---
.\" Switch to two-column mode: C% counts columns (odd = right column
.\" still pending, see T]); line length becomes half the title length
.\" minus a 1cm gutter.
.P]
.nr C% 1 1			\" start column counter
.ll (\\n(LTu-1c)/2u		\" calculate line length
.nr LL \\n(.l			\" -ms compatibility
.Al				\" align
.nr Tp \\n(.d			\" new top of page
.P[
..
.de 1C	\"	--- back to 1 column output ---
.\" Leave two-column mode: full line length, original page offset, and
.\" stop the column counter.
.P]
.ll \\n(LTu			\" restore line length
.nr LL \\n(.l			\" -ms compatibility
.po \\n(POu			\" restore margin
.nr C% 0			\" stop column count
.P[
..
.\"
.\"	=====> paragraph macros <=====
.\"
.de P[	\"	--- begin paragraph ---
.\" Start diverting paragraph text into Pd (unless inside a keep, Ks>0),
.\" so that P] can measure the paragraph and avoid widows on replay.
.if !\\n(Ks .di Pd		\" divert
..
.de P]	\"	--- end paragraph ---
.\" Close the Pd diversion opened by P[, check whether the measured
.\" paragraph (\n(dn) still fits before the page trap, possibly shorten
.\" the page by one line to avoid a widow, and replay the text.
.ce 0				\" break, turn off centering
.in 0				\" turn off indent
.if !\\n(Ks \{\
.   nr Pm \\n(.u		\" save fill mode
.   nf				\" stop filling
.   di
.   \" diversion ended.  If paragraph doesn't fit, do something special
.   \" if left page was decreased, decrease right page too, else if
.   \" paragraph doesn't fit for but one line, decrease page length
.   if \\n(.t+1v<\\n(dn .if \\n(Tl:(\\n(.t+2v>=\\n(dn) .Bt -1v
.   Pd				\" flush paragraph
.   if \\n(Pm .fi		\" restore fill mode, but don't break
.\}
..
.\"
.\"	=====> footnote macros <=====
.\"
.rm FS FE FJ FK			\" remove -ms footnote stuff
.de FS	\"	--- start footnote ---
.\" Collect footnote text into diversion Fe; FE then decides whether it
.\" still fits on the current page or must wait for the next one.
.ev 1				\" switch environments
.da Fe				\" divert footnote to Fe
.fi
..
.de FE	\"	--- end of footnote ---
.\" Note the double duty: register Fe is the pending footnote's size,
.\" diversion Fe is its text (likewise Fd for the page's footnote buffer).
.nf				\" break and stop filling
.da
.ev				\" restore environment
.\" If footnote doesn't fit, break here and now.  If it does, append it to
.\" the other macro and move end of page trap up.  If buffering already,
.\" continue buffering.
.nr Fe +\\n(dn			\" calculate new footnote size
.if \\n(Fe=\\n(dn .ie \\n(nl+\\n(.d+1v+\\n(Fd+\\n(Fe>=\\n(Bt .Bt \\n(nl+\\n(.d
.el .Fa				\" footnote still fits
..
.de Fa	\"	--- add footnote to buffer ---
.\" Append the pending footnote (diversion Fe) to this page's footnote
.\" buffer Fd, prefix a separator rule before the first one, and pull the
.\" bottom-of-page trap up to reserve the room.
.ev 1				\" switch environments again
.da Fd				\" add footnote to Fd
.if \\n+(Fc=1 \l'1i'		\" footnote separator on first footnote
.Fe				\" ditto
.br				\" ditto
.da
.ev				\" restore environment
.nr Fd +\\n(dn			\" calculate new footnote size
.Bt \\n(Pl-\\n(Fd-1v		\" calculate new page trap
.rm Fe				\" remove old footnote
.nr Fe 0			\" clear footnote size
..
.\"
.\"	=====> keep macros <=====
.\"
.\" Floating keeps are queued in up to ten diversions d0..d9 (heights in
.\" h0..h9), linked through registers n0..n9.  Kl heads the free list;
.\" Ko/Ki are the head and tail of the queue of pending keeps; -1 ends a
.\" list.  (KE enqueues at Ki, Kr dequeues from Ko.)
.nr Kl 0			\" free list
.nr n0 1
.nr n1 2
.nr n2 3
.nr n3 4
.nr n4 5
.nr n5 6
.nr n6 7
.nr n7 8
.nr n8 9
.nr n9 (-1)			\" end of free list
.nr Ko (-1)			\" queue head (next keep to release)
.nr Ki (-1)			\" queue tail (last keep added)
.de Bp	\"	--- begin a new page ---
.\" Force a page/column break unless already at the top of the page body
.\" (current diversion position equals the header size, register T[).
.\" Fix: the original expression read "!(\n(.d=(\n(T[)", with a stray
.\" "(" before \n(T[ leaving the parentheses unbalanced; the balanced
.\" form below matches the same test used by JP, SH, In and Kx.
.if !(\\n(.d=\\n(T[) .T]	\" if not top of page, go to bottom
..
.de Kg	\"	--- output keep \\$1 with height \\$2
.\" Replay diversion $1 unfilled.  ($2, the height, is accepted for
.\" documentation but not actually used by the body.)
.nr Pm \\n(.u			\" save fill mode
.nf				\" don't fill
.\\$1				\" output keep
.if \\n(Pm .fi			\" restore fill mode
.if \\n(.t<2v .Bp		\" if little room left, begin new page
..
.de KS	\"	--- begin static keep ---
.\" Static keeps divert into Ks; Ks the register also counts keep
.\" nesting so P[/P] stop their own paragraph diversion meanwhile.
.P]				\" end paragraph
.nr Ks +1			\" mark keep
.di Ks				\" divert keep to Ks
.P[
..
.de KF	\"	--- begin floating keep --
.\" Floating keeps divert into Kf and may be deferred to a later page
.\" (see KE/Kr and the d0..d9 queue above).
.P]				\" end paragraph
.nr Ks +1			\" mark keep
.di Kf				\" divert keep to Kf
.P[
..
.de KE	\"	--- end keep --
.\" Static keep (current diversion is Ks): output now, breaking the page
.\" first if it does not fit.  Floating keep: output immediately when the
.\" queue is empty and it fits, else move the Kf diversion into a queue
.\" slot (d<i>/h<i>/n<i>) taken from the free list, flushing if full.
.P]				\" break
.ie '\\n(.z'Ks' \{\
.   di
.   if \\n(dn>=\\n(.t .Bp	\" if it doesn't fit, begin a new page
.   Kg Ks \\n(dn		\" release static keep
.\}
.el \{\
.   di
.   ie (\\n(Ki<0)&(\\n(dn<\\n(.t) .Kg Kf \\n(dn
.   el \{\
.      if \\n(Kl<0 .Kr		\" free list exhausted, flush some entries
.      if \\n(Ki>=0 .nr n\\n(Ki (\\n(Kl)	\" if (Ki != NIL) n[Ki] = Kl
.      nr Ki (\\n(Kl)		\" Ki = Kl
.      nr Kl (\\n(n\\n(Kl)	\" Kl = n[Kl]
.      rn Kf d\\n(Ki		\" d[Ki] = Kf	diversion
.      nr h\\n(Ki (\\n(dn)	\" h[Ki] = dn	height
.      nr n\\n(Ki (-1)		\" n[Ki] = -1	(end of list)
.      if \\n(Ko<0 .nr Ko (\\n(Ki)	\" if (Ko < 0) Ko = Ki
.      nr Kr 1			\" entries to release
.   \}
.\}
.nr Ks -1
.P[				\" start a new paragraph
..
.de Kr	\"	--- release floating keep ---
.\" Dequeue the keep at the head of the queue (Ko), output it, return its
.\" slot to the free list, then keep releasing while the next one fits
.\" (or unconditionally when Kf forces a flush).
.in 0				\" no indentation
.nf				\" no filling
.nr Kr 0			\" don't release while releasing
.Kg d\\n(Ko \\n(h\\n(Ko		\" output it
.fi				\" restore filling
.in				\" restore indentation
.nr Kt \\n(Ko			\" Kt = Ko
.nr Ko (\\n(n\\n(Kt)		\" Ko = n[Kt]	remove from queue
.nr n\\n(Kt (\\n(Kl)		\" n[Kt] = Kl	put on free list
.nr Kl (\\n(Kt)			\" Kl = Kt
.nr Kr (\\n(Ko>=0)		\" Kr = (Ko >= 0)
.ie !\\n(Kr .nr Ki (-1)		\" if Ko < 0 then Ki = end of list
.el .if \\n(h\\n(Ko<\\n(.t .Kr	\" release another one
.if \\n(Kf .T]			\" if flushing, begin new page
..
.de KK	\"	--- flush floating keeps ---
.\" Force all queued floating keeps out, taking page breaks as needed.
.nr Kf 1			\" flush floating keeps
.Bp				\" begin a new page
.nr Kf 0			\" don't flush anymore
..
.\"
.\"	=====> user macros <=====
.\"
.rn bp b[			\" rename begin page request
.de bp	\"	--- begin page for users ---
.\" User-visible page break: end the paragraph, run the bottom-of-page
.\" machinery (T]), and start a fresh paragraph diversion.
.P]				\" end paragraph
.T]				\" to bottom of page
.P[				\" begin new paragraph
..
.de B[	\"	--- begin block ---
.\" Divert following text into diversion $1; B] then stores the block's
.\" width in register $2 and its height in register $3 (the register
.\" names are carried over in strings Bw/Bh).
.br
.ds Bw \\$2
.ds Bh \\$3
.di \\$1
.nf
..
.de B]	\"	--- end block ---
.fi
.di
.nr \\*(Bw \\n(dl
.nr \\*(Bh \\n(dn
..
.de B|	\"	--- position block ---
.\" Output diversion $1 at horizontal offset $2 and vertical offset $3
.\" relative to the current position, then return exactly there.
.nf
.mk B|				\" remember vertical position
.nr Bw \\$2			\" copy x argument
.nr Bh \\$3			\" copy y argument
.in +\\n(Bwu			\" go to horizontal position
.sp \\n(Bhu			\" go to vertical position
.\\$1				\" output block
.in				\" return to horizontal position
.sp |\\n(B|u			\" return to vertical position
.fi
..
.de C[	\"	--- begin centered block ---
.\" Divert into Cd; C] centers the whole block (left edges aligned)
.\" rather than centering line by line.
.P]				\" end paragraph
.nr Ks +1			\" mark keep
.di Cd				\" divert to Cd
.P[
..
.de C]	\"	--- end centered block ---
.\" NOTE(review): Kg's second argument is documented as a height but
.\" receives the diversion width \n(dl here; Kg ignores it, so this is
.\" harmless -- confirm intended.
.P]				\" break
.di
.if \\n(dl<\\n(.l .in (\\n(.lu-\\n(dlu)/2u	\" indent to center
.Kg Cd \\n(dl			\" get diverted text
.in 0				\" no indentation
.nr Ks -1			\" end of keep
.P[				\" begin normal paragraph
..
.de Q[	\"	--- begin quote ---
.\" Quotation: a centered block set at 3/4 of the current line length.
.C[				\" begin centered block
.nr Ql \\n(.l			\" save line length
.ll \\n(.lu*3u/4u		\" set line length to 3/4 of current ll
..
.de Q]	\"	--- end quote ---
.ll \\n(Qlu			\" restore line length
.C]				\" end centered block
..
.
.de SZ	\"	--- size change ---
.\" Set point size and vertical spacing together, after a break.
.br				\" first break
.ps \\$1			\" change point size
.vs \\$1			\" change vertical spacing
..
.de JR	\"	--- reset indentation ---
.\" Jn = current total indent, Ji = nesting depth, J0..J5 = per-level
.\" indent increments used by RS/RE/JP/IP.
.nr Jn 0			\" current indent
.nr Ji 0			\" index of indented paragraphs
.nr J0 5n			\" reset indent sizes
.nr J1 5n
.nr J2 5n
.nr J3 5n
.nr J4 5n
.nr J5 5n
..
.de RT	\"	--- reset fonts and such ---
.\" Restore the -ms style parameters (PS/VS/LL), the page length, the
.\" title environment (1), and the roman font.
.ps \\n(PS			\" point size
.vs \\n(VS			\" vertical spacing
.ll \\n(LLu			\" line length
.nr Pl 27c+0.5v			\" length of page
.ll \\n(LLu			\" line length (redundant repeat; harmless)
.ev 1				\" parameters in environment 1 (title)
.ps 12				\" point size
.ll \\n(LLu			\" line length
.lt \\n(LTu			\" title length in environment 1
.ev
.ft 1				\" reset font
..
.de RS	\"	--- increase indent ---
.\" Push one indentation level (adds J[Ji] to the running indent Jn).
.nr Jn +\\n(J\\n(Ji
.nr Ji +1
..
.de RE	\"	--- decrease indent ---
.\" Pop one indentation level (inverse of RS).
.nr Ji -1
.nr Jn -\\n(J\\n(Ji
..
.de JP	\"	--- begin unlabeled, indented paragraph ---
.\" Hanging paragraph at the current indentation level: body indented by
.\" Jn + J[Ji], first line outdented by J[Ji], tab stop at the label.
.P]				\" end paragraph
.if \\n(.t<1v .Bp		\" if not enough room, begin page
.if !(\\n(.d=\\n(Tp) .sp 0.3v 	\" if not top of page, skip some space
.nr Jj \\n(J\\n(Ji		\" increase in indent
.fi				\" start filling
.in \\n(Jnu+\\n(Jju		\" set new indent
.ta \\n(Jju			\" set tab
.ti -\\n(Jju			\" set temporary indent
.P[
..
.de IP	\"	--- begin labeled, indented paragraph ---
.\" $1 = label, $2 = optional indent in ens; the trailing "\c" joins the
.\" label (followed by a tab) with the paragraph text.
.if \\n(.$>1 .nr J\\n(Ji \\$2n	\" set indent if specified in ens
.JP				\" do indented paragraph
.RT				\" restore -ms variables
\&\\$1	\c
..
.de QP	\"	--- begin quotation ---
.\" Like IP but also narrows the line length by the level's indent.
.if \\n(.$>1 .nr J\\n(Ji \\$2n	\" set indent if specified in ens
.ll -\\n(J\\n(Jiu		\" decrease line length
.JP				\" do indented paragraph
.RT				\" restore -ms variables
\&\\$1	\c
..
.de LP	\"	--- begin paragraph ---
.\" Block paragraph; also the documented way to re-align on the vertical
.\" grid and reset formatting parameters at any time.
.P]				\" end last paragraph
.Al				\" align
.sp \\n(PDu			\" interparagraph spacing
.JR				\" reset indentation
.RT				\" restore -ms variables
.fi				\" start filling
.P[				\" begin next
..
.de PP	\"	--- begin paragraph with temporary indent ---
.\" Like LP, but the first line is indented 5 ens.
.P]				\" end last paragraph
.Al				\" align
.sp \\n(PDu			\" interparagraph spacing
.JR				\" reset indentation
.RT				\" restore -ms variables
.fi				\" start filling
.ti +5n				\" temporary indent
.P[				\" begin next
..
.de CH	\"	--- chapter heading ---
.\" Start a chapter on a fresh page: large centered heading, figure
.\" counter restarted, page number moved to the bottom of the first page.
.P]				\" break, start chapter
.KK				\" flush floating keeps
.Bp				\" begin page
.nr Tc 0			\" page number at bottom of page
.nr Fi 1 1			\" current figure
.RT				\" restore -ms variables
.ps 18				\" set point size
.vs 24				\" set vertical spacing
.ce 1000			\" center all lines
.nr Hi 0			\" header index 0
.rm HS				\" remove header string
.rm Rc				\" do not count pages
.fi				\" filling
.P[				\" start a new paragraph
..
.de SH	\"	--- section heading ---
.\" Unnumbered bold heading; avoids starting within 5 lines of the page
.\" bottom.
.P]				\" end last paragraph
.Al				\" align
.if \\n(.t<5v .Bp		\" if not enough room, begin new page
.if !(\\n(.d=\\n(Tp) .sp 	\" if not top of page, skip some space
.RT				\" restore -ms variables
.ft 3				\" bold font
.nr Hi 0			\" header index 0
.rm HS				\" remove header string
.fi				\" start filling
.P[				\" start a new paragraph
..
.de NH	\"	--- numbered section header ---
.\" $1 = depth (default 1; 0 behaves like 1 but resets H1).  Maintains
.\" counters H1..H5 and builds the dotted section string HS (e.g.
.\" "2.4.1").  When no .TL was seen (Pp=0), a level-1 .NH starts a new
.\" chapter and resets the next-figure string Fn.
.ie \\n(.$=0 .nr Ha 1		\" if no argument, Ha = 1
.el .nr Ha \\$1			\" Ha is argument
.if \\n(Ha<1 .nr H1 0		\" reset subsection numbers
.if \\n(Ha<2 .nr H2 0
.if \\n(Ha<3 .nr H3 0
.if \\n(Ha<4 .nr H4 0
.if \\n(Ha<5 .nr H5 0
.if \\n(Ha=0 .nr Ha 1		\" .NH 0 is like .NH 1, but then resets
.nr H\\n(Ha +1			\" H[Ha]++
.ie (\\n(Pp=0)&(\\n(Ha=1) \{\
.   CH
.   if !\\n(Pp .Ds Fn "\\n(H1\\*(Fs1"	\" reset next figure string
.   ds HS \\n(H1
\\s+6\\*(HS\\s-6
.   sp 0.5
.\}
.el \{\
.   SH
.   ds HS \\n(H1
.   if \\n(Ha>1 .as HS .\\n(H2
.   if \\n(Ha>2 .as HS .\\n(H3
.   if \\n(Ha>3 .as HS .\\n(H4
.   if \\n(Ha>4 .as HS .\\n(H5
\\*(HS.
.\}
.ds H0 \\*(HS.
.nr Hi \\n(Ha			\" header index
..
.de TL	\"	--- title of paper ---
.\" Marks paper mode (Pp=1): centered running page number, figures
.\" numbered 1, 2, ... instead of per-chapter.
.nr Pp 1			\" mark it's a paper
.CH
.Ds Fn "1"			\" next figure string
.ps -2
..
.de AU	\"	--- authors ---
.\" Author names, set in italic at normal size (after the title).
.sp
.ft 2
.ps \\n(PS
.vs \\n(VS
..
.de AI	\"	--- author's institution
.\" Institution lines, back to roman.
.sp
.ft 1
.ps \\n(PS
.vs \\n(VS
..
.de AB
.\" Begin the abstract: roman, non-centered, set as a narrow quote block;
.\" "ABSTRACT" heading unless an argument suppresses it.
.AI
.if !\\n(.$ ABSTRACT
.sp
.ce 0
.Q[
..
.de AE
.\" End the abstract (closes the quote block opened by AB).
.Q]
.sp
..
.de PS	\"	--- start picture ---
.\" pic(1) stub: pictures are ignored here.
.\" $1 is height, $2 is width in units
..
.de PE	\"	--- end of picture ---
..
.de UX	\"	--- UNIX macro ---
.\" Print "UNIX" (optional suffix $1, prefix $2); the first use adds a
.\" dagger and the trademark footnote (register U1 latches afterwards).
.ie \\n(U1 \\$2\s-1UNIX\s0\\$1
.el \{\
\\$2\s-1UNIX\\s0\\$1\(dg
.   FS
\(dg UNIX is a Registered Trademark of AT&T in the USA and other countries.
.   FE
.nr U1 1
.\}
..
.de IX	\"	--- add to index, update page headers ---
.\" $1 = title: set the running-head strings T1/T2 (titles) and S1/S2
.\" (chapter/section labels) according to the current header depth Hi,
.\" and append a ".In" line to the index diversion IO for later replay.
.LP				\" end header, define page headers
.if \\n(Hi=0 \{\
.   ds T1 \\$1
.   ds T2 \\$1
.   rm S1 S2			\" no chapter or section number
.\}
.if \\n(Hi=1 \{\
.   ds T1 \\$1
.   ds S1 \s-2CHAP.\& \\*(HS\s+2
.   ds T2 \\$1
.   ds S2 \\*(S1
.\}
.if \\n(Hi=2 \{\
.   ds T2 \\$1
.   ds S2 \s-2SEC.\& \\*(HS\s+2
.\}
.da IO				\" divert to index
\\!.In \\n(Hi "\\*(HS" "\\$1" \\n%
.da
..
.de In	\"	--- output index ---
.\" Replayed from the IO diversion (see IX): $1 = depth, $2 = section
.\" number, $3 = title, $4 = page number (set flush right with \h).
.P]				\" end of paragraph
.if !(\\n(.d=\\n(Tp) .ie \\$1<2 .sp 1.7
.el .if \\$1=2 .sp 0.3
.in 0
.ad l				\" adjust only left side
.ll -5n				\" decrease line length
.nr J0 0
.P[
.ie \\$1 \{\
.   nr In \\$1-1
.   nr J\\$1 \\n(J\\n(In+\\w'\\$2'+3.5n
.   in \\n(J\\$1u		\" set indent
.   ta \\w'\\$2'u+3.5n
.\" NOTE(review): the .ti below does not mirror the .ta stop above (the
.\" 3.5n term is added, not subtracted, after negation) -- confirm.
.   ti -\\w'\\$2'u+3.5n
.   ie \\$1<2 \\s+3\\f3\\$2	\\$3\\f1\\s-3\&\c
.   el \\$2	\\$3\&\c
.\}
.el \\s+3\\f3\\$3\\f1\\s-3\&\c
.ll +5n				\" reset line length
.nr In \\n(.l-\w'\\$4'
\\\\h'|\\n(Inu'\\$4
.in 0				\" break, reset indent
.ad b				\" adjust both sides, end of diversion
..
.de IH	\"	--- index header ---
.\" Convenience: emit a (possibly numbered) header $2 and index it.
.ie \\$1 .NH \\$1		\" start a new header
.el .CH				\" start a new, unindexed, chapter
\\$2
.IX "\\$2"			\" add header to index
..
.\" Fs separates chapter and figure numbers in figure labels (e.g. "2.3").
.ds Fs .
.de F1	\"	--- begin figure ---
.\" Fn always holds the NEXT figure's number string; F1 copies it into
.\" Fp (the figure being set, printed by F2) and advances Fi.  The figure
.\" itself is a floating keep containing a centered block.
.ds Fp \\*(Fn
.ie \\n(Pp .ds Fn \\n+(Fi
.el .ds Fn \\n(H1\\*(Fs\\n+(Fi
.KF				\" floating keep
.sp 0.5c
.C[				\" begin centered block
..
.de F2	\"	--- end of figure, begin label ---
.\" Close the figure body and start its caption ("Fig. N.") in a smaller,
.\" narrower quote block.
.C]				\" end centered block
.sp 0.5c
.Q[
.fi
.ps -2
.vs -2
\\fBFig.\ \\*(Fp.\\fP
.ft 1
..
.de F3	\"	--- end of figure label ---
.\" Restore font/size/spacing and close the caption and floating keep.
.br
.ft
.vs
.ps
.Q]
.sp 0.8				\" leave some room under the figure
.KE				\" end floating keep
..
.de KW	\"	--- keyword ---
.\" With an argument: log the keyword on stderr via .tm (forwarding it
.\" transparently out of any diversion); without: log a bare "KW" marker.
.\" NOTE(review): ".rm tm" at the top of this file removes the tm request,
.\" so keyword extraction only works if that line is dropped -- confirm.
.ie \\n(.$ \{\
.   ie '\\n(.z'' .tm KW:\\$1
.   el \\!.KW "\\$1"
.\}
.el \{\
.   P]
.   tm KW
.   P[
.\}
..
.de Kx	\"	--- start list of keywords ---
.\" Small vertical gap before a keyword index listing.
.P]
.if !(\\n(.d=\\n(Tp) .sp	\" if not top of page, skip some space
.P[
..
.de Kw	\"	--- output keyword ---
.\" One keyword entry ($1) with its data ($2), hanging-indented by 1cm.
.LP
.in 1c
.ti -1c
\&\\$1 \\$2
..
.nr Di 1 1			\" current definition
.\" Dn = number string of the next definition; Dx is an optional prefix
.\" (presumably a chapter number defined elsewhere -- empty if unset).
.Ds Dn "\\*(Dx\\n(Di"
.de D[	\"	--- begin definition ---
.\" Numbered "Definition N." paragraph: Dp = this definition's number,
.\" Dn is advanced to the next one; $1 = the term being defined.
.sp 0.5c
.Ds Dp "\\*(Dx\\n(Di"
.Ds Dn "\\*(Dx\\n+(Di"
\\fBDefinition\ \\*(Dp.\ \ \\$1\\fP\ \ \c
..
.de D]	\"	--- end of definition ---
.sp 0.3c
..
.\"
.\"	=====> refer macros <=====
.\"
.\" Bracketing strings used around citations in the text: >. / >, end a
.\" reference with punctuation, [. / .] delimit the bracketed number.
.rm <. <,
.Ds >. "."		\" reference ends with period
.Ds >, ","		\" reference ends with comma
.Ds [. " \\f1["		\" start of reference
.Ds .] "]\\fP"		\" end of reference
.de ]<	\"	--- references ---
.\" Pass-1 helper (active only when Rb is set): with an argument, log the
.\" reference number via .tm, forwarding it transparently out of any
.\" diversion; without, log a bare "]<" marker.
.if \\n(Rb \{\
.   ie \\n(.$ \{\
.      ie '\\n(.z'' .tm ]<:\\$1
.      el \\!.]< "\\$1"
.   \}
.   el \{\
.      P]
.      tm ]<
.      P[
.   \}
.\}
..
.de ]>
..
.de ]-	\"	--- remove garbage before next definition ---
.\" Clear all refer field strings so stale fields never leak into the
.\" next bibliography entry.
.rm [A [B [C [D [E [G [H [I [J [M [N [O [P [Q [R [S [T [V ]. ],
..
.de RR	\"	--- add comma + argument to reference ---
.\" Emit the pending separator (], ) then $1, and arm the terminators:
.\" ]. ends the entry with a period, ], separates the next field.
\\*(],\\$1\c
.ds ]. .
.ds ], , \&
..
.de Rc	\"	--- cited on pages ($1: all; $2: first; ...) ---
.ie \\n(.$>2 Cited on pages \\$1.
.el Cited on page \\$1.
..
.de ][	\"	--- new reference ---
.\" Format one refer(1) record from the field strings ([F label, [A
.\" author, [T title, [J journal, [V volume, ...), joining the parts
.\" with RR (comma separation, final period), all inside a static keep.
.\" NOTE(review): the "\*([X" forms inside .if conditions are unusual --
.\" per the file header these macros only work with the author's own
.\" refb back end; confirm before reuse with standard refer.
.KS				\" keep together
.JP [\\*([F]			\" start indented paragraph
.if !\\*([H .RR "\\*([H"
.if !\\*([A .RR "\\*([A"
.if !\\*([Q .RR "\\*([Q"
.if !\\*([T \{\
\\*(],\(l"\\*([T\c
.ds ]. .\(r"
.ds ], ,\(r" \&
.\}
.if !\\*([R .RR "\\*([R"
.if !\\*([M .RR "\\*([M"
.if !\\*([J .RR "\\f2\\*([J\\fP"
.if !\\*([V .RR "Vol.\& \\*([V"
.if !\\*([N .RR "No.\& \\*([N"
.if !\\*([P .ie \\n([P>0 .RR "pp.\& \\*([P"
.el .RR "p.\& \\*([P"
.if !\\*([B .RR "in \\f2\\*([B\\fP"
.if !\\*([E .RR "ed.\& \\*([E"
.if !\\*([S .RR "\\*([S"
.if !\\*([I .RR "\\*([I"
.if !\\*([C .RR "\\*([C"
.if !\\*([G .RR "Gov't.\& ordering no.\& \\*([G"
.if !\\*([D .RR "\\*([D"
\&\\*(].
.if !\\*([L .Rc "\\*([L" \\*([L
.if !\\*([O \&\\*([O
.KE
..
.\"
.\"	=====> accents <=====
.\"
.\" Accent strings: each centers an accent glyph over the following
.\" letter by backing up half the width of a reference character (e, u,
.\" a, c).  Note that these redefine the string names -, ', ` , : ^ ~
.\" and , for this purpose.
.ds - \(em
.ds ' \h'\w'e'u-\w'\(aa'u/2+.06m'\z\(aa\h'-\w'e'u+\w'\(aa'u/2-.06m'
.ds ` \h'\w'e'u-\w'\(ga'u/2+.06m'\z\(ga\h'-\w'e'u+\w'\(ga'u/2-.06m'
.ds : \h'\w'u'u-\w'\(ad'u/2+.06m'\z\(ad\h'-\w'u'u+\w'\(ad'u/2-.06m'
.ds ^ \h'\w'a'u-\w'^'u/2+.06m'\z^\h'-\w'a'u+\w'^'u/2-.06m'
.ds ~ \h'\w'a'u-\w'~'u/2+.06m'\z~\h'-\w'a'u+\w'~'u/2-.06m'
.ds C \h'\w'e'u-\w'\(ah'u/2+.06m'\z\(ah\h'-\w'e'u+\w'\(ah'u/2-.06m'
.ds v \h'\w'e'u-\w'\(ah'u/2+.06m'\z\(ah\h'-\w'e'u+\w'\(ah'u/2-.06m'
.ds , \h'\w'c'u-\w'\(ac'u/2'\z\(ac\h'-\w'c'u+\w'\(ac'u/2'
.ds -- \*-
.ds q[ \(l"
.ds ]q \(r"
.\"
.\"	=====> user settable definitions <=====
.\"
.\" Default layout parameters; override after sourcing this file.
.cs 5 20u			\" font 5, constant width
.nr PS 12			\" point size
.nr VS 14			\" vertical spacing
.nr LL 6.5i			\" line length
.nr FL 15c			\" footnote length (no effect currently)
.nr LT 15c			\" title length
.nr PO \n(.o			\" page offset (taken from current offset)
.nr PD 0			\" interparagraph spacing
.\"
.\"	=====> -ms init <=====
.\"
.\" Neutralize -ms's own pagination so this file's T[/T] traps are the
.\" only ones driving page layout.
.nr FM 1			\" ms hack: remove page traps
.ch FO				\" remove bottom of page trap
.ch FX				\" remove footnote trap
.rm PT BT			\" remove other traps
.nr YE 1			\" causes break in .EQ
.\"
.\"	=====> initialization <=====
.\"
.RT				\" set these variables
.JR				\" reset indentation
.hw packet re-start trans-par-ent trans-par-ently trans-par-ency work-station trans-action time-stamp
.wh 0 T[			\" top of page macro
.em E]				\" end of text macro
.P[				\" begin paragraph
.\" NOTE(review): the four lines below read like the tail of a macro
.\" definition (set $1 in bold, e.g. a ".de B?" body) whose ".de" line is
.\" missing from this copy; as written they execute immediately --
.\" confirm against the original source.
.B
\\$1
.R
..
.TL
DISTRIBUTED PROGRAMMING WITH SHARED DATA
.AU
Henri E. Bal \(**
Andrew S. Tanenbaum
.AI
Dept. of Mathematics and Computer Science
Vrije Universiteit
Amsterdam, The Netherlands
.FS
\(**This research was supported in part by the
Netherlands organization for scientific research under grant 125-30-10.
.FE
.AB
Until recently, at least one thing was clear about parallel programming:
tightly coupled (shared memory) machines were programmed in a language
based on shared variables and loosely coupled (distributed) systems
were programmed using message passing.
The explosive growth of research on distributed systems and their
languages, however, has led to several new methodologies that blur
this simple distinction.
Operating system primitives (e.g., problem-oriented shared memory,
Shared Virtual Memory, the Agora shared memory) and languages
(e.g., Concurrent Prolog, Linda, Emerald) for programming distributed
systems have been proposed that support the shared variable paradigm
without the presence of physical shared memory.
In this paper we will look at the reasons for this evolution, the
resemblances and differences among these new proposals,
and the key issues in their design and implementation.
It turns out that many implementations are based on replication of data.
We take this idea one step further, and discuss how automatic replication 
(initiated by the run time system) can be used as a basis for
a new model, called the shared data-object model, whose semantics are similar
to the shared variable model.
Finally, we discuss the design of a new language for distributed
programming, Orca, based on the shared data-object model.
.AE
.NH 1
INTRODUCTION
.PP
Parallel computers of the MIMD (Multiple Instruction Multiple Data) class
are traditionally divided into two broad subcategories:
tightly coupled and loosely coupled systems.
In a tightly coupled system at least part of the primary memory is \fIshared\fR.
All processors have direct access to this shared memory,
in one machine instruction.
In a loosely coupled (\fIdistributed\fR) system,
processors only have access to their own local memories;
processors can communicate by sending messages over a communication channel,
such as a point-to-point link or a local area network\*(<.\*([.Tanenbaum and Van Renesse 1985\*(.]\*(>.
.]< 0
Tightly coupled systems have the significant advantage of fast communication
through shared memory.
Distributed systems, on the other hand, are much easier to build,
especially if a large number of processors is required.
.PP
Initially, programming language and operating system designers strictly
followed the above classification, resulting in two parallel programming
paradigms: shared variables (for tightly coupled systems)
and message passing (for distributed systems).
Some languages and operating systems for uniprocessors or shared-memory
multiprocessors support processes that communicate via
message passing (e.g., MINIX\*([.Tanenbaum 1987\*(.]
.]< 1
).
More recently, the dual approach, applying the shared variable paradigm
to distributed systems, has become a popular research topic.
At first sight, this approach may seem to be against the grain,
as the message passing paradigm much better matches the primitives
provided by the distributed hardware.
For sequential languages, however, we have become quite used to
programming paradigms like functional, logic, and object-oriented programming,
which do not directly reflect the underlying architecture either.
.PP
The purpose of this paper is twofold.
First, we will classify existing techniques for providing conceptual
shared memory
by looking at their most important similarities and differences.
Analysis of the semantics
shows that many proposals are not strictly like message passing
nor like shared variables, but somewhere in between.
In other words, there exists a \fIspectrum\fR of communication mechanisms,
of which shared variables and message passing are the two extremes.
Most primitives towards the shared-variable end of the spectrum use
\fIreplication\fR of data for an efficient distributed implementation.
.PP
The second purpose of the paper is to discuss a new model providing
conceptual shared memory and a new programming language, Orca, based
on this model.
Unlike most other languages for distributed programming,
Orca is intended for distributed application
programming rather than systems programming.
A major issue in its design was to keep the language as simple as possible
and to exclude features that are only useful for systems programming.
The simple design has been realized by using an intelligent
run time system, which dynamically decides where to store data and
how to replicate data.
.PP
Some theoretical work has been done in the area of simulating shared memory
in distributed systems (e.g.\*(<,\*([.Upfal and Wigderson 1987\*(.]\*(>,
.]< 2
).
In these studies, a distributed system is usually regarded as a (possibly
incomplete) graph, where nodes represent processors and arcs represent
communication channels.
These studies typically aim at minimizing the number of messages needed
to read or write a simulated shared variable.
In this paper, we are more interested in real-life distributed computing systems
(like those advocated by V\*([.Berglund 1986\*(.]
.]< 3
and Amoeba\*([.Mullender and Tanenbaum 1986\*(.]
.]< 4
).
In such systems, all processes can directly communicate with each other,
although communication between processes on different processors is
expensive.
These systems frequently support additional communication primitives,
like multicast and broadcast.
.NH 1
SHARED VARIABLES AND MESSAGE PASSING
.PP
Communication through shared variables probably is the oldest paradigm
in parallel programming.
Many operating systems for uniprocessors are structured as collections
of processes, executing in quasi-parallel, and communicating through shared variables.
Synchronizing access to shared data has been a research topic since
the early sixties.
Numerous programming languages exist that use shared variables.
.PP
The semantics of the model are fairly simple, except for
what happens when two processes simultaneously try
to write (or read and write) the same variable.
The semantics may either define simple reads and writes to be indivisible
(conflicting reads or writes are serialized) or may leave the
effect of simultaneous writes undefined.
.PP
The basis for message passing as a programming language construct is
Hoare's classic paper on CSP\*(<.\*([.Hoare 1978\*(.]\*(>.
.]< 5
A message in CSP is sent from one process (the sender) to one other
process (the receiver).
The sender waits until the receiver has accepted the message
(\fIsynchronous\fR message passing).
.PP
Many variations of message passing
have been proposed\*(<.\*([.Andrews and Schneider 1983; Bal et al. 1988\*(.]\*(>.
.]< 6
.]< 7
With \fIasynchronous\fR message passing, the sender continues immediately
after sending the message.
Remote procedure call and rendez-vous are two-way interactions between
two processes.
Broadcast and multicast are interactions between one sender and many
receivers\*(<.\*([.Gehani 1984\*(.]\*(>.
.]< 8
Communication ports or mailboxes can be used to avoid explicit
addressing of processes.
.PP
Below, we will describe the most important differences between the two
extremes of our spectrum: shared variables and simple (synchronous
and asynchronous) message passing.
Some of the extensions to message passing mentioned above make the differences
less profound.
.IP "   \(bu"
A message transfers information between two processes, which must both
exist (be alive) when the interaction takes place.
At least the sender must know the identity of the receiver.
Data stored in a shared variable is accessible to any process.
Processes interacting through shared variables need not even have overlapping
lifetimes or know about each other's existence.
They just have to know the address of the shared variable.
.IP "   \(bu"
An assignment to a shared variable has immediate effect.
In contrast, there is a measurable delay between sending a message
and its being received.
For asynchronous message passing, for example,
this has some ramifications for the order in which messages are received.
Usually, the semantics are \fIorder-preserving\fR:
messages between a pair of processes are received in the same order
they were sent.
With more than two processes, the delay still has to be taken into account.
Suppose Process P\s-2\v'0.4m'1\v'-0.4m'\s+2 sends a message \f5X\fR
to P\s-2\v'0.4m'2\v'-0.4m'\s+2 and then to P\s-2\v'0.4m'3\v'-0.4m'\s+2.
Upon receiving \f5X\fR, P\s-2\v'0.4m'3\v'-0.4m'\s+2 sends a message
\f5Y\fR to P\s-2\v'0.4m'2\v'-0.4m'\s+2.
There is no guarantee that P\s-2\v'0.4m'2\v'-0.4m'\s+2 will receive \f5X\fR
before \f5Y\fR.
.IP "   \(bu"
Message passing intuitively is more \fIsecure\fR than sharing variables.
Security means that one program module cannot affect the correctness
of other modules (e.g., by a ``wild store'' through a bad pointer).
The feasibility of a secure message passing language was demonstrated by NIL\*(<.\*([.Strom and Yemini 1986\*(.]\*(>.
.]< 9
Shared variables can be changed by any process, so security is
a bigger problem.
One solution is to use \fImonitors\fR, which encapsulate data and
serialize all operations on the data.
.IP "   \(bu"
A message exchanges information, but it also \fIsynchronizes\fR processes.
The receiver waits for a message to arrive;
with synchronous message passing, the sender also waits for the
receiver to be ready.
With shared variables, two different types of synchronization are useful\*(<.\*([.Andrews and Schneider 1983\*(.]\*(>.
.]< 6
\fIMutual exclusion\fR prevents simultaneous writes (or
reads and writes) of the same variable;  \fIcondition synchronization\fR
allows a process to wait for a certain condition to be true.
Processes can synchronize through
shared variables by using busy-waiting (polling),
but this behavior is undesirable, as it wastes processor cycles.
Better mechanisms are semaphores, eventcounts,
and condition variables.
.LP
The message passing model has some additional implementation problems,
as noted, for example, by Kai Li\*(<.\*([.Li 1986\*(.]\*(>.
.]< 10
Passing a complex data structure to a remote process is difficult.
Processes cannot easily be moved (migrated) to another processor,
making efficient process management more complicated.
The shared variable model does not suffer from these problems.
.NH 1
IN BETWEEN SHARED VARIABLES AND MESSAGE PASSING
.PP
The shared variable and message passing paradigms each have their
own advantages and disadvantages.
It should come as no surprise that language and operating
system designers have
looked at primitives that are somewhere in between these two extremes,
and that share the advantages of both.
In this section, we will discuss several such approaches.
.PP
In theory, a shared variable can simply be simulated on a distributed
system by storing it on one processor and letting other processors read and write it
with remote procedure calls.
In most distributed systems, however, a remote procedure call
is two to four orders of magnitude slower than reading local data.
(Even Spector\*([.Spector 1982\*(.]
.]< 11
reports an overhead of 150 microseconds for a
certain class of remote references, despite a
highly tuned, microcoded implementation).
This difference makes a straightforward simulation unattractive.
.PP
Most systems described in this section offer primitives
that have some properties of shared variables and some of message passing.
The semantics are somewhere in between shared variables and
message passing.
Often, the data are only accessible by some of the processes and
only through some specific operations.
These restrictions make the primitives more secure than regular shared
variables and make an efficient implementation possible even if physical shared
memory is absent.
.PP
We will discuss four key issues for every primitive:
.IP "   \(bu"
What are the \fIsemantics\fR of the primitive?
.IP "   \(bu"
How are shared data \fIaddressed\fR?
.IP "   \(bu"
How is access to shared data \fIsynchronized\fR?
.IP "   \(bu"
How can the primitive be \fIimplemented efficiently\fR without using physical
shared memory?
.LP
We will first discuss proposals that are ``close'' to message passing;
subsequent designs are increasingly similar to shared variables.
The results are summarized in Table 1 at the end of this section.
.NH 2
Communication Ports
.PP
In CSP-like languages, interacting processes must explicitly
name each other.
For many applications (e.g., those based on the client/server model)
this is inconvenient.
A solution is to send messages indirectly through
a \fIcommunication port\fR\*(<.\*([.William Mao and Yeh 1980\*(.]\*(>.
.]< 12
A port or mailbox is a variable where messages can be sent to or received from.
.PP
A port can be regarded as a shared \fIqueue\fR data structure,
with the following operations defined on it:
.DS
.ft 5
send(msg,q);         /* Append a message to the end of the queue. */
msg := receive(q);   /* Wait until the queue is not empty and
                      * get message from head of queue.
                      */
.ft R
.DE
The latter operation also synchronizes processes.
Ports can be addressed like normal variables.
The implementation is fairly straightforward; a buffer is needed
to store messages sent but not yet received.
.PP
Although the semantics of ports are essentially those of
asynchronous message passing,
it is interesting to note that ports can be described as shared
data structures with specialized access operations.
.NH 2
Ada's shared variables
.PP
Processes (tasks) in Ada\(dg
can communicate through the rendez-vous mechanism or through shared variables.
.FS
\(dg Ada is a registered trademark of the U.S. Dept. of Defense, Ada Joint Program Office
.FE
Shared variables in Ada are normal variables that happen to be
visible to several tasks, as defined by the Ada scope rules.
In an attempt to make the language implementable on memory-disjunct
architectures, special rules for shared variables were introduced
(section 9.11 of the language reference manual\*([.U.S.~Department~of~Defense 1983\*(.]
.]< 13
).
Between synchronization points (i.e., normal rendez-vous communication),
two tasks sharing a variable cannot make any assumptions about the order
in which the other task performs operations on the variable.
In essence, this rule permits a distributed implementation to use copies
(replicas) of shared variables and to update these copies only on
rendez-vous.
.PP
The semantics of Ada's shared variables are quite different from normal
shared variables, as updates do not have immediate effect.
Also, other features of the language design
complicate a distributed implementation\*(<.\*([.Stammers 1985\*(.]\*(>.
.]< 14
Introducing conceptual shared data this way does not seem like
a major breakthrough in elegant language design, but it does illustrate
the idea of replication.
.NH 2
The object model
.PP
Object-oriented languages are becoming increasingly popular,
not only for writing sequential programs, but also for
implementing parallel applications.
Different languages have different definitions of the term ``object,''
but in general an object encapsulates both \fIdata\fR and \fIbehavior\fR.
Concurrent languages that are strongly influenced by the object-oriented
programming paradigm include:
ABCL/1\*(<,\*([.Shibayama and Yonezawa 1987\*(.]\*(>,
.]< 15
Aeolus\*(<,\*([.Wilkes and LeBlanc 1986\*(.]\*(>,
.]< 16
Concurrent\%Smalltalk\*(<,\*([.Yokote and Tokoro 1987\*(.]\*(>,
.]< 17
Emerald\*(<,\*([.Black et al. 1987\*(.]\*(>,
.]< 18
Raddle\*(<,\*([.Forman 1986\*(.]\*(>,
.]< 19
and Sloop\*(<.\*([.Lucco 1987\*(.]\*(>.
.]< 20
.PP
An object in a concurrent object-based language can be considered as shared
data that are accessible only through a set of \fIoperations\fR defined
by the object.
These operations are invoked by sending a message to the object.
Operation invocation can either be asynchronous (the invoker continues
immediately after sending the message) or synchronous (the invoker waits
until the operation has been completed).
.PP
Objects are usually addressed by an object \fIreference\fR (returned upon
creation of the object) or by a global object name.
To synchronize access to (shared) objects, several approaches are conceivable.
Emerald uses a monitor-like construct to synchronize multiple operation
invocations to the same object.
Sloop supports \fIindivisible\fR objects, for which only one operation
invocation at a time is allowed to execute.
For condition synchronization, Sloop allows
operations to suspend on a boolean expression,
causing the invoking process to block until the expression is ``true.''
.PP
A key issue in a distributed implementation of objects is to locate objects
on those processors that use them most frequently.
Both Emerald and Sloop allow (but do not enforce) the programmer to
control the locations of objects; these locations can be changed
dynamically (object migration).
Alternatively, the placement of objects can be left entirely
to the run time system.
For this purpose,
Sloop dynamically maintains statistical information about
the program's communication patterns.
Some language implementations also support replication of
immutable (read-only) objects.
.PP
The object model already presents the illusion of shared data.
Access to the shared data is restricted to some well-defined operations,
making the model more secure than the simple shared variable model.
Synchronization can easily be integrated with the operations.
In Sloop, operations are invoked by asynchronous messages, so the semantics
of Sloop still resemble message passing.
Emerald uses synchronous operation invocations, resulting in a model
closer to shared variables.
.NH 2
Problem-oriented shared memory
.PP
Cheriton\*([.Cheriton 1985\*(.]
.]< 21
has proposed a kind of shared memory that can be tailored
to a specific application, the so-called
problem-oriented shared memory.
The shared memory can be regarded as a distributed system service,
implemented on multiple processors.
Data are stored (replicated) on one or more of these processors,
and may also be cached on client workstations.
.PP
The semantics of the problem-oriented shared memory are tuned to
the needs of the application using it.
In general, the semantics are more relaxed than those of shared variables.
In particular, inconsistent copies of the same data are allowed to coexist
temporarily,
so a ``read'' operation does not necessarily return the value stored
by the most recent ``write.''
There are several different approaches to deal with these
\fIstale\fR data, for example to let the applications
programmer worry about it, or to let the shared memory guarantee a
certain degree of accurateness (e.g., a shared variable containing
the ``time of the day'' can be kept accurate within, say, 5 seconds).
.PP
The problem-oriented shared memory is also addressed in an
application-specific way.
Addresses are broadcast to the server processors.
There is no special provision to synchronize processes (processes
can synchronize using message passing).
.PP
The implementation significantly benefits from the relaxed semantics.
Most important, it does not have to use complicated schemes to atomically
update all copies of the same data.
.NH 2
The Agora shared memory
.PP
The Agora shared memory allows processes written in different languages
and executing on different types of machines to communicate\*(<.\*([.Bisiani and Forin 1987\*(.]\*(>.
.]< 22
It has been implemented on closely coupled as well as loosely coupled
architectures, using the Mach operating system.
.PP
The memory contains shared data structures, accessible through an (extendible)
set of standard functions.
These functions are available (e.g., as library routines) in all languages
supported by the system.
A shared data structure is organized as a set of immutable data elements,
accessed indirectly through (mutable) \fImaps\fR.
A map maps an index (integer or string) onto the address of a data element.
To change an element of the set, a new element must be added and the
map updated accordingly.
Elements that are no longer accessible are automatically garbage collected.
.PP
Exclusive access to a data structure is provided by a standard function
that applies a user function to a data structure.
For condition synchronization, a pattern-directed mechanism is supported.
For example, a process can wait until a certain element is added to
a set.
.PP
The implementation is based on replication of data structures on reference.
As in Cheriton's model, read operations may return stale data.
.NH 2
Tuple Space
.PP
The Tuple Space is a novel synchronization mechanism,
designed by David Gelernter for his language Linda\*(<.\*([.Gelernter 1985; Ahuja et al. 1986\*(.]\*(>.
.]< 23
.]< 24
The Tuple Space is a global memory containing \fItuples\fR,
which are similar to records in Pascal.
For example, the tuple ``["Miami", 305]'' consists
of a string field and an integer field.
Tuple Space is manipulated by three atomic operations:
\fBout\fR adds a tuple to Tuple Space, \fBread\fR reads an existing tuple,
and \fBin\fR reads and deletes a tuple.
Note that there is no operation to \fIchange\fR an existing tuple.
Instead, the tuple must first be removed from Tuple Space, and later
be put back.
.PP
Unlike all other conceptual shared memory systems discussed in this paper,
Tuple Space is addressed \fIassociatively\fR (by contents).
A tuple is denoted by supplying actual or formal parameters for every field.
The tuple mentioned above can be read and removed, for example, by
.DS
\fBin\f5("Miami", 305);\fR
.DE
or by
.DS
\fBinteger\f5 areacode;
\fBin\f5("Miami", \fBvar\f5 areacode);\fR
.DE
In the latter case, the formal parameter \fIareacode\fR is assigned
the value 305.
.PP
Both \fBread\fR and \fBin\fR block until a matching tuple exists in Tuple Space.
If two processes simultaneously try to remove (\fBin\fR) the same tuple,
only one of them will succeed and the other one will block.
As tuples have to be removed before being changed, simultaneous updates
are automatically synchronized.
.PP
Although the semantics of Tuple Space are significantly different
from shared variables (e.g., it lacks assignment),
the Tuple Space clearly gives the illusion of a shared memory.
The Tuple Space has been implemented on machines with
shared memory (Encore Multimax, Sequent Balance) as well as
on memory-disjunct machines (iPSC hypercube, S/Net, Ethernet based
network of MicroVaxes).
A distributed implementation can benefit from the availability of multicast\*(<.\*([.Carriero and Gelernter 1986\*(.]\*(>.
.]< 25
As associative addressing is potentially expensive,
several compile-time optimizations have been devised to make it reasonably
efficient\*(<.\*([.Carriero 1987\*(.]\*(>.
.]< 26
.NH 2
Shared virtual memory
.PP
Kai Li has extended the concept of \fIvirtual memory\fR to distributed
systems, resulting in a \fIshared virtual memory\fR\*(<.\*([.Li 1986\*(.]\*(>.
.]< 10
This memory is accessible by all processes and is addressed like
traditional virtual memory.
Li's system guarantees memory \fIcoherence\fR: the value returned by
a ``read'' always is the value stored by the last ``write.''
.PP
The address space is partitioned into a number of fixed-size \fIpages\fR.
At any point in time, several processors may have a \fIread-only\fR copy
of the same page;
alternatively, a single processor may have a \fIread-and-write\fR copy.
.PP
If a process tries to write on a certain page while its processor does not
have a read-and-write copy of it, a ``write page-fault'' occurs.
The fault-handling routine tells other processors to \fIinvalidate\fR
their copies,
fetches a copy of the page (if it did not have one yet),
sets the protection mode to read-and-write,
and resumes the faulting instruction.
.PP
If a process wants to read a page, but does not have a copy of it,
a ``read page-fault'' occurs.
If any processor has a read-and-write copy of the page, this processor
is instructed to change the protection to read-only.
A copy of the page is fetched and the faulting instruction is resumed.
.PP
Shared Virtual Memory is addressed like normal virtual memory.
An implementation may support several synchronization mechanisms,
such as semaphores, eventcounts, and monitors.
.PP
The shared virtual memory can be used to simulate true shared variables,
with exactly the right semantics.
The implementation uses the hardware Memory Management Unit
and can benefit from the availability of multicast (e.g., to invalidate
all copies of a page).
Several strategies exist to deal with the problem of multiple simultaneous
writes and to administrate which processors contain copies of a page\*(<.\*([.Li 1986\*(.]\*(>.
.]< 10
The entire scheme will perform very poorly if processes on many
different processors repeatedly write on the same page.
Migrating these processes to the same processor is one possible cure
to this problem.
.NH 2
Shared logical variables
.PP
Most concurrent logic programming languages
(PARLOG\*(<,\*([.Clark and Gregory 1986; Gregory 1987\*(.]\*(>,
.]< 27
.]< 28
Concurrent Prolog\*(<,\*([.Shapiro 1986, 1987\*(.]\*(>,
.]< 29
.]< 30
Flat Concurrent Prolog)
use shared logical variables as communication channels.
Shared logical variables have the \fIsingle-assignment\fR property:
once they are bound to a value (or to another variable) they cannot
be changed.
(In ``sequential'' logic languages, variables may receive another value
after backtracking; most concurrent logic languages eliminate backtracking,
however).
Single-assignment is not a severe restriction,
because a logical variable can be bound to a structure containing
one or more other, unbound variables, which can be used for future
communication.
In fact, many communication patterns can be expressed
using shared logical variables\*(<.\*([.Shapiro 1986\*(.]\*(>.
.]< 29
.PP
Synchronization in concurrent logic languages resembles data-flow
synchronization: processes can (implicitly) wait for a variable to be bound.
Shared logical variables provide a clean semantic model, resembling
normal logic variables.
Addressing also is the same for both types of variables (i.e., through
unification).
.PP
The single-assignment property allows the model to be implemented with
reasonable efficiency on a distributed system.
An implementation of Flat Concurrent Prolog on a Hypercube is described in\*(<.\*([.Taylor et al. 1987\*(.]\*(>.
.]< 31
If a process tries to read a logical variable stored on a remote processor,
the remote processor adds the process to a list associated with
the variable.
As soon as the variable gets bound (if it was not already),
its value is sent to all processes on the list.
These processes will keep the value for future reference.
In this way, variables are automatically replicated on reference.
.NH 2
Discussion
.PP
Table 1 gives an overview of the most important properties of
the techniques we discussed.
The techniques differ widely in their semantics
and addressing and synchronization mechanisms.
A key issue in the implementation is \fIreplication\fR.
Multicast is frequently used to speed up the implementation.
.KF
.nr VS 14
.nr PS 10
.nr LL 7i
.LP
.TS
box, tab(:);
cfB s s s s
c || c | c | c | c .
Table 1 -  Overview of conceptual shared memory techniques
_
technique:semantics:addressing:synchronization:implementation
=
Comm. ports:shared queues:variables:blocking:straight m.p.
:::receive():
_
Ada's shared:weird:variables:rendez-vous:replication, updates
variables::::on rendez-vous
_
object model:shared objects:object-:indivisible obj.,:object migration,
::references:blocking oper.:repl. read-only objects
_
problem-oriented:shared mem. with:application-:through:replication,
shared memory:stale data:specific:messages:multicast
_
Agora:shared data struct.:flat name:pattern-:replication on
shared memory:stale data:space:directed:reference
_
Tuple Space:shared memory,:associatively:blocking:compile-time analysis,
(Linda):no assignment::read() and in():replication, multicast
_
shared virtual:shared mem.:linear addr.:semaphores,:MMU, replication,
memory::space:eventcounts, etc.:multicast
_
shared logical:logical var.:unification:suspend on:replication on
variables:(single assignm.)::unbound vars.:reference
.TE
.nr VS 14
.nr PS 12
.nr LL 6.5i
.KE
.PP
Replication of data has already been used for a long time
in distributed databases
to increase the availability of data in the presence of processor failures.
Replication introduces a severe problem:
the possibility of having inconsistent copies of the same logical data.
For databases, several solutions exist\*(<.\*([.Bernstein and Goodman 1981\*(.]\*(>.
.]< 32
Typically, multiple copies of the same data are accessed when reading
or writing data.
.PP
The techniques discussed in this section use replication to decrease
the \fIaccess time\fR to shared data, rather than to increase availability.
Therefore, it is unattractive to consult several processors on every
access to the data.
Instead, just the local copy should suffice for as many accesses as possible.
With this restriction, different solutions must be found to deal
with the consistency problem.
.PP
Table 1 shows three different ways of dealing with inconsistency.
Ada, the problem-oriented shared memory, and the Agora shared memory
relax the semantics of the shared memory.
The latter two systems allow ``read'' operations to return stale data.
Higher level protocols must be used by the programmer to solve inconsistency
problems.
Ada requires copies to be updated only on rendez-vous.
.PP
The second approach (used for objects, Tuple Space, and shared
logical variables) is to replicate only \fIimmutable\fR data (data that
cannot be changed).
This significantly reduces the complexity of the problem,
but it may also introduce new problems.
The approach is most effective in languages using single-assignment.
Such languages, however, will need a complicated distributed garbage
collection algorithm to get rid of inaccessible data.
In Linda, tuples are immutable objects.
A tuple can conceptually be changed by first taking it out of Tuple Space,
storing it in normal (local) variables.
After changing these local data, they can be put back in a new tuple.
As tuples are accessed by contents,
it makes little difference that the old tuple has been replaced
by a new one, instead of being modified while in Tuple Space.
As a major advantage of doing the modification outside Tuple Space,
updates by different processes are automatically synchronized.
On the other hand, a small modification to a large tuple (like setting a bit
in a 100K bitvector) will be expensive, as the tuple has to be copied twice.
.PP
The third approach to the consistency problem is exemplified by the shared
virtual memory: use protocols that guarantee memory coherence.
Before changing a page, all copies of the page are invalidated,
so subsequent reads will never return stale data.
Great care must be taken in the implementation, however, to avoid
thrashing.
For badly behaving programs, the system may easily spend most of its
time moving and invalidating pages.
.NH 1
THE SHARED DATA-OBJECT MODEL
.PP
We have developed a new conceptual shared-memory model, called
the \fIshared data-object\fR model.
In this model, shared data are encapsulated in passive objects.
The data contained by the object are only accessible through a set
of \fIoperations\fR defined by the object's type.
Objects are instances of \fIabstract data types\fR.
Unlike, say, Emerald and Sloop, we do not consider objects to be
active entities; neither do we consider all entities in the system
to be objects.
.PP
Parallel activity originates from the dynamic creation of multiple
sequential (single-threaded) processes.
When a process spawns a child process, it can pass any
of its objects as \fBshared\fR parameters to the child.
The children can pass the object to \fItheir\fR children, and so on.
In this way, the object gets distributed among some of the descendants of the
process that declared the object.
All these processes \fIshare\fR the object
and can perform the same set of operations
on it, as defined by the object's type.
Changes to the object made by one process are visible to other processes,
so a shared data-object is a communication channel between processes.
This mechanism is similar to call-by-sharing in CLU\*(<.\*([.Liskov et al. 1977\*(.]\*(>.
.]< 33
.PP
The shared data-object model has many advantages over regular shared variables.
Access to shared data is only allowed through operations defined by an
abstract data type.
All these operations are \fIindivisible\fR.
Simultaneous operation invocations of the same object have the same effect
as if they were executed one by one, so access to shared data is
automatically synchronized.
Blocking operations are used for condition synchronization of processes,
as will be explained later.
.NH 2
Implementing shared data-objects in a distributed system
.PP
Shared data-objects are a simple, secure mechanism for sharing data
and synchronizing access to the data.
The design cannot be judged, however, without also considering the
implementation.
The distributed implementation we have designed is based
on \fIselective replication\fR and \fImigration\fR of objects,
under full control of the run time system.
The compiler distinguishes between two kinds of operations:
.IP "   \(bu"
a ``read'' operation does not modify the object; it is performed on a
local copy, if one exists.
.IP "   \(bu"
a ``write'' operation may read and write the object's data;
it affects all copies.
.LP
For sake of clarity, we will use a simplified view of our model to describe
its implementation.
In particular, we will assume an object to contain a single integer
and we will consider only two operations:
.DS
\fBoperation\f5 read(x: object): integer;       -- return current value
\fBoperation\f5 write(x: object; val: integer); -- store new value
.ft R
.DE
.PP
The run time system dynamically keeps track of how many times
processors perform remote read and write operations on each object.
If a processor frequently reads a remote object, it is profitable for it
to maintain a local copy of the object.
The execution time overhead of maintaining the statistics is negligible
compared with the time needed to do remote references.
The space overhead is not a real concern either,
considering the current state of memory
technology.
.PP
A major issue in implementing replication is how to propagate
changes made to the data.
Two approaches are possible: invalidating all-but-one copies of the data,
or updating all copies.
Kai Li's Shared Virtual Memory uses invalidation.
In our model, invalidation is indeed feasible, but it has some
disadvantages.
First, if an object is big (e.g., a 100K bitvector) it is
wasteful to invalidate its copies, especially if an operation changes
only a small part (e.g., 1 bit).
In this case, it is far more efficient to apply the operation to all copies,
hence updating all copies.
Second, if an object is small (e.g., an integer),
sending the new value is probably just as expensive as sending an
invalidation message.
Although update algorithms are more complicated than invalidation algorithms,
we think it is useful to study them.
.PP
A related issue is how to synchronize simultaneous operation invocations
that try to modify the same object.
To serialize such invocations we appoint one replica of the object
as \fIprimary copy\fR, direct all ``write'' operations to this
primary copy, and then propagate them to the secondary copies.
An alternative approach would be to treat all copies as equals and use
a distributed locking protocol to provide mutual exclusion.
The primary copy method, however, allows one important optimization:
the primary copy can be migrated to the processor that most frequently
changes the object, making updates more efficient.
The statistical information described above
is also used for this purpose.
.NH 2
Dealing with inconsistency
.PP
The presence of multiple copies of the same data introduces the consistency
problem discussed in section 3.9.
As we do not want to clutter up the semantics of our model,
the implementation should adequately solve this problem.
In the following sections we will describe such implementations for different
kinds of distributed architectures.
.PP
To deal with the consistency problem, we will first need a deeper understanding
of the problem itself.
Suppose we implemented our model as follows.
To update an object \f5X\fR, its primary copy is locked and a message containing
the new value of \f5X\fR is sent to
all processors containing a secondary copy.
Such a processor updates its copy and then sends an acknowledgement back.
When all messages have been acknowledged, the primary copy is updated and
unlocked.
.PP
During the update protocol some processors have received the new value
of \f5X\fR while others still use its old value.
This is intuitively unappealing, but by itself is not the real problem.
Far more important, not all processors will observe modifications
to different objects in the same \fIorder\fR.
As a simple example, consider the program in Figure 1.
.DS
.ft 5

       X,Y: \fBshared\f5 object;  -- initially 0

       Process P\s-2\v'0.4m'1\v'-0.4m'\s+2:
            \fBfor\f5 i := 1 \fBto\fS \(if \fBdo\f5
               write(X, i);

       Process P\s-2\v'0.4m'2\v'-0.4m'\s+2:
            \fBrepeat\f5
               y := read(Y); x := read(X);
               \fBif\f5 x > y \fBthen\f5
                  write(Y, x);

       Process P\s-2\v'0.4m'3\v'-0.4m'\s+2:
            \fBrepeat\f5
               y := read(Y); x := read(X);
               \fBassert\f5 x \fS\(>=\f5 y;

       \fBFigure 1\fR: Example program.

.DE
P\s-2\v'0.4m'2\v'-0.4m'\s+2 tries to keep \f5Y\fR up-to-date with \f5X\fR;
P\s-2\v'0.4m'3\v'-0.4m'\s+2 verifies that \f5X\fR is greater than
or equal to \f5Y\fR.
Clearly, the latter condition should always be true.
.PP
Now suppose \f5X\fR and \f5Y\fR are replicated as shown in Figure 2.
P\s-2\v'0.4m'1\v'-0.4m'\s+2 contains the primary copy of \f5X\fR,
P\s-2\v'0.4m'2\v'-0.4m'\s+2 and P\s-2\v'0.4m'3\v'-0.4m'\s+2 have
secondary copies.
P\s-2\v'0.4m'2\v'-0.4m'\s+2 has the primary copy of \f5Y\fR,
P\s-2\v'0.4m'3\v'-0.4m'\s+2 has a secondary copy.
.DS
.PS 5.5i
P1: box "\fBX\fR" wid 0.25 ht 0.4
arrow "update(X)" below wid 0.025 ht 0.05
P2: box "\fBY\fR" "copy(X)" wid 0.25 ht 0.4
arrow "update(Y)" below wid 0.025 ht 0.05
P3: box "copy(X)" "copy(Y)" wid 0.25 ht 0.4
line from P1.n to P1.n + (0,0.15)
line "update(X)" below to P3.n + (0,0.15)
arrow to P3.n wid 0.025 ht 0.05
"P1" at P1.s - (0, 0.05)
"P2" at P2.s - (0, 0.05)
"P3" at P3.s - (0, 0.05)
.PE

.ce 1
\fBFigure\fR 2: Distribution of \f5X\fR and \f5Y\fR.
.DE
The following sequence of events may happen:
.IP 1.
\f5X\fR is incremented and becomes 1; P\s-2\v'0.4m'1\v'-0.4m'\s+2
sends an update message to P\s-2\v'0.4m'2\v'-0.4m'\s+2
and P\s-2\v'0.4m'3\v'-0.4m'\s+2.
.IP 2.
P\s-2\v'0.4m'2\v'-0.4m'\s+2 receives the update message,
assigns 1 to the variable \f5Y\fR and sends an update message
of \f5Y\fR to P\s-2\v'0.4m'3\v'-0.4m'\s+2.
.IP 3.
P\s-2\v'0.4m'3\v'-0.4m'\s+2 receives
the update message from P\s-2\v'0.4m'2\v'-0.4m'\s+2,
puts the value 1 in its copy of \f5Y\fR,
and is surprised to see that \f5Y\fR now is greater than \f5X\fR (which
still contains 0).
.IP 4.
P\s-2\v'0.4m'3\v'-0.4m'\s+2 receives the update message
from P\s-2\v'0.4m'1\v'-0.4m'\s+2, and stores the value 1 in its copy
of \f5X\fR.
.LP
P\s-2\v'0.4m'3\v'-0.4m'\s+2 observes the changes to \f5X\fR
and \f5Y\fR in the wrong order.
The problem is caused by the arbitrary amount of time that messages
may take to travel from the source to the destination
and by the inability to transfer information simultaneously from one source to
many destinations.
Such an implementation basically provides message passing semantics
disguised in shared variable syntax.
.PP
The solution to the consistency problem depends very much on the architecture of
the underlying distributed system.
We will discuss solutions for three different classes of architectures:
systems supporting point-to-point messages,
reliable multicast, and unreliable multicast respectively.
.NH 2
Implementation with point-to-point messages
.PP
One model of a distributed system is a collection of processors that
communicate by sending point-to-point messages to each other.
A communication path between any two processors is provided, either by
the hardware or the software.
Messages are delivered reliably, in the same order they were sent.
.PP
To implement consistent updating of objects in such a system, we
use a \fI2-phase update\fR protocol.
During the first phase, the primary copy is updated and locked,
and an update message is sent to all processors containing a secondary copy.
Unlike in the incorrect protocol outlined above,
all secondary copies are locked (and remain locked) before being
updated.
A user process that tries to read a locked copy blocks until the
lock is released (during the second phase).
When all update messages have been acknowledged (i.e., all copies are
updated and locked), the second phase begins.
The primary copy is unlocked and a message is sent to all processors
containing a secondary copy, instructing them to unlock their copies.
.PP
To implement the protocol, we use one \fImanager process\fR for every
processor.
We assume the manager process and user processes on the same processor
can share part of their address space.
Objects (and replicas) are stored in this shared address space.
Write operations on shared objects are directed to the manager of the
processor containing the primary copy;
user processes can directly \fIread\fR local copies, although they
may temporarily block, as described above.
Each manager process contains multiple threads of control.
One thread communicates with remote managers;
the remaining threads are created dynamically to handle write-operations.
So multiple write-operations to different objects may be in progress
simultaneously; write-operations to the same object are serialized,
as discussed below.
.PP
Upon receiving a request from a (possibly remote) user process \f5W\fR to perform
an operation ``\f5write(X, Val)\fR,''
the manager of \f5X\fR creates a new thread of control to handle the
request:
.DS
.ft 5

     \fBreceive\fR \fIwrite-req\f5(X, Val) \fBfrom\f5 W \fR\(->\f5
        \fBfork\f5 handle_write(X, Val, W);

     \fBprocess\f5 handle_write(X, Val, W);
     \fBbegin\f5
        set write-lock on X;
        store Val in X;
        \fBlet\f5 S = set of processors having a copy of X;
        -- first phase
        \fBforall\f5 P \fS\(mo\f5 S \fBdo\f5
           \fBsend\f5 \fIupdate-and-lock\f5(X, Val) \fBto\f5 manager of P;
        \fBfor\f5 i := 1 \fBto\f5 |S| \fBdo\f5
           \fBreceive\f5 \fIack\f5;
        -- second phase
        \fBforall\f5 P \fS\(mo\f5 S \fBdo\f5
           \fBsend\f5 \fIunlock\f5(X) \fBto\f5 manager of P;
        unlock X;
        \fBsend\f5 \fIack\f5 \fBto\f5 W;
     \fBend\f5;

.ft R
.DE
The process issuing the write request waits until it receives an
acknowledgement.
A manager responds as follows to messages from remote managers:
.DS
.ft 5
     \fBreceive\fR \fIupdate-and-lock\f5(X, Val) \fBfrom\f5 P \fR\(->\f5
        set write-lock on local copy of X;
        store Val in local copy of X;
        \fBsend\f5 \fIack\f5 \fBto\f5 P;

     \fBreceive\f5 \fIunlock\f5(X)  \fR\(->\f5
        unlock local copy of X
.ft R
.DE
.PP
The 2-phase update protocol guarantees that no process uses the new value of
an object while other processes are still using the old value.
The new value is not used until the second phase.
When the second phase begins, all copies contain the new value.
Simultaneous write-operations on the same object are serialized by locking
the primary copy.
The next write-operation may start before all secondary copies are unlocked.
New requests to \fIupdate-and-lock\fR a secondary copy are not serviced
until the \fIunlock\fR message generated by the previous write has
been handled (recall that point-to-point messages are received in the
order they were sent).
.PP
Deadlock is prevented by using multi-threaded managers.
Setting a write-lock on a primary copy may block one thread of a manager,
but not an entire manager process.
Locking a secondary copy always succeeds within a finite amount of time,
provided that all read-operations terminate properly.
.PP
If an object has N secondary copies it takes 3*N messages to update
all these copies.
Reading a remote object takes 2 messages (one request, one reply).
So, objects should only be replicated on processors that read the
object at least twice before it is changed again.
This can be determined (or estimated) dynamically, as discussed earlier.
The protocol can easily be optimized into a 1-phase update protocol if an
object has only one secondary copy.
.PP
For a small object (like an integer) that is frequently changed,
it may be more efficient to invalidate copies when the object is changed
and to replicate it on reference.
The first read-operation after a write fetches the object from a remote
processor and creates a local copy.
Subsequent reads use this local copy, until it is invalidated by a
modification to the object.
.NH 2
Implementation with reliable multicast messages
.PP
The 2-phase update protocol adequately solves the consistency problem,
although at the cost of some communication overhead.
The semantics provided by the implementation closely resemble those
of shared variables.
If a write-operation completes at time T\s-2\v'0.4m'w\v'-0.4m'\s+2,
read operations issued
at time T\s-2\v'0.4m'r\v'-0.4m'\s+2 > T\s-2\v'0.4m'w\v'-0.4m'\s+2 return
the new value.
.PP
This strict \fItemporal\fR ordering, however, is not a necessary requirement
for programming MIMD-like systems, in which processors are
executing \fIasynchronously\fR.
Processors in such systems are not synchronized by physical clocks.
Each sequential process in an asynchronous system performs a sequence
of computation steps:
.DS
          C\s-2\v'0.4m'0\v'-0.4m'\s+2 , C\s-2\v'0.4m'1\v'-0.4m'\s+2, ... , C\s-2\v'0.4m'i\v'-0.4m'\s+2 , ...
.DE
Within a single process, these steps are \fItotally\fR ordered;
C\s-2\v'0.4m'i\v'-0.4m'\s+2 happens after C\s-2\v'0.4m'j\v'-0.4m'\s+2 if
and only if i>j.
There is no total ordering between computation steps of different
processes, however, as discussed by Lamport\*(<.\*([.Lamport 1978\*(.]\*(>.
.]< 34
There is only a \fIpartial\fR ordering, induced by explicit
interactions (like sending a message or setting and 
testing shared variables).
.PP
This lack of total ordering allows an implementation of shared data-objects
to slightly relax the semantics without affecting the underlying
programming model.
Suppose Process P\s-2\v'0.4m'1\v'-0.4m'\s+2 executes ``\f5write(X, Val)\fR''
and Process P\s-2\v'0.4m'2\v'-0.4m'\s+2 executes ``\f5read(X)\fR.''
If there is no precedence relation between these two actions (i.e., neither
one of them comes before the other in the partial ordering), the
value read by P\s-2\v'0.4m'2\v'-0.4m'\s+2 may be either the old value of \f5X\fR
or the new value.
Even if, physically, the write is executed before the read, the read still
can return the old value.
The major difference with systems that allow read-operations to return
arbitrary old (stale) data is that our model supports a consistent
logical ordering of events, as defined implicitly in the program.
Programs like those in Figure 1 still execute as expected.
.PP
In a distributed system supporting only point-to-point messages, a consistent
logical ordering is difficult to obtain, because messages sent to
different destinations may arrive with arbitrary delays.
Some distributed systems (e.g., broadcast-bus systems) give hardware
support to send a single message to several destinations simultaneously.
More precisely, we are interested in systems supporting reliable, indivisible
multicasts, which have the following properties:
.IP "   \(bu"
A message is sent reliably from one source to a set of destinations.
.IP "   \(bu"
If two processors simultaneously multicast two messages
(say m\s-2\v'0.4m'1\v'-0.4m'\s+2 and m\s-2\v'0.4m'2\v'-0.4m'\s+2),
then either all destinations first receive m\s-2\v'0.4m'1\v'-0.4m'\s+2,
or they all receive m\s-2\v'0.4m'2\v'-0.4m'\s+2 first.
.LP
With this multicast facility we can implement a simple update protocol.
A ``\f5write(X, Val)\fR'' request is handled as follows by the manager
of \f5X\fR:
.DS
.ft 5

     \fBreceive\fR \fIwrite-req\f5(X, Val) \fBfrom\f5 W \fR\(->\f5
        set write-lock on X;
        store Val in X;
        \fBlet\f5 S = set of processors having a copy of X;
        \fBmulticast\f5 \fIupdate\f5(X, Val) \fBto\f5 manager of every P \fS\(mo\f5 S;
        unlock X;
        \fBsend\f5 \fIwrite-ack\f5(W) \fBto\f5 manager of W;
.ft R
.DE
After the \fIwrite-req\fR message has been handled,
the acknowledgement is sent to the manager of \f5W\fR (the  process that issued
the request).
The manager forwards it to \f5W\fR.
This guarantees that the local copy of \f5X\fR on \f5W\fR's processor
has been updated when \f5W\fR resumes execution.
The manager can be a single-threaded process in this implementation.
A manager handles all incoming \fIwrite-req\fR, \fIupdate\fR, and \fIwrite-ack\fR
messages in the order they were sent.
A manager containing a secondary copy responds as follows
to messages from remote managers:
.DS
.ft 5
     \fBreceive\fR \fIupdate\f5(X, Val) \fR\(->\f5
        set write-lock on local copy of X;
        store Val in local copy of X;
        unlock local copy of X
     \fBreceive\fR \fIwrite-ack\f5(W) \fR\(->\f5
        \fBsend\f5 \fIack\f5 \fBto\f5 W;
.ft R
.DE
If a processor P reads a new value of an object \f5X\fR, 
an \fIupdate\fR message for \f5X\fR containing this value has also
been sent to all other processors.
Other processors may not have handled this message yet,
but they certainly will do so before they handle any other messages.
Any changes to shared objects initiated by P will be observed by
other processors after accepting the new value of \f5X\fR.
Problems like those in Figure 2 do not occur.
.NH 2
Implementation with unreliable multicast messages
.PP
A cost-effective way to build a distributed system is to connect
a collection of micro-computers by a local area network.
Such systems are easy to build and easy to extend.
Many distributed operating systems have been designed with
this model in mind\*(<.\*([.Tanenbaum and Van Renesse 1985\*(.]\*(>.
.]< 0
.PP
Many LANs have hardware support for doing multicasts.
An Ethernet, for example, physically sends a packet to every computer
on the net, although usually only one of them reads the packet.
There is no difference in transmission time between a multicast and
a point-to-point message.
.PP
Unfortunately, multicasts in a LAN are not totally reliable.
Occasionally, a network packet gets lost.
Worse, one or more receivers may be out of buffer space
when the packet arrives, so a packet may be delivered at
only part of the destinations.
In practice, multicast is highly reliable, although less than 100%.
Unreliable multicast can be made reliable by adding extra software protocols.
Such protocols have a high communication overhead and may result
in multicasts that are not indivisible (as defined above).
Instead, we have designed an implementation of shared data-objects that
directly uses unreliable multicasts.
.PP
The basic algorithm is the same as that for reliable multicast.
When a shared variable \f5X\fR is updated, some (or all) processors
containing a secondary copy of \f5X\fR may fail
to receive the \f5update(X,Val)\fR message.
They will continue to use the old value of \f5X\fR.
This is not disastrous, as long as the partial (logical) ordering of events is
obeyed, as described above.
To guarantee a consistent ordering, processors that failed to receive the
\f5update(X,Val)\fR message must detect this failure before
handling other update messages that logically should arrive
after X's message.
.PP
This is realized as follows.
Update messages are multicast to \fIall\fR processors participating
in the program, not just to those processors containing a secondary copy.
Every processor counts the number of update messages it sends.
This number is called its \fImc-count\fR.
Every processor records the \fImc-count\fRs of all processors.
These numbers are stored in a vector, called the \fImc-vector\fR (initialized
to all zeroes).
For Processor P, \fImc-vector\f5[P]\fR always contains the correct value
of P's \fImc-count\fR; entries for other processors may be slightly
out of date.
.PP
Whenever a processor multicasts a message, it sends its own \fImc-vector\fR
as part of the message.
When a processor Q receives a multicast message from P,
it increments the entry for P in its own \fImc-vector\fR and then compares
this vector with the \fImc-vector\fR contained in the message.
If the entry for some processor R in its own vector is less than the
corresponding entry in the message, Q has missed a multicast message from
Processor R.
Q updates the entry for R in its own vector.
As Q does not know which variable should have been updated by R's message,
Q temporarily invalidates the local copies of all variables
that have their primary copy on Processor R.
It sends (reliable) point-to-point messages to the manager
of R, asking for the current values of these variables.
The reply messages from R also contain \fImc-vector\fRs,
and undergo the same procedure as for multicast messages.
Until the copies are up-to-date again, local read operations of these copies
block.
.PP
It is quite possible that lost update messages will remain
undetected for a while.
Suppose Processor Q misses an update message for a variable \f5Y\fR from
Processor R and then receives an update message for \f5X\fR
from Processor P.
If P also missed R's message, the entries for R in the \fImc-vector\fRs
of P and Q will agree (although they are both wrong) and the copy
of \f5X\fR will be updated.
However, as P contained the old value of \f5Y\fR when it updated \f5X\fR,
the new value of \f5X\fR does not depend on the new value of \f5Y\fR,
so it is consistent to update \f5X\fR.
.PP
If a process misses an update message for \f5X\fR,
this failure will eventually be detected while handling subsequent
messages.
The assumption is that there will be subsequent messages.
This assumption need not be true.
For example, a process may set a shared flag-variable and wait for
other processes to respond.
If these other processes missed the flag's update message,
the system may very well come to a grinding halt.
To prevent this, dummy update messages are generated periodically,
which do not update any copy, but just cause the \fImc-vector\fRs to be
checked.
.PP
The implementation outlined above has one considerable advantage:
it takes a single message to update any number of copies,
provided that the message is delivered at all destinations.
There is a severe penalty on losing messages.
As modern LANs are highly reliable, we expect this to happen infrequently.
The implementation also has several disadvantages.
Update messages are sent to every processor.
Each message contains extra information (the \fImc-vector\fR),
which must be checked by all receiving processors.
For a limited number of processors, say 32, we think this
overhead is acceptable.
The protocol can be integrated with the 2-phase update protocol
described in Section 4.3.
For example, objects that are replicated on only a few processors
can be handled with the 2-phase update protocol while objects replicated on
many processors are handled by the multicast protocol.
.NH 1
A LANGUAGE BASED ON SHARED DATA-OBJECTS
.PP
We have designed a simple, general purpose programming language
called Orca, based on shared data-objects.
Unlike most other parallel languages, Orca is intended for
applications programming rather than for systems programming.
Parallelism in Orca is based on dynamic creation of sequential processes.
Processes communicate indirectly, through shared data-objects.
An object can be shared by passing it as \fBshared\fR parameter to
a newly created process, as discussed in Section 4.
.NH 2
Object type definitions
.PP
An object is an instance of an object type,
which is essentially an abstract data type.
An object type definition consists of a \fIspecification\fR part and
an \fIimplementation\fR part.
The specification part defines one or more operations
on objects of the given type.
For example, the declaration of an
object type \fIIntObject\fR is shown in Figure 3.
.KF
.DS
.ft 5

       \fBobject specification\f5 IntObject;
            \fBoperation\fR \f5value(): integer;   -- return current value
            \fBoperation\fR \f5assign(val: integer);  -- assign new value
            \fBoperation\fR \f5min(val: integer);
               -- set value to minimum of current value and \fR``\f5val\fR''\f5
            \fBoperation\fR \f5max(val: integer);
               -- set value to maximum of current value and \fR``\f5val\fR''\f5
       \fBend\f5;
.sp
       \fBFigure 3\fR: specification of object type IntObject.
.DE
.KE
The implementation part contains the data of the object, code to initialize
the data of new instances (objects) of the type, and
code implementing the operations.
The code implementing an operation on an object can access
the object's internal data.
The implementation of object type \f5IntObject\fR is shown in Figure 4.
.KF
.DS
.ft 5

       \fBobject implementation\f5 IntObject;
            X: integer;  -- the data stored in an IntObject

            \fBoperation\fR \f5value(): integer;
            \fBbegin\f5
                 \fBreturn\f5 X;
            \fBend\f5

            \fBoperation\fR \f5assign(val: integer);
            \fBbegin\f5
                 X := val;
            \fBend\f5

            \fBoperation\fR \f5min(val: integer);
            \fBbegin\f5
                 \fBif\f5 val < X \fBthen\f5 X := val; \fBfi\f5;
            \fBend\f5

            \fBoperation\fR \f5max(val: integer);
            \fBbegin\f5
                 \fBif\f5 val > X \fBthen\f5 X := val; \fBfi\f5;
            \fBend\f5
       \fBbegin\f5
            X := 0;  -- initialization of internal data
       \fBend\f5;
.sp
       \fBFigure 4\fR: implementation of object type IntObject.

.DE
.KE
.LP
Objects can be created and operated on as follows:
.DS
.ft 5
 myint: IntObject; -- create an object of type IntObject
 ...
 myint$assign(83);  -- assign 83 to myint
 ...
 x := myint$value();  -- read value of myint
.ft R
.DE
.NH 2
Synchronization
.PP
Access to the shared data is automatically synchronized.
All operations defined in the specification part are indivisible.
If two processes simultaneously invoke \f5X$min(A)\fR and \f5X$min(B)\fR,
the new value of X is the minimum of A, B, and the old value of X.
On the other hand, a sequence of operations, such as
.DS
\fBif\f5 A < X$value() \fBthen\f5 X$assign(A); \fBfi\fR
.DE
is not indivisible.
This rule for defining which actions are indivisible and which are not
is both easy to understand and flexible:
single operations are indivisible, sequences of operations are not.
The set of operations can be tailored to the needs of a specific application
by defining single operations to be as complex as necessary.
.PP
For condition synchronization, \fIblocking\fR operations can be defined.
A blocking  operation consists of one or more \fIguarded commands\fR:
.DS

\fBoperation\f5 name(parameters);
\fBbegin\f5
    \fBguard\f5 expr\s-2\v'0.4m'1\v'-0.4m'\s+2 \fBdo\f5 statements\s-2\v'0.4m'1\v'-0.4m'\s+2 \fBod\f5;
    \fBguard\f5 expr\s-2\v'0.4m'2\v'-0.4m'\s+2 \fBdo\f5 statements\s-2\v'0.4m'2\v'-0.4m'\s+2 \fBod\f5;
    ...
    \fBguard\f5 expr\s-2\v'0.4m'n\v'-0.4m'\s+2 \fBdo\f5 statements\s-2\v'0.4m'n\v'-0.4m'\s+2 \fBod\f5;
\fBend\f5;
.ft R
.DE
The expressions must be side-effect free boolean expressions.
The operation initially blocks (suspends) until at least one of the guards
evaluates to ``true.''
Next, one true guard is selected nondeterministically,
and its sequence of statements is executed.
As an example, a type \f5IntQueue\fR with a blocking
operation \f5remove_head\fR can be implemented as outlined in Figure 5.
.KF
.DS
.ft 5

       \fBobject implementation\f5 IntQueue;
          Q: list of integer;  -- internal representation

          \fBoperation\f5 append(X: integer);
          \fBbegin\f5   -- append X to the queue
              add X to end of Q;
          \fBend\f5

          \fBoperation\f5 remove_head(): integer;
              R: integer;
          \fBbegin\f5  -- wait until queue not empty, then get head element
              \fBguard\f5 Q not empty \fBdo\f5  -- blocking operation
                    R := first element of Q;
                    remove R from Q;
                    \fBreturn\f5 R;
              \fBod\f5;
          \fBend\f5
       \fBbegin\f5
          Q := empty;  -- initialization of an IntQueue object
       \fBend\f5;
.sp
       \fBFigure 5\fR: Outline of implementation of object type IntQueue.
.DE
.KE
.PP
An invocation of \f5remove_head\fR
suspends until the queue is not empty.
If the queue is initially empty,
the process waits until another process appends
an element to the queue.
If the queue contains only one element and
several processes try to execute the statement simultaneously,
only one process will succeed in calling \f5remove_head\fR.
Other processes will suspend until more elements are
appended to the queue.
.NH 2
An example program
.PP
We have used the object types discussed above
to design a distributed Traveling Salesman Problem (TSP)*
algorithm, based on an earlier algorithm described in\*(<.\*([.Bal et al. 1987\*(.]\*(>.
.]< 35
.FS
* The Traveling Salesman Problem is the problem of finding the
shortest route for a salesman to visit each of a number of cities in his
territory exactly once.
.FE
The algorithm uses one process to generate partial routes for the salesman
(containing only part of the cities)
and any number of worker processes to further
expand (search) these partial solutions.
A worker systematically generates all full routes that start with
the given initial route, and checks if they are better (shorter)
than the current best solution.
Every time a worker finds a shorter full route, it updates a variable
shared by all workers, containing the length of the shortest route so far.
This variable is used to cut-off partial routes that are already longer
than the current shortest route, as these will never lead to an optimal
solution.
The basic algorithm for the worker processes is outlined in Figure 6.
(Figure 6 does not show how termination of the worker processes is dealt with;
this requires an extension).
Conceptually, the distributed algorithm is as simple
as the sequential TSP algorithm.
.KF
.DS

\fBprocess\f5 worker(minimum: \fBshared\f5 IntObject; q: \fBshared\f5 TaskQueue);
     r: route;
\fBbegin\f5
     \fBdo\f5  -- forever
        r := q$remove_head();
        tsp(r, minimum);
     \fBod
end\f5;

\fBprocedure\f5 tsp(r: route; minimum: \fBshared\f5 IntObject);
\fBbegin\f5  -- cut-off partial routes longer than current best one
     \fBif\f5 length(r) < minimum$value() \fBthen\f5
         \fBif\f5 \fR``\f5r\fR''\f5 is a full solution (covering all cities) \fBthen\f5
             -- r is a full route shorter than the current best route.
             minimum$min(length(r));  -- update current best solution.
         \fBelse\f5
             \fBfor\f5 all cities \fR``\f5c\fR''\f5 not on route \fR``\f5r\fR''\f5 \fBdo\f5
                 tsp(r||c, minimum);  -- search route r extended with c
             \fBod\f5
         \fBfi\f5
     \fBfi\f5
\fBend\f5;
 
.ft R
.sp 1
\fBFigure 6\fR: Algorithm for TSP worker processes.
.DE
.KE
.LP
The shared variable is implemented as an object of type IntObject (see
Figure 3).
As several workers may simultaneously try to decrease the value of
this variable, it is updated using the indivisible \f5min\fR operation.
The work-to-do is stored in an ordered task queue, the order being determined
by one of the many heuristics that exist for the Traveling Salesman Problem,
such as ``nearest-city-first.''
The task queue is similar to the \f5IntQueue\fR data type of Figure 5, except
that the elements are ``routes'' rather than integers.
.NH 1
CONCLUSIONS
.PP
We have classified several communication primitives for distributed
programming that support the shared variable paradigm without
the presence of physical shared memory.
Of the many programming languages for distributed systems that
are around today\*(<,\*([.Bal et al. 1988\*(.]\*(>,
.]< 7
several recent ones present a computational model based on
sharing data.
More significant, novel programming styles are emerging.
Examples include distributed data structures and the replicated worker
model of Linda\*(<,\*([.Ahuja et al. 1986\*(.]\*(>,
.]< 24
and incomplete messages, difference streams, and the short-circuit
technique of concurrent logic programming languages\*(<.\*([.Shapiro 1986\*(.]\*(>.
.]< 29
These techniques achieve a much higher level of abstraction than
message passing languages, at the cost of some efficiency.
More research is still needed to achieve the same level of efficiency
for languages based on abstract shared data.
.NH 1
REFERENCES
.LP
.nr [W \w'10'
.in 0.3i
.]<
.ds [F Ahuja et al. 1986
.]-
.ds [T Linda and Friends
.ds [A \*([(A\*()]huja, S.
.as [A ", \*([(C\*()]arriero, N.
.as [A ", and \*([(G\*()]elernter, D.
.ds [J IEEE Computer
.ds [V 19
.ds [N 8
.ds [P 26-34
.nr [P 1
.ds [L Ahuja et al. 1986@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m8 1986
.][ 1 journal-article
.ds [F Andrews and Schneider 1983
.]-
.ds [T Concepts and Notations for Concurrent Programming
.ds [A \*([(A\*()]ndrews, G. R.
.as [A " and \*([(S\*()]chneider, F. B.
.ds [J Computing Surveys
.ds [V 15
.ds [N 1
.ds [P 3-43
.nr [P 1
.ds [L Andrews and Schneider 1983@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m3 1983
.][ 1 journal-article
.ds [F Bal et al. 1988
.]-
.ds [T Programming Languages for Distributed Systems
.ds [A \*([(B\*()]al, H. E.
.as [A ", \*([(S\*()]teiner, J. G.
.as [A ", and \*([(T\*()]anenbaum, A. S.
.ds [R IR-147
.ds [I Vrije Universiteit
.ds [C Amsterdam, The Netherlands
.ds [L Bal et al. 1988@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m2 1988
.][ 4 tech-report
.ds [F Bal et al. 1987
.]-
.ds [T Implementing Distributed Algorithms Using Remote Procedure Calls
.ds [A \*([(B\*()]al, H. E.
.as [A ", \*([(V\*()]an Renesse, R.
.as [A ", and \*([(T\*()]anenbaum, A. S.
.ds [J Proc. AFIPS Nat. Computer Conf.
.ds [I AFIPS Press
.ds [C Chicago, Ill.
.ds [P 499-506
.nr [P 1
.ds [L Bal et al. 1987@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m6 1987
.][ 1 journal-article
.ds [F Berglund 1986
.]-
.ds [T An Introduction to the V-system
.ds [A \*([(B\*()]erglund, E. J.
.ds [J IEEE Micro
.ds [V 6
.ds [N 4
.ds [P 35-52
.nr [P 1
.ds [L Berglund 1986@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m8 1986
.][ 1 journal-article
.ds [F Bernstein and Goodman 1981
.]-
.ds [T Concurrency Control in Distributed Database Systems
.ds [A \*([(B\*()]ernstein, P. A.
.as [A " and \*([(G\*()]oodman, N.
.ds [J Computing Surveys
.ds [V 13
.ds [N 2
.ds [P 185-221
.nr [P 1
.ds [L Bernstein and Goodman 1981@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m6 1981
.][ 1 journal-article
.ds [F Bisiani and Forin 1987
.]-
.ds [T Architectural Support for Multilanguage Parallel Programming on
.as [T " Heterogenous Systems
.ds [A \*([(B\*()]isiani, R.
.as [A " and \*([(F\*()]orin, A.
.ds [J Proc. 2nd Int. Conf. on Architectural Support for Programming Languages and Operating Systems
.ds [C Palo Alto
.ds [P 21-30
.nr [P 1
.ds [L Bisiani and Forin 1987@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(ma 1987
.][ 1 journal-article
.ds [F Black et al. 1987
.]-
.ds [T Distribution and Abstract Types in Emerald
.ds [A \*([(B\*()]lack, A.
.as [A ", \*([(H\*()]utchinson, N.
.as [A ", \*([(J\*()]ul, E.
.as [A ", \*([(L\*()]evy, H.
.as [A ", and \*([(C\*()]arter, L.
.ds [J IEEE Trans. Softw. Eng.
.ds [V SE-13
.ds [N 1
.ds [P 65-76
.nr [P 1
.ds [L Black et al. 1987@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m1 1987
.][ 1 journal-article
.ds [F Carriero 1987
.]-
.ds [T The Implementation of Tuple Space Machines
.ds [A \*([(C\*()]arriero, N.
.ds [I Yale University
.ds [C New Haven, CT
.ds [R RR-567 (Ph.D. dissertation)
.ds [L Carriero 1987@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(mc 1987
.][ 4 tech-report
.ds [F Carriero and Gelernter 1986
.]-
.ds [T The S/Net's Linda Kernel
.ds [A \*([(C\*()]arriero, N.
.as [A " and \*([(G\*()]elernter, D.
.ds [J ACM Trans. Comp. Syst.
.ds [V 4
.ds [N 2
.ds [P 110-129
.nr [P 1
.ds [L Carriero and Gelernter 1986@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m5 1986
.][ 1 journal-article
.ds [F Cheriton 1985
.]-
.ds [T Preliminary Thoughts on Problem-oriented Shared Memory:
.as [T " A Decentralized Approach to Distributed Systems
.ds [A \*([(C\*()]heriton, D. R.
.ds [J Operating Systems Reviews
.ds [V 19
.ds [N 4
.ds [P 26-33
.nr [P 1
.ds [L Cheriton 1985@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(ma 1985
.][ 1 journal-article
.ds [F Clark and Gregory 1986
.]-
.ds [T PARLOG: Parallel Programming in Logic
.ds [A \*([(C\*()]lark, K. L.
.as [A " and \*([(G\*()]regory, S.
.ds [J ACM Trans. Program. Lang. Syst.
.ds [V 8
.ds [N 1
.ds [P 1-49
.nr [P 1
.ds [L Clark and Gregory 1986@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m1 1986
.][ 1 journal-article
.ds [F Forman 1986
.]-
.ds [T On the Design of Large Distributed Systems
.ds [A \*([(F\*()]orman, I. R.
.ds [J Proc. IEEE CS 1986 Int. Conf. on Computer Languages
.ds [C Miami, Florida
.ds [P 84-95
.nr [P 1
.ds [L Forman 1986@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(ma 1986
.][ 1 journal-article
.ds [F Gehani 1984
.]-
.ds [T Broadcasting Sequential Processes (BSP)
.ds [A \*([(G\*()]ehani, N. H.
.ds [J IEEE Trans. Softw. Eng.
.ds [V SE-10
.ds [N 4
.ds [P 343-351
.nr [P 1
.ds [L Gehani 1984@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m7 1984
.][ 1 journal-article
.ds [F Gelernter 1985
.]-
.ds [T Generative Communication in Linda
.ds [A \*([(G\*()]elernter, D.
.ds [J ACM Trans. Program. Lang. Syst.
.ds [V 7
.ds [N 1
.ds [P 80-112
.nr [P 1
.ds [L Gelernter 1985@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m1 1985
.][ 1 journal-article
.ds [F Gregory 1987
.]-
.ds [T Parallel Logic Programming in PARLOG
.ds [A \*([(G\*()]regory, S.
.ds [I Addison-Wesley
.ds [C Wokingham, England
.ds [L Gregory 1987@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D 1987
.][ 2 book
.ds [F Hoare 1978
.]-
.ds [T Communicating Sequential Processes
.ds [A \*([(H\*()]oare, C. A. R.
.ds [J Commun. ACM
.ds [V 21
.ds [N 8
.ds [P 666-677
.nr [P 1
.ds [L Hoare 1978@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m8 1978
.][ 1 journal-article
.ds [F Lamport 1978
.]-
.ds [T Time, Clocks, and the Ordering of Events in a Distributed System
.ds [A \*([(L\*()]amport, L.
.ds [J Commun. ACM
.ds [V 21
.ds [N 7
.ds [P 558-565
.nr [P 1
.ds [L Lamport 1978@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m7 1978
.][ 1 journal-article
.ds [F Li 1986
.]-
.ds [T Shared Virtual Memory on Loosely Coupled Multiprocessors
.ds [A \*([(L\*()]i, K.
.ds [I Yale University
.ds [C New Haven, CT
.ds [R RR-492 (Ph.D. dissertation)
.ds [L Li 1986@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m9 1986
.][ 4 tech-report
.ds [F Liskov et al. 1977
.]-
.ds [T Abstraction Mechanisms in CLU
.ds [A \*([(L\*()]iskov, B.
.as [A ", \*([(S\*()]nyder, A.
.as [A ", \*([(A\*()]tkinson, R.
.as [A ", and \*([(S\*()]chaffert, C.
.ds [J Commun. ACM
.ds [V 20
.ds [N 8
.ds [P 564-576
.nr [P 1
.ds [L Liskov et al. 1977@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m8 1977
.][ 1 journal-article
.ds [F Lucco 1987
.]-
.ds [T Parallel Programming in a Virtual Object Space
.ds [A \*([(L\*()]ucco, S. E.
.ds [J SIGPLAN Notices (Proc. Object-Oriented Programming Systems, Languages and Applications 1987)
.ds [C Orlando, Florida
.ds [V 22
.ds [N 12
.ds [P 26-34
.nr [P 1
.ds [L Lucco 1987@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(mc 1987
.][ 1 journal-article
.ds [F Mullender and Tanenbaum 1986
.]-
.ds [T Design of a Capability-Based Distributed Operating System
.ds [A \*([(M\*()]ullender, S. J.
.as [A " and \*([(T\*()]anenbaum, A. S.
.ds [J Computer J.
.ds [V 29
.ds [N 4
.ds [P 289-299
.nr [P 1
.ds [L Mullender and Tanenbaum 1986@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m8 1986
.][ 1 journal-article
.ds [F Shapiro 1986
.]-
.ds [T Concurrent Prolog: A Progress Report
.ds [A \*([(S\*()]hapiro, E.
.ds [J IEEE Computer
.ds [V 19
.ds [N 8
.ds [P 44-58
.nr [P 1
.ds [L Shapiro 1986@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m8 1986
.][ 1 journal-article
.ds [F Shapiro 1987
.]-
.ds [T Concurrent Prolog: Collected Papers
.ds [A \*([(S\*()]hapiro, E.
.ds [I M.I.T. Press
.ds [C Cambridge, Mass.
.ds [L Shapiro 1987@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D 1987
.][ 2 book
.ds [F Shibayama and Yonezawa 1987
.]-
.ds [T Distributed Computing in ABCL/1
.ds [A \*([(S\*()]hibayama, E.
.as [A " and \*([(Y\*()]onezawa, A.
.ds [B Object-Oriented Concurrent Programming
.ds [E A. Yonezawa and M. Tokoro
.ds [I M.I.T. Press
.ds [C Cambridge, Mass.
.ds [P 91-128
.nr [P 1
.ds [L Shibayama and Yonezawa 1987@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D 1987
.][ 3 article-in-book
.ds [F Spector 1982
.]-
.ds [T Performing Remote Operations Efficiently on a Local Computer Network
.ds [A \*([(S\*()]pector, A. Z.
.ds [J Commun. ACM
.ds [V 25
.ds [N 4
.ds [P 246-258
.nr [P 1
.ds [L Spector 1982@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m4 1982
.][ 1 journal-article
.ds [F Stammers 1985
.]-
.ds [T Ada on Distributed Hardware
.ds [A \*([(S\*()]tammers, R. A.
.ds [B Concurrent Languages in Distributed Systems
.ds [E G.L. Reijns and E.L. Dagless
.ds [I Elsevier Science Publishers (North-Holland)
.ds [P 35-40
.nr [P 1
.ds [L Stammers 1985@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D 1985
.][ 3 article-in-book
.ds [F Strom and Yemini 1986
.]-
.ds [T Typestate: A Programming Language Concept for Enhancing Software Reliability
.ds [A \*([(S\*()]trom, R. E.
.as [A " and \*([(Y\*()]emini, S.
.ds [J IEEE Trans. Softw. Eng.
.ds [V SE-12
.ds [N 1
.ds [P 157-171
.nr [P 1
.ds [L Strom and Yemini 1986@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m1 1986
.][ 1 journal-article
.ds [F Tanenbaum 1987
.]-
.ds [T Operating Systems: Design and Implementation
.ds [A \*([(T\*()]anenbaum, A. S.
.ds [I Prentice-Hall, Inc.
.ds [C Englewood Cliffs, NJ
.ds [L Tanenbaum 1987@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D 1987
.][ 2 book
.ds [F Tanenbaum and Van Renesse 1985
.]-
.ds [T Distributed Operating Systems
.ds [A \*([(T\*()]anenbaum, A. S.
.as [A " and \*([(V\*()]an Renesse, R.
.ds [J Computing Surveys
.ds [V 17
.ds [N 4
.ds [P 419-470
.nr [P 1
.ds [L Tanenbaum and Van Renesse 1985@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(mc 1985
.][ 1 journal-article
.ds [F Taylor et al. 1987
.]-
.ds [T A Parallel Implementation of Flat Concurrent Prolog
.ds [A \*([(T\*()]aylor, S.
.as [A ", \*([(S\*()]afra, S.
.as [A ", and \*([(S\*()]hapiro, E.
.ds [J Int. J. of Parallel Programming
.ds [V 15
.ds [N 3
.ds [P 245-275
.nr [P 1
.ds [L Taylor et al. 1987@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D 1987
.][ 1 journal-article
.ds [F U.S.~Department~of~Defense 1983
.]-
.ds [T Reference Manual for the Ada Programming Language
.ds [A \*([(U\*()].S.~Department~of~Defense
.ds [R ANSI/MIL-STD-1815A
.ds [L U.S. Department of Defense 1983@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m1 1983
.][ 4 tech-report
.ds [F Upfal and Wigderson 1987
.]-
.ds [T How to Share Memory in a Distributed System
.ds [A \*([(U\*()]pfal, E.
.as [A " and \*([(W\*()]igderson, A.
.ds [J Journal of the ACM
.ds [V 34
.ds [N 1
.ds [P 116-127
.nr [P 1
.ds [L Upfal and Wigderson 1987@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m1 1987
.][ 1 journal-article
.ds [F Wilkes and LeBlanc 1986
.]-
.ds [T Rationale for the Design of Aeolus: A Systems Programming Language for
.as [T " an Action/Object System
.ds [A \*([(W\*()]ilkes, C. T.
.as [A " and \*([(L\*()]eBlanc, R. J.
.ds [J Proc. IEEE CS 1986 Int. Conf. on Computer Languages
.ds [C Miami, Florida
.ds [P 107-122
.nr [P 1
.ds [L Wilkes and LeBlanc 1986@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(ma 1986
.][ 1 journal-article
.ds [F William Mao and Yeh 1980
.]-
.ds [T Communication Port: A Language Concept for Concurrent Programming
.ds [A \*([(W\*()]illiam Mao, T.
.as [A " and \*([(Y\*()]eh, R. T.
.ds [J IEEE Trans. Softw. Eng.
.ds [V SE-6
.ds [N 2
.ds [P 194-204
.nr [P 1
.ds [L William Mao and Yeh 1980@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(m3 1980
.][ 1 journal-article
.ds [F Yokote and Tokoro 1987
.]-
.ds [T Experience and Evolution of ConcurrentSmalltalk
.ds [A \*([(Y\*()]okote, Y.
.as [A " and \*([(T\*()]okoro, M.
.ds [J SIGPLAN Notices (Proc. Object-Oriented Programming Systems, Languages and Applications 1987)
.ds [C Orlando, Florida
.ds [V 22
.ds [N 12
.ds [P 406-415
.nr [P 1
.ds [L Yokote and Tokoro 1987@@@
.nr [T 0
.nr [A 0
.nr [O 0
.ds [D \*(mc 1987
.][ 1 journal-article
.nr [W \w'10'
.]>
.nr [W \w'10'

