c
c   This file contains a test program for the routines that generate
c   BC programs for global arrays.  It shows the use of alternate
c   scheduling strategies.  It implements a simple Jacobi relaxation 
c   for a Dirichlet problem on a square.  DO NOT USE THIS FOR SOLVING
c   DIRICHLET PROBLEMS.  Better methods are provided as part of the
c   parallel solvers (PSLES) package.
c
c   In addition, this program shows how to generate parallel output 
c   for simple domains, using XBPQContour.  In order to make this 
c   program portable to systems that don't support X11 graphics, this 
c   is done  through a C call that can act as a nop on systems where the 
c   graphics is not supported.
c
c     int mdim,          /* size of array in this dimension */
c     int is_parallel,   /* true if the array is parallelized in
c 			    this dimension */
c     int start,         /* starting index for local piece */
c     int end,           /* ending index for local piece */
c     int loc            /* location in this dimension of the 
c                           processor */
c     int ndim           /* number of processors in this dimension,
c 			    -1 if unspecified */
c     int sg             /* start and end ghost limits */
c     int eg             /* start and end ghost limits */
c
       integer function worker()
       integer mx, my, nd
c       parameter(mx = 50, my = 20, nd=2, NBYTES=8)
       parameter(mx = 4, my = 4, nd=2, NBYTES=8)
       include '../meshf.h'
       integer pimytid, pinumtids
       integer pgm, sz(0:9,0:nd-1), iper(2)
       integer nstep, nx, ny
       integer sx,sxgp,ex,exgp,sy,sygp,ey,eygp
       double precision a((mx+2)*(my+2)), b((mx+2)*(my+2))
       integer myid, nproc
c
c
c Define the domain as a 2-d mesh of global size mx x my, to be 
c subdivided in both dimensions
c
       sz(szmdim,0)       = mx
       sz(szisparallel,0) =  1
       sz(szndim,0)       = -1
       sz(szmdim,1)       = my
       sz(szisparallel,1) =  1
       sz(szndim,1)       = -1
c
c
c      Build the communications pattern by:
c 
c      1. Compute the size of the ghost-points from the computational 
c         stencil
c      2. Compute the local part of the array that this processor 
c         is responsible for
c      3. Build the communication pattern and "compile" it
c
c      Setup the ghost points from the stencil 
       call BCFindGhostFromStencil( nd, sz, 0, 0, boxstencil )
c
       myid  = pimytid()
       nproc = pinumtids()
       call BCGlobalToLocalArray( nd, sz, nproc, myid )
       iper(1) = 0
       iper(2) = 0
       call BCSetGhostWidths( nd, sz, iper )
c
c      To print the local array description, use
c      call BCPrintArrayPart( 0, nd, sz )
c
       pgm = BCBuildArrayPGM( nd, sz, nproc, myid, NBYTES )
c
c      Chose amoung several schedules (Nonblocking sends and recieves
c      are used by default except on systems that don't support them;
c      on these, ordered, blocking send/receives are used).
c      call BCUseSyncSend( pgm )
c       call BCUseOrderedSend( pgm )
       call BCArrayCompile( pgm, 0 )
c      To print the program, use
c       call BCPrint_pgms( pgm, 0 )
c
c      Compute the parameters of our part of the domain
c
       sx   = sz(szstart,0) + 1
       ex   = sz(szend,0) + 1
       sxgp = sz(szsg,0)
       exgp = sz(szeg,0)
       nx   = ex - sx + 1 + sxgp + exgp
       sy   = sz(szstart,1) + 1
       ey   = sz(szend,1) + 1
       sygp = sz(szsg,1)
       eygp = sz(szeg,1)
       ny   = ey - sy + 1 + sygp + eygp
c      initialize a and b (this insures that the boundary values are set
c      for both)
       call InitDomain( a, mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp)
       call InitDomain( b, mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp)
c
       nstep = 1
       do i = 0, nstep-1, 2
          call BCexec( pgm, a, a )
          call dispmesh( a, sx, sxgp, ex, exgp, sy, sygp, ey, eygp, 
     *                   mx, my, 16 )
          call Relax( a, b, mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp)
          call BCexec( pgm, b, b )
          call dispmesh( b, sx, sxgp, ex, exgp, sy, sygp, ey, eygp, 
     *                   mx, my, 16 )
          call Relax( b, a, mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp)
       enddo

       call BCfree( pgm )

       worker = 0
       return
       end
c
       subroutine InitDomain( a, mx,my,
     +                           sx,sxgp,ex,exgp,sy,sygp,ey,eygp)
       integer      mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp
       double precision a(sx-sxgp:ex+exgp,sy-sygp:ey+eygp)
c
c     Zero everything
c
       do j = sy-sygp,ey+eygp
          do i = sx-sxgp,ex+exgp
             a(i,j) = 0.0d0
          enddo
       enddo
c
c ---  initialize the boundaries
c
c      Bottom (sy = 1)
       if (sy .eq. 1) then 
          do i=sx,ex
             a(i,sy) = 1.0
          enddo
       endif
c      Top    (ey = my)
       if (ey .eq. my) then
          do i=sx,ex
             a(i,ey) = 1.0
          enddo
       endif
c      Left   (sx = 1)
       if (sx .eq. 1) then
          do j=sy,ey
             a(sx,j) = 1.0
          enddo
       endif
c      Right  (ex = mx)
       if (ex .eq. mx) then
          do j=sy,ey
             a(ex,j) = 1.0
          enddo
       endif
       return
       end
c
       subroutine Relax( a, b, 
     *                   mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp )
       integer      mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp
       double precision a(sx-sxgp:ex+exgp,sy-sygp:ey+eygp)
       double precision b(sx-sxgp:ex+exgp,sy-sygp:ey+eygp)
       integer i, j, ssx, ssy, eex, eey
c     
c      Code to relax the solution
c
c      Make sure that we don't relax the boundaries...
       ssx = sx
       ssy = sy
       eex = ex
       eey = ey
       if (sx .eq. 1)  ssx = 2
       if (sy .eq. 1)  ssy = 2
       if (ex .eq. mx) eex = mx - 1
       if (ey .eq. my) eey = my - 1
c
       do 10 j=ssy,eey
          do 10 i=ssx,eex
             b(i,j) = 
     +         (a(i+1,j) + a(i-1,j) + a(i,j+1) + a(i,j-1)) * 0.25
 10    continue
       return
       end
