      PROGRAM parallel_matrix_transpose
      implicit none
      include 'fpvm3.h'
      integer m, i, j, NPROC, ME, n 
      parameter (NPROC=6)          ! Assumed to be even
      parameter (n=12, m=n/NPROC)  ! n divisible by NPROC
      real*8 a(m,n), buffer(m,m)
      integer taskid(0:NPROC-1), rc
      integer topside(NPROC/2), botside(NPROC/2)
      
      CALL setup_spmd(NPROC,taskid,ME)
      
      if (ME .eq. 0) then                
         print 1003,ME,': Square matrix dimension: ',n
         print 1003,ME,': NPROC=',NPROC
      endif
      
      CALL init_matrix(a,m,n,ME)
      print 1002,ME,' A(',ME*m+1,(ME+1)*m,1,n,')='
      do i=1,m
         print 1000,ME,(int(a(i,j)),j=1,n)
      enddo
      
      CALL transpose(a,m,n,ME,NPROC,taskid,buffer,
     $     topside,botside)
      print 1002,ME,' A-transpose(',ME*m+1,(ME+1)*m,1,n,')='
      do i=1,m
         print 1000,ME,(int(a(i,j)),j=1,n)
      enddo

      CALL PVMFexit(rc)         ! Leave PVM

 1000 format(1x,i3,':',16i4)
 1002 format(1x,i3,':',a,i2,':',i2,' , ',i2,':',i2,a)
 1003 format(1x,i3,a,i5)
      END

      SUBROUTINE init_matrix(a,m,n,ME)
      implicit none         
      integer m, i, j, k, n, ME, mME
      real*8 a(m,n)
      mME = m*ME
      k = (mME**2 - mME)/2 + mME
      do i=1,m
         do j=1,i+mME
            k = k + 1
            a(i,j) = k
         enddo
         do j=i+mME+1,n
            a(i,j) = 0
         enddo
      enddo
      END 

      SUBROUTINE setup_spmd(NPROC,taskid,ME)
      implicit none
      include 'fpvm3.h'
      integer PvmShowTids ! Omitted from PVM 3.3.6 fpvm3.h-file
      parameter (PvmShowTids = 14)
      integer ON, OFF
      parameter (ON = 1, OFF = 0)
      integer NPROC, taskid(0:NPROC-1), ME
      integer parent, mytid, rc, msglabel, i
      character*64 a_out

      msglabel = 0
      CALL PVMFmytid(mytid)
      CALL PVMFparent(parent)
      if (parent .lt. 0) then
         taskid(0) = mytid
         CALL getarg(0, a_out)
         CALL PVMFcatchout(ON,rc)
         CALL PVMFspawn(a_out,
     $        PvmTaskDefault,'*',
     $        NPROC-1,taskid(1),
     $        rc)
         CALL PVMFinitsend(PvmDataDefault,rc)
         CALL PVMFpack(INTEGER4,taskid(0),NPROC,1,rc)
         CALL PVMFmcast(NPROC-1,taskid(1),msglabel,rc)
      else
         CALL PVMFrecv(parent,msglabel,rc)
         CALL PVMFunpack(INTEGER4,taskid(0),NPROC,1,rc)
      endif

      CALL PVMFsetopt(PvmShowTids,OFF,rc)
      
      do i=0,NPROC-1
         if (taskid(i) .eq. mytid) then
            ME = i
            return
         endif
      enddo
      ME = -1
      END

      SUBROUTINE transpose(a,m,n,ME,NPROC,taskid,buffer,
     $     topside,botside)
      implicit none                   
      include 'fpvm3.h'
      integer m, i, j, n, ME, NPROC, NP2, k, rc, msglabel
      integer taskid(0:NPROC-1)
      real*8 a(m,n), tmp, buffer(m,m)   
      integer topside(NPROC/2), botside(NPROC/2)
      integer opponent, istart

      NP2 = NPROC/2
      do i=1,NP2
         topside(i) = i-1
         botside(i) = NPROC-i
      enddo
      do k=1,NPROC-1
         msglabel = 1000 + k
         CALL find(opponent,topside,botside,NP2,ME)
         istart = m*opponent
         if (ME .gt. opponent) then
            CALL PVMFinitsend(PvmDataDefault,rc)
            do j=1,m
               CALL PVMFpack(REAL8,a(1,istart+j),m,1,rc)
            enddo
            CALL PVMFsend(taskid(opponent),msglabel,rc)
            CALL PVMFrecv(taskid(opponent),msglabel,rc)
            do j=1,m
               CALL PVMFunpack(REAL8,a(1,istart+j),m,1,rc)
            enddo
         else
            do j=1,m
               do i=1,m
                  buffer(i,j) = a(i,istart+j)
               enddo     
            enddo
            CALL PVMFrecv(taskid(opponent),msglabel,rc)            
            do j=1,m
               CALL PVMFunpack(REAL8,a(1,istart+j),m,1,rc)
            enddo
            CALL PVMFinitsend(PvmDataDefault,rc)
            do j=1,m
               CALL PVMFpack(REAL8,buffer(1,j),m,1,rc)
            enddo
            CALL PVMFsend(taskid(opponent),msglabel,rc)            
         endif
         do j=1,m
            do i=j+1,m
               tmp = a(j,istart+i)
               a(j,istart+i) = a(i,istart+j)
               a(i,istart+j) = tmp
            enddo
         enddo
         if (k .lt. NPROC-1) CALL circulate(topside,botside,NP2)
      enddo
      istart = m*ME
      do j=1,m
         do i=j+1,m
            tmp = a(j,istart+i)
            a(j,istart+i) = a(i,istart+j)
            a(i,istart+j) = tmp
         enddo
      enddo
      END
      
      SUBROUTINE find(opponent,topside,botside,NP2,ME)
      implicit none
      integer opponent, NP2, ME, i
      integer topside(NP2), botside(NP2)
      do i=1,NP2
         if (topside(i) .eq. ME) then
            opponent = botside(i)
            return
         else if (botside(i). eq. ME) then
            opponent = topside(i)
            return
         endif
      enddo
      opponent = -1
      END          
      
      SUBROUTINE circulate(topside,botside,NP2)
      implicit none
      integer NP2, i, bot_last
      integer topside(NP2), botside(NP2)
      bot_last = topside(NP2)
      do i=NP2,2,-1
         topside(i) = topside(i-1)
      enddo
      topside(1) = botside(2)
      do i=2,NP2-1
         botside(i) = botside(i+1)
      enddo
      botside(NP2) = bot_last
      END
