      subroutine matvec1(
     $     MY_A, MY_V, V, MY_X, X,
     $     ncols, M,N,nproc, t)
c
c     Compute matrix-vector product (A*v = x) using EASYPVM
c
c     Method#1:
c
c     Initially every processor has local matrix A(M,N), where
c     M = N/nproc  ( and mod(N,nproc) == 0 ). Furthermore each proc
c     has partial v of length M.
c
c     NOTE: the relation M*nproc == N is NOT checked here. Every
c     processor contributes exactly M elements to the gathered
c     vectors V(N) and X(N), so the caller must guarantee it.
c
c     Solution phase:
c
c     (1) gcolx() the v_local to form the v_global for each proc
c     (2) Perform A_local(1:M,1:N) * v_global(1:N) = x_local(1:M)
c     (3) gcolx() the x_local to form the x_global for each proc
c         (always done by this routine)
c
c     Arguments:
c       MY_A  (in)  local part of A, dimension (M,N)
c       MY_V  (in)  local part of v, length M
c       V     (out) gathered v, length N
c       MY_X  (out) local part of the product x, length M
c       X     (out) gathered product x, length N
c       ncols (out) element counts handed to gcolx(), length
c                   nproc; every entry is set to M
c       M     (in)  number of locally held rows of A
c       N     (in)  global problem size
c       nproc (in)  length of ncols (number of processors)
c       t     (out) t(1) and t(2) first serve as scratch for the
c                   start/stop clock readings. On return t(1)
c                   holds the gmin() result and t(2) the gmax()
c                   result for the elapsed time (presumably the
c                   min/max over all processors - TODO confirm
c                   against the EASYPVM documentation).
c
      implicit none

      include 'fpvm3.h'
      include 'feasy.h'

      integer M,N,nproc
      double precision MY_A(M,N), MY_V(M), V(N), MY_X(M), X(N)
      integer ncols(nproc)
      double precision t(2)

      integer i,j,k, where
      double precision vj, tdiff
      integer oldtype
D     integer me

c     Every processor contributes the same number of elements (M)
c     to both gcolx() calls below.
      do k=1,nproc
         ncols(k) = M
      enddo

c     Select REAL8 as the EASYPVM data type. The returned value is
c     kept and handed back to setdatatype() before returning
c     (presumably it is the previously active type).
      oldtype = setdatatype(REAL8)

D     me = mynode()
D     print *,me,': gsync'
c     gsync() is called before the first clock reading so that the
c     timed section starts after it on every processor.
      call gsync()
      t(1) = dclock()

c     Step (1): form V(1:N) from the local pieces MY_V(1:M).
D     print *,me,': gcolx#1 for V',N,M
      call gcolx(MY_V,ncols,V)
c--      call gcol(MY_V,M,V,N)

c     Step (2): MY_X = MY_A * V.
      do i=1,M
         MY_X(i) = 0.0d0
      enddo

c     j is the outer loop so the inner loop walks down a column of
c     MY_A, i.e. over the first (fastest varying) array index.
      do j=1,N
         vj = V(j)
         do i=1,M
            MY_X(i) = MY_X(i) + MY_A(i,j)*vj
         enddo
      enddo

c     Step (3): form X(1:N) from the local pieces MY_X(1:M).
D     print *,me,': gcolx#2 for X'
      call gcolx(MY_X,ncols,X)
c--      call gcol(MY_X,M,X,N)
      t(2) = dclock()

D     print *,me,': Done.'
c     tdiff must be computed before gmin()/gmax() overwrite t(1)
c     and t(2) with their results.
      tdiff = t(2) - t(1)
D     print *,me,tdiff,t

c     The value gmin()/gmax() store in "where" is not used here.
      call gmin(tdiff,1,t(1),where)
      call gmax(tdiff,1,t(2),where)

c     Hand the saved data type back to EASYPVM.
      oldtype = setdatatype(oldtype)

      end
