*dk pcom1
      subroutine pcom1(coml,comr,crnr,nj,kr,kt,ighost)

c... This routine sends and receives boundary information
c... to and from the neighboring subdomains.

      include 'size.h'
      include 'pcom.h'

      real coml((kt+1)*nd*nj*(kr+1),2)
      real comr((kt+1)*nd*nj*(kr+1),2)
      real crnr(nd*nj*(kr + 1))
      integer ierror, sender
      common /nbr1/ ileft(4,0:6), iright(4,0:6)

c... Increment the message number.

      msgnum = msgnum + 1

c... Compute message length along subdomain edges.

      mm = (kt + 1)*nd*nj*(kr + 1)

c... Send row/column data in coml to the two subdomains to the left
c... and receive similar information from the two subdomains to the
c... right.

      do ib=1,2
         CALL MPI_IRECV( COMR( 1,IB ), MM, MPI_DATATYPE,
     &                   IRIGHT( IB,LVPROC ), MSGNUM, MPI_COMM_WORLD,
     &                   RECV_REQUESTS( IB ), IERROR )
      enddo

      do ib=1,2
         CALL MPI_SEND( COML( 1,IB ), MM, MPI_DATATYPE,
     &                  ILEFT( IB,LVPROC ), MSGNUM,
     &                  MPI_COMM_WORLD, IERROR )
      enddo

c... Increment the message number.

      msgnum = msgnum + 1

c... Compute message length for the subdomain corner.

      mm = nd*nj*(kr + 1)

c... Receive corner data from the appropriate right subdomain.

      CALL MPI_IRECV( COML(1,1), MM, MPI_DATATYPE, MPI_ANY_SOURCE,
     &                MSGNUM, MPI_COMM_WORLD, RECV_REQUESTS( 3 ),
     &                IERROR )

c... Send corner data to the appropriate left subdomain.

      iproc = ileft(3,lvproc)
      if(ighost .eq. 1) iproc = ileft(4,lvproc)

      CALL MPI_SEND( CRNR, MM, MPI_DATATYPE, IPROC,
     &               MSGNUM, MPI_COMM_WORLD, IERROR )

c... Complete the nonblocking corner data receive before processing.

      CALL MPI_WAIT( RECV_REQUESTS(3), RECV_STATUS(:,3), IERROR )

c... Copy the corner data into its correct final location.

      CALL SCOPY( MM, COML(1,1), 1, CRNR, 1 )

c... Complete the nonblocking upper/lower neighbor receives before
c... processing.

      CALL MPI_WAITALL( 2, RECV_REQUESTS, RECV_STATUS(:,1:2),
     &                  IERROR )

      end
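
c... Note that pcom1 posts all of its nonblocking receives before any
c... blocking send is issued, so each MPI_SEND finds (or will find) a
c... matching receive already posted and the exchange cannot deadlock.
c... Routine pcom2 below performs the mirror-image exchange, sending
c... to the right and receiving from the left.
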
*dk pcom1to4
      subroutine pcom1to4(a4,a1,lva1,kr,nj)

c... This routine copies array a1 that contains but one point in the
c... lateral grid per active processor to array a4 that contains four
c... points in the lateral grid per active processor (but on only
c... one-fourth the number of active processors).  The parameter lva1
c... specifies the grid level of the array a1.

      include 'size.h'
      include 'pcom.h'

      real a1(0:1,2,nd*nj*(kr+1)), a4(0:2,3,nd*nj*(kr+1))
      integer ierror, sender
      common /bufr2/ a(0:1,2,189*nd*9)

c... Compute the message length.

      mm = nd*nj*(kr+1)
      if(nj .eq. 189) mm = ndo*nj*(kr+1)

c... Increment the message number.

      msgnum = msgnum + 1

c... Compute the number of processors npedg1
c... along a diamond edge for array a1.

      npedg1 = 2**lva1

c... Compute the destination processor number idest.  (The two
c... lateral process-grid indices of mynum are each halved.)

      idest = (mynum/(2*npedg1))*npedg1/2 + mod(mynum, npedg1)/2

c... Send array a1 to the destination processor.

      CALL MPI_ISEND( A1, 4*MM, MPI_DATATYPE, IDEST, MSGNUM,
     &                MPI_COMM_WORLD, SEND_REQUESTS( 1 ), IERROR )

      call nulvec(a4, 9*mm)

      if(mynum .lt. npedg1**2*10/(4*nd)) then

c... Processor mynum is an active processor for array a4.  Receive
c... contributions from four other processors and load array a4.

         do iproc=1,4

            CALL MPI_RECV( A, 4*MM, MPI_DATATYPE, MPI_ANY_SOURCE,
     &                     MSGNUM, MPI_COMM_WORLD, RECV_STATUS(:,1),
     &                     IERROR )

            isrc_proc = RECV_STATUS( MPI_SOURCE, 1 )

c... Compute the (i1,i2) coordinates for this contribution.

            i1 = mod(isrc_proc, 2) + 1
            i2 = mod(isrc_proc/npedg1, 2) + 1

c... Load array a4.

            if(nj .ne. 189) then

               do ii=1,mm
                  a4(i1-1,i2 ,ii) = a(0,1,ii)
                  a4(i1 ,i2 ,ii) = a(1,1,ii)
                  a4(i1-1,i2+1,ii) = a(0,2,ii)
                  a4(i1 ,i2+1,ii) = a(1,2,ii)
               end do

            else

               do ii=1,mm
                  a4(i1-1,i2 ,ii) = a4(i1-1,i2 ,ii) + a(0,1,ii)
                  a4(i1 ,i2 ,ii) = a4(i1 ,i2 ,ii) + a(1,1,ii)
                  a4(i1-1,i2+1,ii) = a4(i1-1,i2+1,ii) + a(0,2,ii)
                  a4(i1 ,i2+1,ii) = a4(i1 ,i2+1,ii) + a(1,2,ii)
               end do

            endif

         end do

      endif

c... Make sure the nonblocking send has completed before we leave
c... the routine.
c... SLS 10/15/00

      call MPI_WAIT( SEND_REQUESTS( 1 ), SEND_STATUS(:,1), IERROR )

      end

*dk pcom2
      subroutine pcom2(coml,comr,crnr,nj,kr,kt,ighost)

c... This routine sends and receives boundary information
c... to and from the neighboring subdomains.

      include 'size.h'
      include 'pcom.h'

      real coml((kt+1)*nd*nj*(kr+1),2)
      real comr((kt+1)*nd*nj*(kr+1),2)
      real crnr(nd*nj*(kr + 1))
      integer ierror, sender
      common /nbr1/ ileft(4,0:6), iright(4,0:6)

c... Increment the message number.

      msgnum = msgnum + 1

c... Compute message length along subdomain edges.

      mm = (kt + 1)*nd*nj*(kr + 1)

c... Send row/column data in comr to the two subdomains to the right
c... and receive similar information from the two subdomains to the
c... left.

      do ib=1,2
         CALL MPI_IRECV( COML( 1,IB ), MM, MPI_DATATYPE,
     &                   ILEFT( IB,LVPROC ), MSGNUM, MPI_COMM_WORLD,
     &                   RECV_REQUESTS( IB ), IERROR )
      enddo

      do ib=1,2
         CALL MPI_SEND( COMR( 1,IB ), MM, MPI_DATATYPE,
     &                  IRIGHT( IB,LVPROC ), MSGNUM,
     &                  MPI_COMM_WORLD, IERROR )
      enddo

c... Increment the message number.

      msgnum = msgnum + 1

c... Compute message length for the subdomain corner.

      mm = nd*nj*(kr + 1)

c... Receive corner data from the appropriate left subdomain.

      CALL MPI_IRECV( COMR(1,1), MM, MPI_DATATYPE, MPI_ANY_SOURCE,
     &                MSGNUM, MPI_COMM_WORLD, RECV_REQUESTS( 3 ),
     &                IERROR )

c... Send corner data to the appropriate right subdomain.

      iproc = iright(3,lvproc)
      if(ighost .eq. 1) iproc = iright(4,lvproc)

      CALL MPI_SEND( CRNR, MM, MPI_DATATYPE, IPROC,
     &               MSGNUM, MPI_COMM_WORLD, IERROR )

c... Complete the nonblocking corner data receive before processing.

      CALL MPI_WAIT( RECV_REQUESTS(3), RECV_STATUS(:,3), IERROR )

c... Copy the corner data into its correct location.

      CALL SCOPY( MM, COMR(1,1), 1, CRNR, 1 )

c... Complete the nonblocking upper/lower neighbor data receives
c... before processing.

      CALL MPI_WAITALL( 2, RECV_REQUESTS, RECV_STATUS(:,1:2),
     &                  IERROR )

      end

*dk pcom4to1
      subroutine pcom4to1(a1,a4,lva1,kr,nj)

c... This routine copies array a4 that contains four points in the
c... lateral grid per active processor to array a1 that contains but
c... one point in the lateral grid per active processor (but on four
c... times the number of active processors).  The parameter lva1
c... specifies the grid level of the array a1.

      include 'size.h'
      include 'pcom.h'

      real a1(0:1,2,nd*nj*(kr+1)), a4(0:2,3,nd*nj*(kr+1))
      real atemp(0:1,2,nd*nj*(kr+1))
      integer ierror, sender

c... Compute the message length.

      mm = nd*nj*(kr+1)

c... Increment the message number.

      msgnum = msgnum + 1

c... Receive array a1.

      CALL MPI_IRECV( A1, 4*MM, MPI_DATATYPE, MPI_ANY_SOURCE,
     &                MSGNUM, MPI_COMM_WORLD, RECV_REQUESTS( 1 ),
     &                IERROR )

c... Compute the number of processors npedg1
c... along a diamond edge for array a1.

      npedg1 = 2**lva1

      if(mynum .lt. npedg1**2*10/(4*nd)) then

c... Processor mynum is an active processor for array a4.  Send
c... contributions from array a4 to four other processors.

         do i2=1,2
            do i1=1,2

c... Copy the appropriate elements of a4 into array atemp.

               do ii=1,mm
                  atemp(0,1,ii) = a4(i1-1,i2 ,ii)
                  atemp(1,1,ii) = a4(i1 ,i2 ,ii)
                  atemp(0,2,ii) = a4(i1-1,i2+1,ii)
                  atemp(1,2,ii) = a4(i1 ,i2+1,ii)
               end do

c... Compute the destination processor number idest.  (This is the
c... inverse of the index mapping used in pcom1to4.)

               idest = (2*mynum/npedg1)*2*npedg1 + (i2 - 1)*npedg1
     &               + mod(mynum, npedg1/2)*2 + i1 - 1

c... Send array atemp to the destination processor.

               CALL MPI_SEND( ATEMP, 4*MM, MPI_DATATYPE, IDEST,
     &                        MSGNUM, MPI_COMM_WORLD, IERROR )

            end do
         end do

      end if

c... Complete the nonblocking receive before processing.

      CALL MPI_WAIT( RECV_REQUESTS( 1 ), RECV_STATUS(:,1), IERROR )

      end

*dk pcomfromzero
      subroutine pcomfromzero(a,ap,nc)

c... At the lowest (lv = 0) grid level, this routine takes a global
c... array a on processor zero and distributes the appropriate
c... portions to all the active processors as array ap.

      include 'size.h'
      include 'pcom.h'

      real a(4,10,nc), ap(4,nd,nc)
      integer ierror, sender

c... Increment the message number.

      msgnum = msgnum + 1

c... Compute the message length for the subarray.

      mm = 4*nd*nc

      if(mynum .eq. 0) then

c... Send the appropriate portion of the global array to
c... the other active processors.

         do iproc=1,10/nd-1

            do id=1,nd
               jd = id + iproc*nd
               do ii=1,4
                  do j=1,nc
                     ap(ii,id,j) = a(ii,jd,j)
                  end do
               end do
            end do

            CALL MPI_SEND( AP, MM, MPI_DATATYPE, IPROC,
     &                     MSGNUM, MPI_COMM_WORLD, IERROR )

         end do

c... Now load the correct portion of the global array
c... into the local array.

         do id=1,nd
            do ii=1,4
               do j=1,nc
                  ap(ii,id,j) = a(ii,id,j)
               end do
            end do
         end do

      else

c... For all processes other than zero, receive the local
c... array from process zero.

         CALL MPI_RECV( AP, MM, MPI_DATATYPE, MPI_ANY_SOURCE,
     &                  MSGNUM, MPI_COMM_WORLD, RECV_STATUS(:,1),
     &                  IERROR )

      end if

      end

*dk pcomptcl
      subroutine pcomptcl(ibndr,w,nsnd,nrcv)

c... This routine performs interprocess communication to move
c... particles across subdomain boundaries.

      include 'size.h'
      include 'pcom.h'

      parameter (nq=(nt+1)**2*nd*5)

      real w(5,(nt+1)**2*5,4)
      common /prt1/ npart, p(nq), q(nq), idp(nq), kpl(nq), ap(nq)
      common /nbr1/ ileft(4,0:6), iright(4,0:6)
      integer ibndr((nt+1)**2*5,4), nsnd(4), nrcv(4)
      integer ierror, sender

c... Increment the message number.

      msgnum = msgnum + 1

      do ib=1,4

c... Send particle data to the four neighboring subdomains.

         if(ib.eq.1) iproc = iright(1,lvproc)
         if(ib.eq.2) iproc = ileft(2,lvproc)
         if(ib.eq.3) iproc = ileft(1,lvproc)
         if(ib.eq.4) iproc = iright(2,lvproc)

c... Load particle attributes into array w to be sent to a
c... neighboring subdomain.

         do i=1,nsnd(ib)
            k = ibndr(i,ib)
            w(1,i,1) = p(k)
            w(2,i,1) = q(k)
            w(3,i,1) = idp(k)
            w(4,i,1) = kpl(k)
            w(5,i,1) = ap(k)
         end do

         CALL MPI_SEND( W( 1,1,1 ), 5*NSND( IB ),
     &                  MPI_DATATYPE, IPROC, MSGNUM,
     &                  MPI_COMM_WORLD, IERROR )

      end do

      do ib=1,4

c... Receive particle data from the four neighboring subdomains.

         CALL MPI_PROBE( MPI_ANY_SOURCE, MSGNUM, MPI_COMM_WORLD,
     &                   MSG_STATUS, IERROR )
         CALL MPI_GET_COUNT( MSG_STATUS, MPI_DATATYPE, ICOUNT,
     &                       IERROR )

         iproc = MSG_STATUS( MPI_SOURCE )

         CALL MPI_IRECV( W( 1,1,IB ), ICOUNT, MPI_DATATYPE,
     &                   IPROC, MSGNUM, MPI_COMM_WORLD,
     &                   RECV_REQUESTS( 1 ), IERROR )

c... Complete the nonblocking receive before processing.

         CALL MPI_WAITANY( 1, RECV_REQUESTS, SENDER, RECV_STATUS,
     &                     IERROR )

         nrcv(ib) = ICOUNT/5

      end do

      end
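
c... Note that pcomptcl probes each incoming message with MPI_PROBE
c... and MPI_GET_COUNT before posting the receive, so the number of
c... particles arriving from a neighbor need not be known in advance;
c... the count nrcv is simply the number of received elements divided
c... by the five attributes stored per particle.
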
*dk pcomtozero
      subroutine pcomtozero(a,ap,nc)

c... At the lowest (lv = 0) grid level, this routine loads a global
c... array a on processor zero from contributions ap from all the
c... other active processors.

      include 'size.h'
      include 'pcom.h'

      real a(4,10,nc), ap(4,nd,nc)
      integer ierror, sender

c... Increment the message number.

      msgnum = msgnum + 1

c... Compute the message length for the subarray.

      mm = 4*nd*nc

      if(mynum .ne. 0) then

c... For all processes other than zero, send the local
c... array to process zero.

         CALL MPI_SEND( AP, MM, MPI_DATATYPE, 0, MSGNUM,
     &                  MPI_COMM_WORLD, IERROR )

      else

c... On processor zero, first load the local array into
c... the global array.

         do id=1,nd
            do ii=1,4
               do j=1,nc
                  a(ii,id,j) = ap(ii,id,j)
               end do
            end do
         end do

c... Next receive the arrays from all the other processes
c... and copy them into the global array.

         do iproc=1,10/nd-1

            CALL MPI_RECV( AP, MM, MPI_DATATYPE, MPI_ANY_SOURCE,
     &                     MSGNUM, MPI_COMM_WORLD, RECV_STATUS(:,1),
     &                     IERROR )

c... Determine the rank of the sending process.

            jproc = RECV_STATUS( MPI_SOURCE, 1 )

            do id=1,nd
               jd = id + jproc*nd
               do ii=1,4
                  do j=1,nc
                     a(ii,jd,j) = ap(ii,id,j)
                  end do
               end do
            end do

         end do

      end if

      end

*dk pexit
      subroutine pexit

c... This routine terminates the MPI environment.

      CALL MPI_FINALIZE( IERROR )

      end

*dk pinit
      subroutine pinit

c... This routine initializes the MPI processes.

      include 'size.h'
      include 'pcom.h'

      common /nbr1/ ileft(4,0:6), iright(4,0:6)
      integer ierror

c... Set the message number.

      msgnum = 1

c... Enroll this process in MPI.

      CALL MPI_INIT( IERROR )
      CALL MPI_COMM_RANK( MPI_COMM_WORLD, MYNUM, IERROR )
      CALL MPI_COMM_SIZE( MPI_COMM_WORLD, NPROC, IERROR )

c... Check the number of allocated processors against the number of
c... compiled processors.

      if ( nproc /= (mt/nt)**2*10/nd ) then
         if (mynum == 0) write(*,*) "The number of compiled and",
     &      " allocated processors does not agree."
         call MPI_ABORT ( MPI_COMM_WORLD, -999, IERROR)
         stop
      endif

c... Determine the process numbers of neighboring subdomains at each
c... process grid level.  (Because 1.45 slightly exceeds 1/ln 2, lvg
c... equals the base-two logarithm of mt/nt when mt/nt is a power
c... of two.)

      lvg = 1.45*log(real(mt/nt))

      do lv=0,lvg

         if(lv.ge.1 .and. mynum.lt.2**(2*lv)*10/nd) then
            call neighborid(2**lv,ileft(1,lv),iright(1,lv))
         else
            ileft(1,lv)  = mynum
            ileft(2,lv)  = mynum
            ileft(3,lv)  = mynum
            ileft(4,lv)  = mynum
            iright(1,lv) = mynum
            iright(2,lv) = mynum
            iright(3,lv) = mynum
            iright(4,lv) = mynum
         end if

         if(lv.eq.0 .and. mynum.le.1) then
            ileft(2,lv)  = 1 - mynum
            ileft(3,lv)  = 1 - mynum
            ileft(4,lv)  = 1 - mynum
            iright(2,lv) = 1 - mynum
            iright(3,lv) = 1 - mynum
            iright(4,lv) = 1 - mynum
         end if

      end do

      end

*dk pmax
      subroutine pmax(smax)

c... This routine obtains the global maximum of the scalar
c... quantity smax from all of the nproc processes.

      include 'pcom.h'

      qmax = smax

      CALL MPI_ALLREDUCE( QMAX, SMAX, 1, MPI_DATATYPE,
     &                    MPI_MAX, MPI_COMM_WORLD, IERROR )

      end

*dk pmin
      subroutine pmin(smin)

c... This routine obtains the global minimum of the scalar
c... quantity smin from all of the nproc processes.

      include 'pcom.h'

      qmin = smin

      CALL MPI_ALLREDUCE( QMIN, SMIN, 1, MPI_DATATYPE,
     &                    MPI_MIN, MPI_COMM_WORLD, IERROR )

      end

*dk psum
      subroutine psum(u,n)

c... This routine obtains the global sum for each of the n
c... components of array u from all of the nproc processes.

      include 'size.h'
      include 'pcom.h'

      real u(n), v(300)

      kproc = mproc*10/nd

      if(kproc .eq. 1) return

      if(mynum .lt. kproc) then
         do i=1,n
            v(i) = u(i)
         end do
      else
         do i=1,n
            v(i) = 0.
         end do
      end if

      CALL MPI_ALLREDUCE( V, U, N, MPI_DATATYPE,
     &                    MPI_SUM, MPI_COMM_WORLD, IERROR )

      end
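
c... Note that psum copies u into the fixed local buffer v(300) and
c... therefore assumes n does not exceed 300 components; psumlong
c... below avoids this limit by taking an explicit work array wk of
c... length n from the caller.
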
*dk psumlong
      subroutine psumlong(u,wk,n)

c... This routine obtains the global sum for each of the n
c... components of array u from all of the nproc processes.
c... It differs from routine psum in that the work array wk
c... is passed in explicitly by the caller.

      include 'size.h'
      include 'pcom.h'

      real u(n), wk(n)

      kproc = mproc*10/nd

      if(kproc .eq. 1) return

      if(mynum .lt. kproc) then
         do i=1,n
            wk(i) = u(i)
         end do
      else
         do i=1,n
            wk(i) = 0.
         end do
      end if

      CALL MPI_ALLREDUCE( WK, U, N, MPI_DATATYPE,
     &                    MPI_SUM, MPI_COMM_WORLD, IERROR )

      end
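
c... Note that pmax, pmin, psum, and psumlong all reduce over
c... MPI_COMM_WORLD, so every process must call them, and every
c... process receives the reduced result.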