Actual source code: mpimesg.c


#include <petscsys.h>

/*@C
  PetscGatherNumberOfMessages - Computes the number of messages a node expects to receive

  Collective

  Input Parameters:
+ comm     - Communicator
. iflags   - an array of integers of length equal to the size of comm. A '1' in iflags[i] represents a
             message from the current node to the ith node. Optionally NULL
- ilengths - a nonzero ilengths[i] represents a message to node i of length ilengths[i].
             Optionally NULL.

  Output Parameter:
. nrecvs - number of messages the current node will receive

  Level: developer

  Notes:
  With this info, the correct message lengths can be determined using
  PetscGatherMessageLengths()

  Either iflags or ilengths should be provided. If iflags is not
  provided (NULL), it can be computed from ilengths. If iflags is
  provided, ilengths is not required.

.seealso: PetscGatherMessageLengths()
@*/
PetscErrorCode  PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs)
{
  PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);

  PetscMalloc2(size,&recv_buf,size,&iflags_localm);

  /* If iflags not provided, compute iflags from ilengths */
  if (!iflags) {
    if (!ilengths) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
    iflags_local = iflags_localm;
    for (i=0; i<size; i++) {
      if (ilengths[i]) iflags_local[i] = 1;
      else iflags_local[i] = 0;
    }
  } else iflags_local = (PetscMPIInt*) iflags;

  /* Post an allreduce to determine the number of messages the current node will receive */
  MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);
  *nrecvs = recv_buf[rank];

  PetscFree2(recv_buf,iflags_localm);
  return(0);
}
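
/* A minimal usage sketch (illustrative, not part of the PETSc API): count the messages
   this rank will receive when every rank sends one message to its successor. The
   function name and message length below are assumptions made for the example. */
static PetscErrorCode ExampleCountIncomingMessages(MPI_Comm comm)
{
  PetscMPIInt size,rank,nrecvs,*ilengths;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);
  PetscCalloc1(size,&ilengths);
  ilengths[(rank+1)%size] = 10;   /* send 10 items to the next rank */
  /* pass NULL for iflags; the flags are derived from ilengths internally */
  PetscGatherNumberOfMessages(comm,NULL,ilengths,&nrecvs);
  /* nrecvs is now 1: exactly one predecessor sends to this rank */
  PetscFree(ilengths);
  return(0);
}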

/*@C
  PetscGatherMessageLengths - Computes info about messages that an MPI node will receive,
  including (from-id,length) pairs for each message.

  Collective

  Input Parameters:
+ comm     - Communicator
. nsends   - number of messages that are to be sent
. nrecvs   - number of messages being received
- ilengths - an array of integers of length equal to the size of comm;
             a nonzero ilengths[i] represents a message to node i of length ilengths[i]

  Output Parameters:
+ onodes   - list of node-ids from which messages are expected
- olengths - corresponding message lengths

  Level: developer

  Notes:
  With this info, the correct MPI_Irecv() can be posted with the correct
  from-id, with a buffer of the right amount of memory.

  The calling function is responsible for freeing the memory in onodes and olengths.

  To determine nrecvs, one can use PetscGatherNumberOfMessages()

.seealso: PetscGatherNumberOfMessages()
@*/
PetscErrorCode  PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths)
{
  PetscMPIInt    size,rank,tag,i,j;
  MPI_Request    *s_waits  = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);
  PetscCommGetNewTag(comm,&tag);

  /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
  PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);
  s_waits = r_waits+nrecvs;

  /* Post the Irecv to get the message length-info */
  PetscMalloc1(nrecvs,olengths);
  for (i=0; i<nrecvs; i++) {
    MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);
  }

  /* Post the Isends with the message length-info */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths[i]) {
      MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j);
      j++;
    }
  }

  /* Post waits on sends and receives */
  if (nrecvs+nsends) {MPI_Waitall(nrecvs+nsends,r_waits,w_status);}

  /* Pack up the received data */
  PetscMalloc1(nrecvs,onodes);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i] = w_status[i].MPI_SOURCE;
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
    /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
       It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
       does not put the correct value in the recv buffer. See also
       https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
       https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
     */
    if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
#endif
  }
  PetscFree2(r_waits,w_status);
  return(0);
}
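
/* A minimal usage sketch (illustrative, not part of the PETSc API): the usual pairing of
   PetscGatherNumberOfMessages() and PetscGatherMessageLengths(). Here ilengths is
   assumed to have one entry per rank in comm; nsends must equal the number of its
   nonzero entries. */
static PetscErrorCode ExampleGatherLengths(MPI_Comm comm,const PetscMPIInt ilengths[])
{
  PetscMPIInt size,i,nsends,nrecvs,*onodes,*olengths;

  MPI_Comm_size(comm,&size);
  for (i=0,nsends=0; i<size; i++) if (ilengths[i]) nsends++;
  PetscGatherNumberOfMessages(comm,NULL,ilengths,&nrecvs);
  PetscGatherMessageLengths(comm,nsends,nrecvs,ilengths,&onodes,&olengths);
  /* onodes[i] is the source rank and olengths[i] the length of the ith expected
     message; the caller frees both arrays */
  PetscFree(onodes);
  PetscFree(olengths);
  return(0);
}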

/*@C
  PetscGatherMessageLengths2 - Computes info about messages that an MPI node will receive,
  including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
  except it takes TWO ilengths arrays and outputs TWO olengths arrays.

  Collective

  Input Parameters:
+ comm      - Communicator
. nsends    - number of messages that are to be sent
. nrecvs    - number of messages being received
. ilengths1 - first array of integers of length equal to the size of comm
- ilengths2 - second array of integers of length equal to the size of comm

  Output Parameters:
+ onodes    - list of node-ids from which messages are expected
. olengths1 - first set of corresponding message lengths
- olengths2 - second set of corresponding message lengths

  Level: developer

  Notes:
  With this info, the correct MPI_Irecv() can be posted with the correct
  from-id, with a buffer of the right amount of memory.

  The calling function is responsible for freeing the memory in onodes, olengths1, and olengths2.

  To determine nrecvs, one can use PetscGatherNumberOfMessages()

.seealso: PetscGatherMessageLengths(), PetscGatherNumberOfMessages()
@*/
PetscErrorCode  PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
{
  PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
  MPI_Request    *s_waits  = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  MPI_Comm_size(comm,&size);
  PetscCommGetNewTag(comm,&tag);

  /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
  PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);
  s_waits = r_waits + nrecvs;

  /* Post the Irecv to get the message length-info */
  PetscMalloc1(nrecvs+1,olengths1);
  PetscMalloc1(nrecvs+1,olengths2);
  for (i=0; i<nrecvs; i++) {
    buf_j = buf_r + (2*i);
    MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);
  }

  /* Post the Isends with the message length-info */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths1[i]) {
      buf_j    = buf_s + (2*j);
      buf_j[0] = *(ilengths1+i);
      buf_j[1] = *(ilengths2+i);
      MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);
      j++;
    }
  }
  if (j != nsends) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d",j,nsends);

  /* Post waits on sends and receives */
  if (nrecvs+nsends) {MPI_Waitall(nrecvs+nsends,r_waits,w_status);}

  /* Pack up the received data */
  PetscMalloc1(nrecvs+1,onodes);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i]    = w_status[i].MPI_SOURCE;
    buf_j           = buf_r + (2*i);
    (*olengths1)[i] = buf_j[0];
    (*olengths2)[i] = buf_j[1];
  }

  PetscFree4(r_waits,buf_r,buf_s,w_status);
  return(0);
}
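
/* A minimal usage sketch (illustrative, not part of the PETSc API): the two-array variant.
   Message i, arriving from onodes[i], carries the pair (olengths1[i],olengths2[i]).
   nsends is assumed to equal the number of nonzero entries in ilengths1. */
static PetscErrorCode ExampleGatherLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[])
{
  PetscMPIInt *onodes,*olengths1,*olengths2;

  PetscGatherMessageLengths2(comm,nsends,nrecvs,ilengths1,ilengths2,&onodes,&olengths1,&olengths2);
  /* ... post receives sized by olengths1/olengths2 here ... */
  PetscFree(onodes);
  PetscFree(olengths1);
  PetscFree(olengths2);
  return(0);
}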

/*
  Allocate a buffer sufficient to hold messages of the sizes specified in olengths,
  and post Irecvs on these buffers using node info from onodes
 */
PetscErrorCode  PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits)
{
  PetscInt       **rbuf_t,i,len = 0;
  MPI_Request    *r_waits_t;

  /* compute memory required for recv buffers */
  for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */

  /* allocate memory for recv buffers */
  PetscMalloc1(nrecvs+1,&rbuf_t);
  PetscMalloc1(len,&rbuf_t[0]);
  for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];

  /* Post the receives */
  PetscMalloc1(nrecvs,&r_waits_t);
  for (i=0; i<nrecvs; ++i) {
    MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i);
  }

  *rbuf    = rbuf_t;
  *r_waits = r_waits_t;
  return(0);
}
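
/* A minimal usage sketch (illustrative, not part of the PETSc API): post the receives
   with PetscPostIrecvInt(), match them with sends, and wait. The names snodes,
   slengths, and sbuf (destination ranks, send lengths, and send buffers) are
   assumptions for the example; comm is assumed to be a PETSc communicator so
   that PetscCommGetNewTag() can supply a fresh tag. */
static PetscErrorCode ExamplePostIrecvInt(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],const PetscMPIInt snodes[],const PetscMPIInt slengths[],PetscInt **sbuf)
{
  PetscMPIInt i,tag;
  PetscInt    **rbuf;
  MPI_Request *r_waits,*s_waits;

  PetscCommGetNewTag(comm,&tag);
  PetscPostIrecvInt(comm,tag,nrecvs,onodes,olengths,&rbuf,&r_waits);
  PetscMalloc1(nsends,&s_waits);
  for (i=0; i<nsends; i++) {
    MPI_Isend(sbuf[i],slengths[i],MPIU_INT,snodes[i],tag,comm,s_waits+i);
  }
  MPI_Waitall(nrecvs,r_waits,MPI_STATUSES_IGNORE);
  MPI_Waitall(nsends,s_waits,MPI_STATUSES_IGNORE);
  /* rbuf[i] now holds olengths[i] integers from onodes[i]. The buffers were
     allocated as one contiguous block, so free rbuf[0] first, then rbuf itself */
  PetscFree(rbuf[0]);
  PetscFree(rbuf);
  PetscFree(r_waits);
  PetscFree(s_waits);
  return(0);
}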

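/* Same as PetscPostIrecvInt(), except the receive buffers hold PetscScalar entries
   and the Irecvs are posted with MPIU_SCALAR */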
PetscErrorCode  PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits)
{
  PetscMPIInt    i;
  PetscScalar    **rbuf_t;
  MPI_Request    *r_waits_t;
  PetscInt       len = 0;

  /* compute memory required for recv buffers */
  for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */

  /* allocate memory for recv buffers */
  PetscMalloc1(nrecvs+1,&rbuf_t);
  PetscMalloc1(len,&rbuf_t[0]);
  for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];

  /* Post the receives */
  PetscMalloc1(nrecvs,&r_waits_t);
  for (i=0; i<nrecvs; ++i) {
    MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i);
  }

  *rbuf    = rbuf_t;
  *r_waits = r_waits_t;
  return(0);
}