Actual source code: matstash.c
2: #include <petsc/private/matimpl.h>
4: #define DEFAULT_STASH_SIZE 10000
6: static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
7: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
8: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
9: #if !defined(PETSC_HAVE_MPIUNI)
10: static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
11: static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
12: static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
13: #endif
15: /*
16: MatStashCreate_Private - Creates a stash, currently used by all the parallel
17: matrix implementations. The stash is where elements of a matrix destined
18: to be stored on other processors are kept until matrix assembly is done.
20: This is a simple-minded stash; it simply appends entries to the end of the stash.
22: Input Parameters:
23: comm - communicator, required for scatters.
24: bs - stash block size. Used when stashing blocks of values.
26: Output Parameters:
27: stash - the newly created stash
28: */
29: PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
30: {
32: PetscInt max,*opt,nopt,i;
33: PetscBool flg;
36: /* Require 2 tags, get the second using PetscCommGetNewTag() */
37: stash->comm = comm;
39: PetscCommGetNewTag(stash->comm,&stash->tag1);
40: PetscCommGetNewTag(stash->comm,&stash->tag2);
41: MPI_Comm_size(stash->comm,&stash->size);
42: MPI_Comm_rank(stash->comm,&stash->rank);
43: PetscMalloc1(2*stash->size,&stash->flg_v);
44: for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
46: nopt = stash->size;
47: PetscMalloc1(nopt,&opt);
48: PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);
49: if (flg) {
50: if (nopt == 1) max = opt[0];
51: else if (nopt == stash->size) max = opt[stash->rank];
52: else if (stash->rank < nopt) max = opt[stash->rank];
53: else max = 0; /* Use default */
54: stash->umax = max;
55: } else {
56: stash->umax = 0;
57: }
58: PetscFree(opt);
59: if (bs <= 0) bs = 1;
61: stash->bs = bs;
62: stash->nmax = 0;
63: stash->oldnmax = 0;
64: stash->n = 0;
65: stash->reallocs = -1;
66: stash->space_head = NULL;
67: stash->space = NULL;
69: stash->send_waits = NULL;
70: stash->recv_waits = NULL;
71: stash->send_status = NULL;
72: stash->nsends = 0;
73: stash->nrecvs = 0;
74: stash->svalues = NULL;
75: stash->rvalues = NULL;
76: stash->rindices = NULL;
77: stash->nprocessed = 0;
78: stash->reproduce = PETSC_FALSE;
79: stash->blocktype = MPI_DATATYPE_NULL;
81: PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);
82: #if !defined(PETSC_HAVE_MPIUNI)
83: flg = PETSC_FALSE;
84: PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);
85: if (!flg) {
86: stash->ScatterBegin = MatStashScatterBegin_BTS;
87: stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
88: stash->ScatterEnd = MatStashScatterEnd_BTS;
89: stash->ScatterDestroy = MatStashScatterDestroy_BTS;
90: } else {
91: #endif
92: stash->ScatterBegin = MatStashScatterBegin_Ref;
93: stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
94: stash->ScatterEnd = MatStashScatterEnd_Ref;
95: stash->ScatterDestroy = NULL;
96: #if !defined(PETSC_HAVE_MPIUNI)
97: }
98: #endif
99: return(0);
100: }
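/*
   Illustrative note (not part of the original source): the options parsed above can be
   supplied at run time. A single value of -matstash_initial_size applies to every rank,
   while a comma-separated list gives one value per rank; the program name below is only
   a placeholder.

     mpiexec -n 2 ./ex1 -matstash_initial_size 10000
     mpiexec -n 2 ./ex1 -matstash_initial_size 5000,20000 -matstash_reproduce

   With one value per rank, rank r uses the r-th entry; ranks beyond the supplied list
   fall back to the default stash size.
*/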
102: /*
103: MatStashDestroy_Private - Destroy the stash
104: */
105: PetscErrorCode MatStashDestroy_Private(MatStash *stash)
106: {
110: PetscMatStashSpaceDestroy(&stash->space_head);
111: if (stash->ScatterDestroy) {(*stash->ScatterDestroy)(stash);}
113: stash->space = NULL;
115: PetscFree(stash->flg_v);
116: return(0);
117: }
119: /*
120: MatStashScatterEnd_Private - This is called as the final stage of the
121: scatter. The final stages of message passing are done here, and
122: all the memory used for message passing is cleaned up. This
123: routine also resets the stash and deallocates the memory used
124: for the stash. It also keeps track of the current memory usage
125: so that the same value can be used the next time through.
126: */
127: PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
128: {
132: (*stash->ScatterEnd)(stash);
133: return(0);
134: }
136: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
137: {
139: PetscInt nsends=stash->nsends,bs2,oldnmax,i;
140: MPI_Status *send_status;
143: for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
144: /* wait on sends */
145: if (nsends) {
146: PetscMalloc1(2*nsends,&send_status);
147: MPI_Waitall(2*nsends,stash->send_waits,send_status);
148: PetscFree(send_status);
149: }
151: /* Now update oldnmax to be approximately 10% more than the maximum n used; this way the
152: wastage of space is reduced the next time this stash is used.
153: Also update oldnmax only if it increases */
154: if (stash->n) {
155: bs2 = stash->bs*stash->bs;
156: oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
157: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
158: }
160: stash->nmax = 0;
161: stash->n = 0;
162: stash->reallocs = -1;
163: stash->nprocessed = 0;
165: PetscMatStashSpaceDestroy(&stash->space_head);
167: stash->space = NULL;
169: PetscFree(stash->send_waits);
170: PetscFree(stash->recv_waits);
171: PetscFree2(stash->svalues,stash->sindices);
172: PetscFree(stash->rvalues[0]);
173: PetscFree(stash->rvalues);
174: PetscFree(stash->rindices[0]);
175: PetscFree(stash->rindices);
176: return(0);
177: }
179: /*
180: MatStashGetInfo_Private - Gets the relevant statistics of the stash
182: Input Parameters:
183: stash - the stash
184: nstash - the size of the stash. Indicates the number of values stored.
185: reallocs - the number of additional mallocs incurred.
187: */
188: PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
189: {
190: PetscInt bs2 = stash->bs*stash->bs;
193: if (nstash) *nstash = stash->n*bs2;
194: if (reallocs) {
195: if (stash->reallocs < 0) *reallocs = 0;
196: else *reallocs = stash->reallocs;
197: }
198: return(0);
199: }
201: /*
202: MatStashSetInitialSize_Private - Sets the initial size of the stash
204: Input Parameters:
205: stash - the stash
206: max - the value that is used as the max size of the stash.
207: This value is used while allocating memory.
208: */
209: PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
210: {
212: stash->umax = max;
213: return(0);
214: }
216: /* MatStashExpand_Private - Expand the stash. This function is called
217: when the space in the stash is not sufficient to add the new values
218: being inserted into the stash.
220: Input Parameters:
221: stash - the stash
222: incr - the minimum increase requested
224: Notes:
225: This routine doubles the currently used memory.
226: */
227: static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
228: {
230: PetscInt newnmax,bs2= stash->bs*stash->bs;
233: /* allocate a larger stash */
234: if (!stash->oldnmax && !stash->nmax) { /* new stash */
235: if (stash->umax) newnmax = stash->umax/bs2;
236: else newnmax = DEFAULT_STASH_SIZE/bs2;
237: } else if (!stash->nmax) { /* reusing stash */
238: if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
239: else newnmax = stash->oldnmax/bs2;
240: } else newnmax = stash->nmax*2;
241: if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;
243: /* Get a MatStashSpace and attach it to stash */
244: PetscMatStashSpaceGet(bs2,newnmax,&stash->space);
245: if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
246: stash->space_head = stash->space;
247: }
249: stash->reallocs++;
250: stash->nmax = newnmax;
251: return(0);
252: }
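/*
   Worked example (illustrative, not part of the original source), assuming bs = 1 (so bs2 = 1)
   and no user-supplied -matstash_initial_size:
     - first use:             newnmax = DEFAULT_STASH_SIZE/bs2 = 10000
     - subsequent expansions:  newnmax = 2*nmax, i.e. 20000, 40000, ...
     - if that is still too small (newnmax < nmax + incr), newnmax is bumped by a further 2*incr.
   When the stash is reused after an assembly, the first allocation instead starts from the
   larger of umax and the oldnmax recorded by the previous assembly, divided by bs2.
*/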
253: /*
254: MatStashValuesRow_Private - inserts values into the stash. This function
255: expects the values to be row-oriented. Multiple columns belonging to the same row
256: can be inserted with a single call to this function.
258: Input Parameters:
259: stash - the stash
260: row - the global row corresponding to the values
261: n - the number of elements inserted. All elements belong to the above row.
262: idxn - the global column indices corresponding to each of the values.
263: values - the values inserted
264: */
265: PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
266: {
267: PetscErrorCode ierr;
268: PetscInt i,k,cnt = 0;
269: PetscMatStashSpace space=stash->space;
272: /* Check and see if we have sufficient memory */
273: if (!space || space->local_remaining < n) {
274: MatStashExpand_Private(stash,n);
275: }
276: space = stash->space;
277: k = space->local_used;
278: for (i=0; i<n; i++) {
279: if (ignorezeroentries && values && values[i] == 0.0) continue;
280: space->idx[k] = row;
281: space->idy[k] = idxn[i];
282: space->val[k] = values ? values[i] : 0.0;
283: k++;
284: cnt++;
285: }
286: stash->n += cnt;
287: space->local_used += cnt;
288: space->local_remaining -= cnt;
289: return(0);
290: }
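/*
   Illustrative sketch (hypothetical, not part of PETSc): how a parallel MatSetValues()
   implementation might stash a row destined for another process. The names grow and mat
   are placeholders.

     PetscInt    cols[3] = {0, 5, 9};
     PetscScalar vals[3] = {1.0, 2.0, 3.0};
     // grow is a global row index owned by some other process
     MatStashValuesRow_Private(&mat->stash, grow, 3, cols, vals, PETSC_FALSE);

   Passing ignorezeroentries = PETSC_TRUE would silently skip any vals[i] equal to 0.0.
*/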
292: /*
293: MatStashValuesCol_Private - inserts values into the stash. This function
294: expects the values to be column-oriented. Multiple columns belonging to the same row
295: can be inserted with a single call to this function.
297: Input Parameters:
298: stash - the stash
299: row - the global row corresponding to the values
300: n - the number of elements inserted. All elements belong to the above row.
301: idxn - the global column indices corresponding to each of the values.
302: values - the values inserted
303: stepval - consecutive values are separated by a distance of stepval.
304: This happens because the input is column-oriented.
305: */
306: PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
307: {
308: PetscErrorCode ierr;
309: PetscInt i,k,cnt = 0;
310: PetscMatStashSpace space=stash->space;
313: /* Check and see if we have sufficient memory */
314: if (!space || space->local_remaining < n) {
315: MatStashExpand_Private(stash,n);
316: }
317: space = stash->space;
318: k = space->local_used;
319: for (i=0; i<n; i++) {
320: if (ignorezeroentries && values && values[i*stepval] == 0.0) continue;
321: space->idx[k] = row;
322: space->idy[k] = idxn[i];
323: space->val[k] = values ? values[i*stepval] : 0.0;
324: k++;
325: cnt++;
326: }
327: stash->n += cnt;
328: space->local_used += cnt;
329: space->local_remaining -= cnt;
330: return(0);
331: }
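/*
   Illustrative example (not part of the original source): for a column-oriented input
   block with m rows, the entries of one row are not contiguous; element i of the row is
   read as values[i*stepval], with stepval typically equal to m. E.g. for a 2x3 block
   stored column-major as {a,d, b,e, c,f}, row 0 is values[0], values[2], values[4] =
   a, b, c when stepval = 2.
*/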
333: /*
334: MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
335: This function expects the values to be row-oriented. Multiple columns belonging
336: to the same block-row can be inserted with a single call to this function.
337: This function extracts the sub-block of values based on the dimensions of
338: the original input block, and the row,col values corresponding to the blocks.
340: Input Parameters:
341: stash - the stash
342: row - the global block-row corresponding to the values
343: n - the number of elements inserted. All elements belong to the above row.
344: idxn - the global block-column indices corresponding to each of the blocks of
345: values. Each block is of size bs*bs.
346: values - the values inserted
347: rmax - the number of block-rows in the original block.
348: cmax - the number of block-columns in the original block.
349: idx - the index of the current block-row in the original block.
350: */
351: PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
352: {
353: PetscErrorCode ierr;
354: PetscInt i,j,k,bs2,bs=stash->bs,l;
355: const PetscScalar *vals;
356: PetscScalar *array;
357: PetscMatStashSpace space=stash->space;
360: if (!space || space->local_remaining < n) {
361: MatStashExpand_Private(stash,n);
362: }
363: space = stash->space;
364: l = space->local_used;
365: bs2 = bs*bs;
366: for (i=0; i<n; i++) {
367: space->idx[l] = row;
368: space->idy[l] = idxn[i];
369: /* Now copy over the block of values. Store the values column-oriented.
370: This enables inserting multiple blocks belonging to a row with a single
371: function call */
372: array = space->val + bs2*l;
373: vals = values + idx*bs2*n + bs*i;
374: for (j=0; j<bs; j++) {
375: for (k=0; k<bs; k++) array[k*bs] = values ? vals[k] : 0.0;
376: array++;
377: vals += cmax*bs;
378: }
379: l++;
380: }
381: stash->n += n;
382: space->local_used += n;
383: space->local_remaining -= n;
384: return(0);
385: }
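/*
   Illustrative example (not part of the original source): with bs = 2, a stashed block
   that is row-oriented in the input,

       | a  b |
       | c  d |

   is stored in space->val in column-major order as {a, c, b, d}; the inner loop above
   writes input element (row j, column k) of the block to position k*bs + j.
*/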
387: /*
388: MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
389: This function expects the values to be column-oriented. Multiple columns belonging
390: to the same block-row can be inserted with a single call to this function.
391: This function extracts the sub-block of values based on the dimensions of
392: the original input block, and the row,col values corresponding to the blocks.
394: Input Parameters:
395: stash - the stash
396: row - the global block-row corresponding to the values
397: n - the number of elements inserted. All elements belong to the above row.
398: idxn - the global block-column indices corresponding to each of the blocks of
399: values. Each block is of size bs*bs.
400: values - the values inserted
401: rmax - the number of block-rows in the original block.
402: cmax - the number of block-columns in the original block.
403: idx - the index of the current block-row in the original block.
404: */
405: PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
406: {
407: PetscErrorCode ierr;
408: PetscInt i,j,k,bs2,bs=stash->bs,l;
409: const PetscScalar *vals;
410: PetscScalar *array;
411: PetscMatStashSpace space=stash->space;
414: if (!space || space->local_remaining < n) {
415: MatStashExpand_Private(stash,n);
416: }
417: space = stash->space;
418: l = space->local_used;
419: bs2 = bs*bs;
420: for (i=0; i<n; i++) {
421: space->idx[l] = row;
422: space->idy[l] = idxn[i];
423: /* Now copy over the block of values. Store the values column-oriented.
424: This enables inserting multiple blocks belonging to a row with a single
425: function call */
426: array = space->val + bs2*l;
427: vals = values + idx*bs2*n + bs*i;
428: for (j=0; j<bs; j++) {
429: for (k=0; k<bs; k++) array[k] = values ? vals[k] : 0.0;
430: array += bs;
431: vals += rmax*bs;
432: }
433: l++;
434: }
435: stash->n += n;
436: space->local_used += n;
437: space->local_remaining -= n;
438: return(0);
439: }
440: /*
441: MatStashScatterBegin_Private - Initiates the transfer of values to the
442: correct owners. This function goes through the stash, checks the
443: owner of each stashed value, and sends the values off to the owning
444: processes.
446: Input Parameters:
447: stash - the stash
448: owners - an array of size 'no-of-procs' which gives the ownership range
449: for each process.
451: Notes:
452: In the case of the blocked stash, the 'owners' array has the
453: ranges specified in blocked global indices, while for the regular stash
454: they are in regular global indices.
455: */
456: PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
457: {
461: (*stash->ScatterBegin)(mat,stash,owners);
462: return(0);
463: }
465: static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
466: {
467: PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
468: PetscInt size=stash->size,nsends;
469: PetscErrorCode ierr;
470: PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l;
471: PetscScalar **rvalues,*svalues;
472: MPI_Comm comm = stash->comm;
473: MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
474: PetscMPIInt *sizes,*nlengths,nreceives;
475: PetscInt *sp_idx,*sp_idy;
476: PetscScalar *sp_val;
477: PetscMatStashSpace space,space_next;
480: { /* make sure all processors are either in INSERTMODE or ADDMODE */
481: InsertMode addv;
482: MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
483: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
484: mat->insertmode = addv; /* in case this processor had no cache */
485: }
487: bs2 = stash->bs*stash->bs;
489: /* first count number of contributors to each processor */
490: PetscCalloc1(size,&nlengths);
491: PetscMalloc1(stash->n+1,&owner);
493: i = j = 0;
494: lastidx = -1;
495: space = stash->space_head;
496: while (space) {
497: space_next = space->next;
498: sp_idx = space->idx;
499: for (l=0; l<space->local_used; l++) {
500: /* if indices are NOT locally sorted, need to start search at the beginning */
501: if (lastidx > (idx = sp_idx[l])) j = 0;
502: lastidx = idx;
503: for (; j<size; j++) {
504: if (idx >= owners[j] && idx < owners[j+1]) {
505: nlengths[j]++; owner[i] = j; break;
506: }
507: }
508: i++;
509: }
510: space = space_next;
511: }
513: /* Now check what procs get messages - and compute nsends. */
514: PetscCalloc1(size,&sizes);
515: for (i=0, nsends=0; i<size; i++) {
516: if (nlengths[i]) {
517: sizes[i] = 1; nsends++;
518: }
519: }
521: {PetscMPIInt *onodes,*olengths;
522: /* Determine the number of messages to expect, their lengths, and their from-ids */
523: PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
524: PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
525: /* since row and col indices are clubbed together, lengths are multiplied by 2 */
526: for (i=0; i<nreceives; i++) olengths[i] *=2;
527: PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
528: /* values are of size 'bs2' per entry (so remove the earlier factor of 2) */
529: for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
530: PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
531: PetscFree(onodes);
532: PetscFree(olengths);}
534: /* do sends:
535: 1) startv[i] gives the starting index in svalues for the entries going to
536: the ith processor
537: */
538: PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
539: PetscMalloc1(2*nsends,&send_waits);
540: PetscMalloc2(size,&startv,size,&starti);
541: /* use 2 sends per processor: one with the indices (all_i and all_j), the other with the values (all_a) */
542: startv[0] = 0; starti[0] = 0;
543: for (i=1; i<size; i++) {
544: startv[i] = startv[i-1] + nlengths[i-1];
545: starti[i] = starti[i-1] + 2*nlengths[i-1];
546: }
548: i = 0;
549: space = stash->space_head;
550: while (space) {
551: space_next = space->next;
552: sp_idx = space->idx;
553: sp_idy = space->idy;
554: sp_val = space->val;
555: for (l=0; l<space->local_used; l++) {
556: j = owner[i];
557: if (bs2 == 1) {
558: svalues[startv[j]] = sp_val[l];
559: } else {
560: PetscInt k;
561: PetscScalar *buf1,*buf2;
562: buf1 = svalues+bs2*startv[j];
563: buf2 = space->val + bs2*l;
564: for (k=0; k<bs2; k++) buf1[k] = buf2[k];
565: }
566: sindices[starti[j]] = sp_idx[l];
567: sindices[starti[j]+nlengths[j]] = sp_idy[l];
568: startv[j]++;
569: starti[j]++;
570: i++;
571: }
572: space = space_next;
573: }
574: startv[0] = 0;
575: for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
577: for (i=0,count=0; i<size; i++) {
578: if (sizes[i]) {
579: MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
580: MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
581: }
582: }
583: #if defined(PETSC_USE_INFO)
584: PetscInfo1(NULL,"No of messages: %d \n",nsends);
585: for (i=0; i<size; i++) {
586: if (sizes[i]) {
587: PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));
588: }
589: }
590: #endif
591: PetscFree(nlengths);
592: PetscFree(owner);
593: PetscFree2(startv,starti);
594: PetscFree(sizes);
596: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
597: PetscMalloc1(2*nreceives,&recv_waits);
599: for (i=0; i<nreceives; i++) {
600: recv_waits[2*i] = recv_waits1[i];
601: recv_waits[2*i+1] = recv_waits2[i];
602: }
603: stash->recv_waits = recv_waits;
605: PetscFree(recv_waits1);
606: PetscFree(recv_waits2);
608: stash->svalues = svalues;
609: stash->sindices = sindices;
610: stash->rvalues = rvalues;
611: stash->rindices = rindices;
612: stash->send_waits = send_waits;
613: stash->nsends = nsends;
614: stash->nrecvs = nreceives;
615: stash->reproduce_count = 0;
616: return(0);
617: }
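/*
   Illustrative summary (not part of the original source) of the two messages sent to a
   destination process j by the reference scatter above, say with nlengths[j] = 3 stashed
   entries (rows r0,r1,r2, columns c0,c1,c2, values v0,v1,v2) and bs2 = 1:

     tag1 (MPIU_INT,    length 2*nlengths[j]):   [ r0 r1 r2  c0 c1 c2 ]
     tag2 (MPIU_SCALAR, length bs2*nlengths[j]): [ v0 v1 v2 ]

   i.e. all row indices first, then all column indices, with the values in the same order
   in a separate message. MatStashScatterGetMesg_Ref() below reassembles a pair of such
   messages per source rank.
*/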
619: /*
620: MatStashScatterGetMesg_Private - This function waits on the receives posted
621: in the function MatStashScatterBegin_Private() and returns one message at
622: a time to the calling function. If no messages are left, it indicates this
623: by setting flg = 0, else it sets flg = 1.
625: Input Parameters:
626: stash - the stash
628: Output Parameters:
629: nvals - the number of entries in the current message.
630: rows - an array of row indices (or blocked indices) corresponding to the values
631: cols - an array of column indices (or blocked indices) corresponding to the values
632: vals - the values
633: flg - 0 indicates no more messages are left, and the current call has no values associated.
634: 1 indicates that the current call successfully received a message, and the
635: other output parameters nvals,rows,cols,vals are set appropriately.
636: */
637: PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
638: {
642: (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);
643: return(0);
644: }
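/*
   Illustrative sketch (hypothetical, not part of PETSc): how an assembly routine
   typically drains the stash after MatStashScatterBegin_Private(). Names other than the
   stash routines themselves are placeholders.

     PetscMPIInt n;
     PetscInt    *rows, *cols, flg;
     PetscScalar *vals;

     MatStashScatterGetMesg_Private(&mat->stash, &n, &rows, &cols, &vals, &flg);
     while (flg) {
       // insert the n received (row, col, value) entries into the local matrix here
       MatStashScatterGetMesg_Private(&mat->stash, &n, &rows, &cols, &vals, &flg);
     }
     MatStashScatterEnd_Private(&mat->stash);
*/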
646: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
647: {
649: PetscMPIInt i,*flg_v = stash->flg_v,i1,i2;
650: PetscInt bs2;
651: MPI_Status recv_status;
652: PetscBool match_found = PETSC_FALSE;
655: *flg = 0; /* When a message is discovered this is reset to 1 */
656: /* Return if no more messages to process */
657: if (stash->nprocessed == stash->nrecvs) return(0);
659: bs2 = stash->bs*stash->bs;
660: /* If a matching pair of receives is found, process them, and return the data to
661: the calling function. Until then keep receiving messages */
662: while (!match_found) {
663: if (stash->reproduce) {
664: i = stash->reproduce_count++;
665: MPI_Wait(stash->recv_waits+i,&recv_status);
666: } else {
667: MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
668: }
669: if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
671: /* Now pack the received message into a structure which is usable by others */
672: if (i % 2) {
673: MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
675: flg_v[2*recv_status.MPI_SOURCE] = i/2;
677: *nvals = *nvals/bs2;
678: } else {
679: MPI_Get_count(&recv_status,MPIU_INT,nvals);
681: flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
683: *nvals = *nvals/2; /* This message has both row indices and col indices */
684: }
686: /* Check if we have both messages from this proc */
687: i1 = flg_v[2*recv_status.MPI_SOURCE];
688: i2 = flg_v[2*recv_status.MPI_SOURCE+1];
689: if (i1 != -1 && i2 != -1) {
690: *rows = stash->rindices[i2];
691: *cols = *rows + *nvals;
692: *vals = stash->rvalues[i1];
693: *flg = 1;
694: stash->nprocessed++;
695: match_found = PETSC_TRUE;
696: }
697: }
698: return(0);
699: }
701: #if !defined(PETSC_HAVE_MPIUNI)
702: typedef struct {
703: PetscInt row;
704: PetscInt col;
705: PetscScalar vals[1]; /* Actually an array of length bs2 */
706: } MatStashBlock;
708: static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
709: {
711: PetscMatStashSpace space;
712: PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
713: PetscScalar **valptr;
716: PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);
717: for (space=stash->space_head,cnt=0; space; space=space->next) {
718: for (i=0; i<space->local_used; i++) {
719: row[cnt] = space->idx[i];
720: col[cnt] = space->idy[i];
721: valptr[cnt] = &space->val[i*bs2];
722: perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
723: cnt++;
724: }
725: }
726: if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
727: PetscSortIntWithArrayPair(n,row,col,perm);
728: /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
729: for (rowstart=0,cnt=0,i=1; i<=n; i++) {
730: if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
731: PetscInt colstart;
732: PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);
733: for (colstart=rowstart; colstart<i;) { /* Compress multiple insertions to the same location */
734: PetscInt j,l;
735: MatStashBlock *block;
736: PetscSegBufferGet(stash->segsendblocks,1,&block);
737: block->row = row[rowstart];
738: block->col = col[colstart];
739: PetscArraycpy(block->vals,valptr[perm[colstart]],bs2);
740: for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
741: if (insertmode == ADD_VALUES) {
742: for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
743: } else {
744: PetscArraycpy(block->vals,valptr[perm[j]],bs2);
745: }
746: }
747: colstart = j;
748: }
749: rowstart = i;
750: }
751: }
752: PetscFree4(row,col,valptr,perm);
753: return(0);
754: }
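/*
   Worked example (illustrative, not part of the original source) with bs = 1 and the
   stashed entries (row, col, value) = (5, 2, 1.0), (3, 1, 2.0), (5, 2, 4.0):

     ADD_VALUES:    the sorted, compressed send buffer holds (3, 1, 2.0) and (5, 2, 5.0),
                    duplicates at the same (row, col) being summed.
     INSERT_VALUES: duplicates collapse to a single block, keeping one of the stashed
                    values (the one that ends up last after sorting).

   Either way each (row, col) pair appears at most once in the blocks handed to
   PetscSegBufferGet() above.
*/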
756: static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
757: {
761: if (stash->blocktype == MPI_DATATYPE_NULL) {
762: PetscInt bs2 = PetscSqr(stash->bs);
763: PetscMPIInt blocklens[2];
764: MPI_Aint displs[2];
765: MPI_Datatype types[2],stype;
766: /* Note that DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
767: std::complex itself has standard layout, so does DummyBlock, recursively.
768: To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
769: though they can have different alignment, e.g., 16 bytes for double complex, instead of 8 bytes as in GCC's libstdc++.
770: offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.
772: We can test if std::complex has standard layout with the following code:
773: #include <iostream>
774: #include <type_traits>
775: #include <complex>
776: int main() {
777: std::cout << std::boolalpha;
778: std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
779: }
780: Output: true
781: */
782: struct DummyBlock {PetscInt row,col; PetscScalar vals;};
784: stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
785: if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
786: stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
787: }
788: PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);
789: PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);
790: PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);
791: blocklens[0] = 2;
792: blocklens[1] = bs2;
793: displs[0] = offsetof(struct DummyBlock,row);
794: displs[1] = offsetof(struct DummyBlock,vals);
795: types[0] = MPIU_INT;
796: types[1] = MPIU_SCALAR;
797: MPI_Type_create_struct(2,blocklens,displs,types,&stype);
798: MPI_Type_commit(&stype);
799: MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);
800: MPI_Type_commit(&stash->blocktype);
801: MPI_Type_free(&stype);
802: }
803: return(0);
804: }
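/*
   Illustrative arithmetic (not part of the original source) for blocktype_size, assuming
   common type sizes:

     4-byte PetscInt, 8-byte PetscScalar, bs = 1:
       offsetof(vals) = 8, size = 8 + 1*8 = 16; 16 % 4 == 0, so no padding is needed.
     8-byte PetscInt (64-bit indices), 4-byte PetscScalar (single precision real), bs = 1:
       offsetof(vals) = 16, size = 16 + 1*4 = 20; 20 % 8 == 4, so it is padded up to 24 to
       keep PetscInt alignment for consecutive blocks in the send buffer.

   The resized MPI datatype then has extent blocktype_size, matching the stride used when
   indexing sendblocks[b*stash->blocktype_size] elsewhere in this file.
*/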
806: /* Callback invoked after the target rank has initiated the receive of the rendezvous message.
807: * Here we post the main sends.
808: */
809: static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
810: {
811: MatStash *stash = (MatStash*)ctx;
812: MatStashHeader *hdr = (MatStashHeader*)sdata;
816: if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
817: MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);
818: stash->sendframes[rankid].count = hdr->count;
819: stash->sendframes[rankid].pending = 1;
820: return(0);
821: }
823: /* Callback invoked by target after receiving rendezvous message.
824: * Here we post the main recvs.
825: */
826: static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
827: {
828: MatStash *stash = (MatStash*)ctx;
829: MatStashHeader *hdr = (MatStashHeader*)rdata;
830: MatStashFrame *frame;
834: PetscSegBufferGet(stash->segrecvframe,1,&frame);
835: PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);
836: MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);
837: frame->count = hdr->count;
838: frame->pending = 1;
839: return(0);
840: }
842: /*
843: * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
844: */
845: static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
846: {
848: size_t nblocks;
849: char *sendblocks;
852: if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
853: InsertMode addv;
854: MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
855: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
856: }
858: MatStashBlockTypeSetUp(stash);
859: MatStashSortCompress_Private(stash,mat->insertmode);
860: PetscSegBufferGetSize(stash->segsendblocks,&nblocks);
861: PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);
862: if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
863: PetscInt i;
864: size_t b;
865: for (i=0,b=0; i<stash->nsendranks; i++) {
866: stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
867: /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
868: stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
869: for (; b<nblocks; b++) {
870: MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
871: if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
872: if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
873: stash->sendhdr[i].count++;
874: }
875: }
876: } else { /* Dynamically count and pack (first time) */
877: PetscInt sendno;
878: size_t i,rowstart;
880: /* Count number of send ranks and allocate for sends */
881: stash->nsendranks = 0;
882: for (rowstart=0; rowstart<nblocks;) {
883: PetscInt owner;
884: MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
885: PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);
886: if (owner < 0) owner = -(owner+2);
887: for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
888: MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
889: if (sendblock_i->row >= owners[owner+1]) break;
890: }
891: stash->nsendranks++;
892: rowstart = i;
893: }
894: PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);
896: /* Set up sendhdrs and sendframes */
897: sendno = 0;
898: for (rowstart=0; rowstart<nblocks;) {
899: PetscInt owner;
900: MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
901: PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);
902: if (owner < 0) owner = -(owner+2);
903: stash->sendranks[sendno] = owner;
904: for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
905: MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
906: if (sendblock_i->row >= owners[owner+1]) break;
907: }
908: stash->sendframes[sendno].buffer = sendblock_rowstart;
909: stash->sendframes[sendno].pending = 0;
910: stash->sendhdr[sendno].count = i - rowstart;
911: sendno++;
912: rowstart = i;
913: }
914: if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
915: }
917: /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
918: * message or a dummy entry of some sort. */
919: if (mat->insertmode == INSERT_VALUES) {
920: size_t i;
921: for (i=0; i<nblocks; i++) {
922: MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
923: sendblock_i->row = -(sendblock_i->row+1);
924: }
925: }
927: if (stash->first_assembly_done) {
928: PetscMPIInt i,tag;
929: PetscCommGetNewTag(stash->comm,&tag);
930: for (i=0; i<stash->nrecvranks; i++) {
931: MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);
932: }
933: for (i=0; i<stash->nsendranks; i++) {
934: MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);
935: }
936: stash->use_status = PETSC_TRUE; /* Use count from message status. */
937: } else {
938: PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
939: &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
940: MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);
941: PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);
942: stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
943: }
945: PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);
946: stash->recvframe_active = NULL;
947: stash->recvframe_i = 0;
948: stash->some_i = 0;
949: stash->some_count = 0;
950: stash->recvcount = 0;
951: stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
952: stash->insertmode = &mat->insertmode;
953: return(0);
954: }
956: static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
957: {
959: MatStashBlock *block;
962: *flg = 0;
963: while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
964: if (stash->some_i == stash->some_count) {
965: if (stash->recvcount == stash->nrecvranks) return(0); /* Done */
966: MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);
967: stash->some_i = 0;
968: }
969: stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
970: stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
971: if (stash->use_status) { /* Count what was actually sent */
972: MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);
973: }
974: if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
975: block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
976: if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
977: if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
978: if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
979: }
980: stash->some_i++;
981: stash->recvcount++;
982: stash->recvframe_i = 0;
983: }
984: *n = 1;
985: block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
986: if (block->row < 0) block->row = -(block->row + 1);
987: *row = &block->row;
988: *col = &block->col;
989: *val = block->vals;
990: stash->recvframe_i++;
991: *flg = 1;
992: return(0);
993: }
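/*
   Illustrative note (not part of the original source) on the sign encoding used above:
   MatStashScatterBegin_BTS() sends row r as -(r+1) when the insert mode is INSERT_VALUES,
   so e.g. row 7 travels as -8. On receipt, a negative row both identifies the mode and is
   decoded back with row = -(row+1), recovering 7; non-negative rows imply ADD_VALUES.
*/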
995: static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
996: {
1000: MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);
1001: if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
1002: void *dummy;
1003: PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);
1004: } else { /* No reuse, so collect everything. */
1005: MatStashScatterDestroy_BTS(stash);
1006: }
1008: /* Now update oldnmax to be approximately 10% more than the maximum n used; this way the
1009: wastage of space is reduced the next time this stash is used.
1010: Also update oldnmax only if it increases */
1011: if (stash->n) {
1012: PetscInt bs2 = stash->bs*stash->bs;
1013: PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1014: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1015: }
1017: stash->nmax = 0;
1018: stash->n = 0;
1019: stash->reallocs = -1;
1020: stash->nprocessed = 0;
1022: PetscMatStashSpaceDestroy(&stash->space_head);
1024: stash->space = NULL;
1026: return(0);
1027: }
1029: PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1030: {
1034: PetscSegBufferDestroy(&stash->segsendblocks);
1035: PetscSegBufferDestroy(&stash->segrecvframe);
1036: stash->recvframes = NULL;
1037: PetscSegBufferDestroy(&stash->segrecvblocks);
1038: if (stash->blocktype != MPI_DATATYPE_NULL) {
1039: MPI_Type_free(&stash->blocktype);
1040: }
1041: stash->nsendranks = 0;
1042: stash->nrecvranks = 0;
1043: PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);
1044: PetscFree(stash->sendreqs);
1045: PetscFree(stash->recvreqs);
1046: PetscFree(stash->recvranks);
1047: PetscFree(stash->recvhdr);
1048: PetscFree2(stash->some_indices,stash->some_statuses);
1049: return(0);
1050: }
1051: #endif