Actual source code: comb.c
2: /*
3: Split phase global vector reductions with support for combining the
4: communication portion of several operations. Using MPI-1.1 support only
6: The idea for this and much of the initial code is contributed by
7: Victor Eijkhout.
9: Usage:
10: VecDotBegin(Vec,Vec,PetscScalar *);
11: VecNormBegin(Vec,NormType,PetscReal *);
12: ....
13: VecDotEnd(Vec,Vec,PetscScalar *);
14: VecNormEnd(Vec,NormType,PetscReal *);
16: Limitations:
17: - The order of the xxxEnd() functions MUST be in the same order
18: as the xxxBegin(). There is extensive error checking to try to
19: insure that the user calls the routines in the correct order
20: */
22: #include <petsc/private/vecimpl.h>
24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
25: {
29: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
30: MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
31: #else
32: MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
33: *request = MPI_REQUEST_NULL;
34: #endif
35: return(0);
36: }
38: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);
40: /*
41: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
42: */
43: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
44: {
48: PetscNew(sr);
49: (*sr)->numopsbegin = 0;
50: (*sr)->numopsend = 0;
51: (*sr)->state = STATE_BEGIN;
52: #define MAXOPS 32
53: (*sr)->maxops = MAXOPS;
54: PetscMalloc6(MAXOPS,&(*sr)->lvalues,MAXOPS,&(*sr)->gvalues,MAXOPS,&(*sr)->invecs,MAXOPS,&(*sr)->reducetype,MAXOPS,&(*sr)->lvalues_mix,MAXOPS,&(*sr)->gvalues_mix);
55: #undef MAXOPS
56: (*sr)->comm = comm;
57: (*sr)->request = MPI_REQUEST_NULL;
58: (*sr)->mix = PETSC_FALSE;
59: (*sr)->async = PETSC_FALSE;
60: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
61: (*sr)->async = PETSC_TRUE; /* Enable by default */
62: #endif
63: /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
64: PetscOptionsGetBool(NULL,NULL,"-splitreduction_async",&(*sr)->async,NULL);
65: return(0);
66: }
68: /*
69: This function is the MPI reduction operation used when there is
70: a combination of sums and max in the reduction. The call below to
71: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
72: MPI operator PetscSplitReduction_Op.
73: */
74: MPI_Op PetscSplitReduction_Op = 0;
76: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
77: {
78: struct PetscScalarInt { PetscScalar v; PetscInt i; };
79: struct PetscScalarInt *xin = (struct PetscScalarInt*)in;
80: struct PetscScalarInt *xout = (struct PetscScalarInt*)out;
81: PetscInt i,count = (PetscInt)*cnt;
84: if (*datatype != MPIU_SCALAR_INT) {
85: (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types");
86: PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
87: }
88: for (i=0; i<count; i++) {
89: if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
90: else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v),PetscRealPart(xin[i].v));
91: else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v),PetscRealPart(xin[i].v));
92: else {
93: (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN");
94: PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
95: }
96: }
97: PetscFunctionReturnVoid();
98: }
100: /*@
101: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
103: Collective but not synchronizing
105: Input Parameter:
106: comm - communicator on which split reduction has been queued
108: Level: advanced
110: Note:
111: Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
112: VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).
114: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
115: @*/
116: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
117: {
118: PetscErrorCode ierr;
119: PetscSplitReduction *sr;
122: PetscSplitReductionGet(comm,&sr);
123: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
124: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
125: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
126: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
127: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
128: MPI_Comm comm = sr->comm;
129: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);
131: PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
132: MPI_Comm_size(sr->comm,&size);
133: if (size == 1) {
134: PetscArraycpy(gvalues,lvalues,numops);
135: } else {
136: /* determine if all reductions are sum, max, or min */
137: for (i=0; i<numops; i++) {
138: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
139: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
140: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
141: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
142: }
143: if (sum_flg + max_flg + min_flg > 1 && sr->mix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
144: if (sum_flg + max_flg + min_flg > 1) {
145: sr->mix = PETSC_TRUE;
146: for (i=0; i<numops; i++) { sr->lvalues_mix[i].v = lvalues[i]; sr->lvalues_mix[i].i = reducetype[i]; }
147: MPIPetsc_Iallreduce(sr->lvalues_mix,sr->gvalues_mix,numops,MPIU_SCALAR_INT,PetscSplitReduction_Op,comm,&sr->request);
148: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
149: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
150: } else if (min_flg) {
151: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
152: } else {
153: MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
154: }
155: }
156: sr->state = STATE_PENDING;
157: sr->numopsend = 0;
158: PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
159: } else {
160: PetscSplitReductionApply(sr);
161: }
162: return(0);
163: }
165: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
166: {
170: switch (sr->state) {
171: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
172: PetscSplitReductionApply(sr);
173: break;
174: case STATE_PENDING:
175: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
176: PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
177: if (sr->request != MPI_REQUEST_NULL) {
178: MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
179: }
180: sr->state = STATE_END;
181: if (sr->mix) {
182: PetscInt i;
183: for (i=0; i<sr->numopsbegin; i++) { sr->gvalues[i] = sr->gvalues_mix[i].v; }
184: sr->mix = PETSC_FALSE;
185: }
186: PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
187: break;
188: default: break; /* everything is already done */
189: }
190: return(0);
191: }
193: /*
194: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
195: */
196: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
197: {
199: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
200: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
201: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
202: MPI_Comm comm = sr->comm;
203: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);
206: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
207: PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
208: MPI_Comm_size(sr->comm,&size);
209: if (size == 1) {
210: PetscArraycpy(gvalues,lvalues,numops);
211: } else {
212: /* determine if all reductions are sum, max, or min */
213: for (i=0; i<numops; i++) {
214: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
215: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
216: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
217: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
218: }
219: if (sum_flg + max_flg + min_flg > 1) {
220: if (sr->mix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
221: for (i=0; i<numops; i++) { sr->lvalues_mix[i].v = lvalues[i]; sr->lvalues_mix[i].i = reducetype[i]; }
222: MPIU_Allreduce(sr->lvalues_mix,sr->gvalues_mix,numops,MPIU_SCALAR_INT,PetscSplitReduction_Op,comm);
223: for (i=0; i<numops; i++) { sr->gvalues[i] = sr->gvalues_mix[i].v; }
224: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
225: MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
226: } else if (min_flg) {
227: MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
228: } else {
229: MPIU_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
230: }
231: }
232: sr->state = STATE_END;
233: sr->numopsend = 0;
234: PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
235: return(0);
236: }
238: /*
239: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
240: */
241: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
242: {
243: struct PetscScalarInt { PetscScalar v; PetscInt i; };
244: PetscErrorCode ierr;
245: PetscInt maxops = sr->maxops,*reducetype = sr->reducetype;
246: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
247: struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt*)sr->lvalues_mix;
248: struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt*)sr->gvalues_mix;
249: void **invecs = sr->invecs;
252: sr->maxops = 2*maxops;
253: PetscMalloc6(2*maxops,&sr->lvalues,2*maxops,&sr->gvalues,2*maxops,&sr->reducetype,2*maxops,&sr->invecs,2*maxops,&sr->lvalues_mix,2*maxops,&sr->gvalues_mix);
254: PetscArraycpy(sr->lvalues,lvalues,maxops);
255: PetscArraycpy(sr->gvalues,gvalues,maxops);
256: PetscArraycpy(sr->reducetype,reducetype,maxops);
257: PetscArraycpy(sr->invecs,invecs,maxops);
258: PetscArraycpy(sr->lvalues_mix,lvalues_mix,maxops);
259: PetscArraycpy(sr->gvalues_mix,gvalues_mix,maxops);
260: PetscFree6(lvalues,gvalues,reducetype,invecs,lvalues_mix,gvalues_mix);
261: return(0);
262: }
264: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
265: {
269: PetscFree6(sr->lvalues,sr->gvalues,sr->reducetype,sr->invecs,sr->lvalues_mix,sr->gvalues_mix);
270: PetscFree(sr);
271: return(0);
272: }
274: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
276: /*
277: Private routine to delete internal storage when a communicator is freed.
278: This is called by MPI, not by users.
280: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
281: it was MPI_Comm *comm.
282: */
283: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
284: {
288: PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
289: PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);
290: return(0);
291: }
293: /*
294: PetscSplitReductionGet - Gets the split reduction object from a
295: PETSc vector, creates if it does not exit.
297: */
298: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
299: {
301: PetscMPIInt flag;
304: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
305: /*
306: The calling sequence of the 2nd argument to this function changed
307: between MPI Standard 1.0 and the revisions 1.1 Here we match the
308: new standard, if you are using an MPI implementation that uses
309: the older version you will get a warning message about the next line;
310: it is only a warning message and should do no harm.
311: */
312: MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,NULL);
313: }
314: MPI_Comm_get_attr(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
315: if (!flag) { /* doesn't exist yet so create it and put it in */
316: PetscSplitReductionCreate(comm,sr);
317: MPI_Comm_set_attr(comm,Petsc_Reduction_keyval,*sr);
318: PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
319: }
320: return(0);
321: }
323: /* ----------------------------------------------------------------------------------------------------*/
325: /*@
326: VecDotBegin - Starts a split phase dot product computation.
328: Input Parameters:
329: + x - the first vector
330: . y - the second vector
331: - result - where the result will go (can be NULL)
333: Level: advanced
335: Notes:
336: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
338: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
339: VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
340: @*/
341: PetscErrorCode VecDotBegin(Vec x,Vec y,PetscScalar *result)
342: {
343: PetscErrorCode ierr;
344: PetscSplitReduction *sr;
345: MPI_Comm comm;
350: PetscObjectGetComm((PetscObject)x,&comm);
351: PetscSplitReductionGet(comm,&sr);
352: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
353: if (sr->numopsbegin >= sr->maxops) {
354: PetscSplitReductionExtend(sr);
355: }
356: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
357: sr->invecs[sr->numopsbegin] = (void*)x;
358: if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local dots");
359: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
360: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
361: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
362: return(0);
363: }
365: /*@
366: VecDotEnd - Ends a split phase dot product computation.
368: Input Parameters:
369: + x - the first vector (can be NULL)
370: . y - the second vector (can be NULL)
371: - result - where the result will go
373: Level: advanced
375: Notes:
376: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
378: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
379: VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()
381: @*/
382: PetscErrorCode VecDotEnd(Vec x,Vec y,PetscScalar *result)
383: {
384: PetscErrorCode ierr;
385: PetscSplitReduction *sr;
386: MPI_Comm comm;
389: PetscObjectGetComm((PetscObject)x,&comm);
390: PetscSplitReductionGet(comm,&sr);
391: PetscSplitReductionEnd(sr);
393: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
394: if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
395: if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
396: *result = sr->gvalues[sr->numopsend++];
398: /*
399: We are finished getting all the results so reset to no outstanding requests
400: */
401: if (sr->numopsend == sr->numopsbegin) {
402: sr->state = STATE_BEGIN;
403: sr->numopsend = 0;
404: sr->numopsbegin = 0;
405: sr->mix = PETSC_FALSE;
406: }
407: return(0);
408: }
410: /*@
411: VecTDotBegin - Starts a split phase transpose dot product computation.
413: Input Parameters:
414: + x - the first vector
415: . y - the second vector
416: - result - where the result will go (can be NULL)
418: Level: advanced
420: Notes:
421: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
423: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
424: VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
426: @*/
427: PetscErrorCode VecTDotBegin(Vec x,Vec y,PetscScalar *result)
428: {
429: PetscErrorCode ierr;
430: PetscSplitReduction *sr;
431: MPI_Comm comm;
434: PetscObjectGetComm((PetscObject)x,&comm);
435: PetscSplitReductionGet(comm,&sr);
436: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
437: if (sr->numopsbegin >= sr->maxops) {
438: PetscSplitReductionExtend(sr);
439: }
440: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
441: sr->invecs[sr->numopsbegin] = (void*)x;
442: if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local dots");
443: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
444: (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
445: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
446: return(0);
447: }
449: /*@
450: VecTDotEnd - Ends a split phase transpose dot product computation.
452: Input Parameters:
453: + x - the first vector (can be NULL)
454: . y - the second vector (can be NULL)
455: - result - where the result will go
457: Level: advanced
459: Notes:
460: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
462: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
463: VecDotBegin(), VecDotEnd()
464: @*/
465: PetscErrorCode VecTDotEnd(Vec x,Vec y,PetscScalar *result)
466: {
470: /*
471: TDotEnd() is the same as DotEnd() so reuse the code
472: */
473: VecDotEnd(x,y,result);
474: return(0);
475: }
477: /* -------------------------------------------------------------------------*/
479: /*@
480: VecNormBegin - Starts a split phase norm computation.
482: Input Parameters:
483: + x - the first vector
484: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
485: - result - where the result will go (can be NULL)
487: Level: advanced
489: Notes:
490: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
492: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
494: @*/
495: PetscErrorCode VecNormBegin(Vec x,NormType ntype,PetscReal *result)
496: {
497: PetscErrorCode ierr;
498: PetscSplitReduction *sr;
499: PetscReal lresult[2];
500: MPI_Comm comm;
504: PetscObjectGetComm((PetscObject)x,&comm);
505: PetscSplitReductionGet(comm,&sr);
506: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
507: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
508: PetscSplitReductionExtend(sr);
509: }
511: sr->invecs[sr->numopsbegin] = (void*)x;
512: if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
513: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
514: (*x->ops->norm_local)(x,ntype,lresult);
515: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
516: if (ntype == NORM_2) lresult[0] = lresult[0]*lresult[0];
517: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1]*lresult[1];
518: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
519: else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
520: sr->lvalues[sr->numopsbegin++] = lresult[0];
521: if (ntype == NORM_1_AND_2) {
522: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
523: sr->lvalues[sr->numopsbegin++] = lresult[1];
524: }
525: return(0);
526: }
528: /*@
529: VecNormEnd - Ends a split phase norm computation.
531: Input Parameters:
532: + x - the first vector
533: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
534: - result - where the result will go
536: Level: advanced
538: Notes:
539: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
541: The x vector is not allowed to be NULL, otherwise the vector would not have its correctly cached norm value
543: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
545: @*/
546: PetscErrorCode VecNormEnd(Vec x,NormType ntype,PetscReal *result)
547: {
548: PetscErrorCode ierr;
549: PetscSplitReduction *sr;
550: MPI_Comm comm;
554: PetscObjectGetComm((PetscObject)x,&comm);
555: PetscSplitReductionGet(comm,&sr);
556: PetscSplitReductionEnd(sr);
558: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
559: if ((void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
560: if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
561: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
563: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
564: else if (ntype == NORM_1_AND_2) {
565: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
566: result[1] = PetscSqrtReal(result[1]);
567: }
568: if (ntype!=NORM_1_AND_2) {
569: PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
570: }
572: if (sr->numopsend == sr->numopsbegin) {
573: sr->state = STATE_BEGIN;
574: sr->numopsend = 0;
575: sr->numopsbegin = 0;
576: }
577: return(0);
578: }
580: /*
581: Possibly add
583: PetscReductionSumBegin/End()
584: PetscReductionMaxBegin/End()
585: PetscReductionMinBegin/End()
586: or have more like MPI with a single function with flag for Op? Like first better
587: */
589: /*@
590: VecMDotBegin - Starts a split phase multiple dot product computation.
592: Input Parameters:
593: + x - the first vector
594: . nv - number of vectors
595: . y - array of vectors
596: - result - where the result will go (can be NULL)
598: Level: advanced
600: Notes:
601: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
603: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
604: VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
605: @*/
606: PetscErrorCode VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
607: {
608: PetscErrorCode ierr;
609: PetscSplitReduction *sr;
610: MPI_Comm comm;
611: int i;
614: PetscObjectGetComm((PetscObject)x,&comm);
615: PetscSplitReductionGet(comm,&sr);
616: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
617: for (i=0; i<nv; i++) {
618: if (sr->numopsbegin+i >= sr->maxops) {
619: PetscSplitReductionExtend(sr);
620: }
621: sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
622: sr->invecs[sr->numopsbegin+i] = (void*)x;
623: }
624: if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local mdots");
625: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
626: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
627: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
628: sr->numopsbegin += nv;
629: return(0);
630: }
632: /*@
633: VecMDotEnd - Ends a split phase multiple dot product computation.
635: Input Parameters:
636: + x - the first vector (can be NULL)
637: . nv - number of vectors
638: - y - array of vectors (can be NULL)
640: Output Parameters:
641: . result - where the result will go
643: Level: advanced
645: Notes:
646: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
648: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
649: VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
651: @*/
652: PetscErrorCode VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
653: {
654: PetscErrorCode ierr;
655: PetscSplitReduction *sr;
656: MPI_Comm comm;
657: int i;
660: PetscObjectGetComm((PetscObject)x,&comm);
661: PetscSplitReductionGet(comm,&sr);
662: PetscSplitReductionEnd(sr);
664: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
665: if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
666: if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
667: for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];
669: /*
670: We are finished getting all the results so reset to no outstanding requests
671: */
672: if (sr->numopsend == sr->numopsbegin) {
673: sr->state = STATE_BEGIN;
674: sr->numopsend = 0;
675: sr->numopsbegin = 0;
676: }
677: return(0);
678: }
680: /*@
681: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
683: Input Parameters:
684: + x - the first vector
685: . nv - number of vectors
686: . y - array of vectors
687: - result - where the result will go (can be NULL)
689: Level: advanced
691: Notes:
692: Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().
694: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
695: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
697: @*/
698: PetscErrorCode VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
699: {
700: PetscErrorCode ierr;
701: PetscSplitReduction *sr;
702: MPI_Comm comm;
703: int i;
706: PetscObjectGetComm((PetscObject)x,&comm);
707: PetscSplitReductionGet(comm,&sr);
708: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
709: for (i=0; i<nv; i++) {
710: if (sr->numopsbegin+i >= sr->maxops) {
711: PetscSplitReductionExtend(sr);
712: }
713: sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
714: sr->invecs[sr->numopsbegin+i] = (void*)x;
715: }
716: if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local mdots");
717: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
718: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
719: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
720: sr->numopsbegin += nv;
721: return(0);
722: }
724: /*@
725: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
727: Input Parameters:
728: + x - the first vector (can be NULL)
729: . nv - number of vectors
730: - y - array of vectors (can be NULL)
732: Output Parameters:
733: . result - where the result will go
735: Level: advanced
737: Notes:
738: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
740: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
741: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
742: @*/
743: PetscErrorCode VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
744: {
748: /*
749: MTDotEnd() is the same as MDotEnd() so reuse the code
750: */
751: VecMDotEnd(x,nv,y,result);
752: return(0);
753: }