Actual source code: mpiviennacl.cxx
/*
   This file contains routines for Parallel vector operations.
 */
#include <petscconf.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>
#include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

/*MC
   VECVIENNACL - VECVIENNACL = "viennacl" - A VECSEQVIENNACL on a single-process communicator, and VECMPIVIENNACL otherwise.

   Options Database Keys:
. -vec_type viennacl - sets the vector type to VECVIENNACL during a call to VecSetFromOptions()

   Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECSEQVIENNACL, VECMPIVIENNACL, VECSTANDARD, VecType, VecCreateMPI()
M*/
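
/*
   Illustrative usage sketch: one plausible way to obtain a vector of this type from user
   code.  The variable names and the size 100 are arbitrary, and error checking is omitted
   for brevity; this is a sketch, not part of the PETSc manual page above.

     #include <petscvec.h>

     int main(int argc,char **argv)
     {
       Vec x;

       PetscInitialize(&argc,&argv,NULL,NULL);
       VecCreate(PETSC_COMM_WORLD,&x);
       VecSetSizes(x,PETSC_DECIDE,100);
       VecSetType(x,VECVIENNACL);        // or VecSetFromOptions(x) together with -vec_type viennacl
       VecSet(x,1.0);
       VecDestroy(&x);
       PetscFinalize();
       return 0;
     }
*/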

PetscErrorCode VecDestroy_MPIViennaCL(Vec v)
{
  try {
    if (v->spptr) {
      delete ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
      delete (Vec_ViennaCL*) v->spptr;
    }
  } catch(std::exception const & ex) {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
  }
  VecDestroy_MPI(v);
  return(0);
}

PetscErrorCode VecNorm_MPIViennaCL(Vec xin,NormType type,PetscReal *z)
{
  PetscReal sum,work = 0.0;

  if (type == NORM_2 || type == NORM_FROBENIUS) {
    VecNorm_SeqViennaCL(xin,NORM_2,&work);
    work *= work;
    MPIU_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
    *z = PetscSqrtReal(sum);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_SeqViennaCL(xin,NORM_1,&work);
    /* Find the global sum */
    MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_SeqViennaCL(xin,NORM_INFINITY,&work);
    /* Find the global max */
    MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    VecNorm_SeqViennaCL(xin,NORM_1,temp);
    VecNorm_SeqViennaCL(xin,NORM_2,temp+1);
    temp[1] = temp[1]*temp[1];
    MPIU_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
    z[1] = PetscSqrtReal(z[1]);
  }
  return(0);
}

PetscErrorCode VecDot_MPIViennaCL(Vec xin,Vec yin,PetscScalar *z)
{
  PetscScalar sum,work;

  VecDot_SeqViennaCL(xin,yin,&work);
  MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
  *z = sum;
  return(0);
}

PetscErrorCode VecTDot_MPIViennaCL(Vec xin,Vec yin,PetscScalar *z)
{
  PetscScalar sum,work;

  VecTDot_SeqViennaCL(xin,yin,&work);
  MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
  *z = sum;
  return(0);
}

PetscErrorCode VecMDot_MPIViennaCL(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;

  if (nv > 128) {
    PetscMalloc1(nv,&work);
  }
  VecMDot_SeqViennaCL(xin,nv,y,work);
  MPIU_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}

/*MC
   VECMPIVIENNACL - VECMPIVIENNACL = "mpiviennacl" - The basic parallel vector, modified to use ViennaCL

   Options Database Keys:
. -vec_type mpiviennacl - sets the vector type to VECMPIVIENNACL during a call to VecSetFromOptions()

   Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECMPI, VecType, VecCreateMPI()
M*/
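
/*
   Illustrative usage sketch: one plausible way to request this type through the options
   database instead of hard-coding it, assuming petscvec.h is included and PETSc has been
   initialized.  Run with -vec_type mpiviennacl (or -vec_type viennacl).  Variable names
   and the size 100 are arbitrary; error checking is omitted for brevity.

     Vec       x;
     PetscReal nrm;

     VecCreate(PETSC_COMM_WORLD,&x);
     VecSetSizes(x,PETSC_DECIDE,100);
     VecSetFromOptions(x);             // picks up -vec_type mpiviennacl from the options database
     VecSet(x,1.0);
     VecNorm(x,NORM_2,&nrm);           // dispatches to VecNorm_MPIViennaCL() above for this type
     VecDestroy(&x);
*/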

PetscErrorCode VecDuplicate_MPIViennaCL(Vec win,Vec *v)
{
  Vec_MPI     *vw,*w = (Vec_MPI*)win->data;
  PetscScalar *array;

  VecCreate(PetscObjectComm((PetscObject)win),v);
  PetscLayoutReference(win->map,&(*v)->map);

  VecCreate_MPI_Private(*v,PETSC_FALSE,w->nghost,0);
  vw = (Vec_MPI*)(*v)->data;
  PetscMemcpy((*v)->ops,win->ops,sizeof(struct _VecOps));

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    VecGetArray(*v,&array);
    VecCreateSeqWithArray(PETSC_COMM_SELF,1,win->map->n+w->nghost,array,&vw->localrep);
    PetscMemcpy(vw->localrep->ops,w->localrep->ops,sizeof(struct _VecOps));
    VecRestoreArray(*v,&array);
    PetscLogObjectParent((PetscObject)*v,(PetscObject)vw->localrep);
    vw->localupdate = w->localupdate;
    if (vw->localupdate) {
      PetscObjectReference((PetscObject)vw->localupdate);
    }
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash   = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  /* change type_name appropriately */
  PetscObjectChangeTypeName((PetscObject)(*v),VECMPIVIENNACL);

  PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*v))->olist);
  PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*v))->qlist);
  (*v)->map->bs   = PetscAbs(win->map->bs);
  (*v)->bstash.bs = win->bstash.bs;
  return(0);
}

PetscErrorCode VecDotNorm2_MPIViennaCL(Vec s,Vec t,PetscScalar *dp,PetscScalar *nm)
{
  PetscScalar work[2],sum[2];

  VecDotNorm2_SeqViennaCL(s,t,work,work+1);
  MPIU_Allreduce((void*)&work,(void*)&sum,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)s));
  *dp = sum[0];
  *nm = sum[1];
  return(0);
}

PetscErrorCode VecBindToCPU_MPIViennaCL(Vec vv, PetscBool pin)
{
  vv->boundtocpu = pin;

  if (pin) {
    VecViennaCLCopyFromGPU(vv);
    vv->offloadmask                 = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
    vv->ops->dotnorm2               = NULL;
    vv->ops->waxpy                  = VecWAXPY_Seq;
    vv->ops->dot                    = VecDot_MPI;
    vv->ops->mdot                   = VecMDot_MPI;
    vv->ops->tdot                   = VecTDot_MPI;
    vv->ops->norm                   = VecNorm_MPI;
    vv->ops->scale                  = VecScale_Seq;
    vv->ops->copy                   = VecCopy_Seq;
    vv->ops->set                    = VecSet_Seq;
    vv->ops->swap                   = VecSwap_Seq;
    vv->ops->axpy                   = VecAXPY_Seq;
    vv->ops->axpby                  = VecAXPBY_Seq;
    vv->ops->maxpy                  = VecMAXPY_Seq;
    vv->ops->aypx                   = VecAYPX_Seq;
    vv->ops->axpbypcz               = VecAXPBYPCZ_Seq;
    vv->ops->pointwisemult          = VecPointwiseMult_Seq;
    vv->ops->setrandom              = VecSetRandom_Seq;
    vv->ops->placearray             = VecPlaceArray_Seq;
    vv->ops->replacearray           = VecReplaceArray_Seq;
    vv->ops->resetarray             = VecResetArray_Seq;
    vv->ops->dot_local              = VecDot_Seq;
    vv->ops->tdot_local             = VecTDot_Seq;
    vv->ops->norm_local             = VecNorm_Seq;
    vv->ops->mdot_local             = VecMDot_Seq;
    vv->ops->pointwisedivide        = VecPointwiseDivide_Seq;
    vv->ops->getlocalvector         = NULL;
    vv->ops->restorelocalvector     = NULL;
    vv->ops->getlocalvectorread     = NULL;
    vv->ops->restorelocalvectorread = NULL;
    vv->ops->getarraywrite          = NULL;
  } else {
    vv->ops->dotnorm2        = VecDotNorm2_MPIViennaCL;
    vv->ops->waxpy           = VecWAXPY_SeqViennaCL;
    vv->ops->duplicate       = VecDuplicate_MPIViennaCL;
    vv->ops->dot             = VecDot_MPIViennaCL;
    vv->ops->mdot            = VecMDot_MPIViennaCL;
    vv->ops->tdot            = VecTDot_MPIViennaCL;
    vv->ops->norm            = VecNorm_MPIViennaCL;
    vv->ops->scale           = VecScale_SeqViennaCL;
    vv->ops->copy            = VecCopy_SeqViennaCL;
    vv->ops->set             = VecSet_SeqViennaCL;
    vv->ops->swap            = VecSwap_SeqViennaCL;
    vv->ops->axpy            = VecAXPY_SeqViennaCL;
    vv->ops->axpby           = VecAXPBY_SeqViennaCL;
    vv->ops->maxpy           = VecMAXPY_SeqViennaCL;
    vv->ops->aypx            = VecAYPX_SeqViennaCL;
    vv->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
    vv->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
    vv->ops->setrandom       = VecSetRandom_SeqViennaCL;
    vv->ops->dot_local       = VecDot_SeqViennaCL;
    vv->ops->tdot_local      = VecTDot_SeqViennaCL;
    vv->ops->norm_local      = VecNorm_SeqViennaCL;
    vv->ops->mdot_local      = VecMDot_SeqViennaCL;
    vv->ops->destroy         = VecDestroy_MPIViennaCL;
    vv->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
    vv->ops->placearray      = VecPlaceArray_SeqViennaCL;
    vv->ops->replacearray    = VecReplaceArray_SeqViennaCL;
    vv->ops->resetarray      = VecResetArray_SeqViennaCL;
    vv->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
    vv->ops->getarray        = VecGetArray_SeqViennaCL;
    vv->ops->restorearray    = VecRestoreArray_SeqViennaCL;
  }
  return(0);
}

PETSC_EXTERN PetscErrorCode VecCreate_MPIViennaCL(Vec vv)
{
  PetscLayoutSetUp(vv->map);
  VecViennaCLAllocateCheck(vv);
  VecCreate_MPIViennaCL_Private(vv,PETSC_FALSE,0,((Vec_ViennaCL*)(vv->spptr))->GPUarray);
  VecViennaCLAllocateCheckHost(vv);
  VecSet(vv,0.0);
  VecSet_Seq(vv,0.0);
  vv->offloadmask = PETSC_OFFLOAD_BOTH;
  return(0);
}

PETSC_EXTERN PetscErrorCode VecCreate_ViennaCL(Vec v)
{
  PetscMPIInt size;

  MPI_Comm_size(PetscObjectComm((PetscObject)v),&size);
  if (size == 1) {
    VecSetType(v,VECSEQVIENNACL);
  } else {
    VecSetType(v,VECMPIVIENNACL);
  }
  return(0);
}

/*@C
   VecCreateMPIViennaCLWithArray - Creates a parallel, array-style vector,
   where the user provides the ViennaCL vector to store the vector values.

   Collective

   Input Parameters:
+  comm - the MPI communicator to use
.  bs - block size, same meaning as VecSetBlockSize()
.  n - local vector length, cannot be PETSC_DECIDE
.  N - global vector length (or PETSC_DECIDE to have it calculated)
-  array - the user provided GPU array to store the vector values

   Output Parameter:
.  vv - the vector

   Notes:
   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
   at a later stage to SET the array for storing the vector values.

   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
   The user should not free the array until the vector is destroyed.

   Level: intermediate

.seealso: VecCreateSeqViennaCLWithArray(), VecCreateMPIWithArray(), VecCreateSeqWithArray(),
          VecCreate(), VecCreateMPI(), VecCreateGhostWithArray(), VecViennaCLPlaceArray()

@*/
PetscErrorCode VecCreateMPIViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const ViennaCLVector *array,Vec *vv)
{
  if (n == PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must set local size of vector");
  PetscSplitOwnership(comm,&n,&N);
  VecCreate(comm,vv);
  VecSetSizes(*vv,n,N);
  VecSetBlockSize(*vv,bs);
  VecCreate_MPIViennaCL_Private(*vv,PETSC_FALSE,0,array);
  return(0);
}
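
/*
   Illustrative usage sketch for VecCreateMPIViennaCLWithArray(): wrapping an existing,
   user-managed ViennaCL device vector in a parallel PETSc Vec.  It assumes the
   ViennaCLVector type from the signature above is visible to the calling code; the local
   length n and the variable names are arbitrary, and error checking is omitted for brevity.

     PetscInt       n = 100;                       // local length; must not be PETSC_DECIDE
     ViennaCLVector gpuvec(n);                     // user-managed device storage
     Vec            x;

     VecCreateMPIViennaCLWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,&gpuvec,&x);
     // ... use x ...
     VecDestroy(&x);                               // the ViennaCL vector is NOT freed by PETSc
*/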

/*@C
   VecCreateMPIViennaCLWithArrays - Creates a parallel, array-style vector,
   where the user provides the ViennaCL vector to store the vector values.

   Collective

   Input Parameters:
+  comm - the MPI communicator to use
.  bs - block size, same meaning as VecSetBlockSize()
.  n - local vector length, cannot be PETSC_DECIDE
.  N - global vector length (or PETSC_DECIDE to have it calculated)
.  cpuarray - the user provided CPU array to store the vector values
-  viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device

   Output Parameter:
.  vv - the vector

   Notes:
   If both cpuarray and viennaclvec are provided, the caller must ensure that
   the provided arrays have identical values.

   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   PETSc does NOT free the provided arrays when the vector is destroyed via
   VecDestroy(). The user should not free the arrays until the vector is
   destroyed.

   Level: intermediate

.seealso: VecCreateSeqViennaCLWithArrays(), VecCreateMPIWithArray(),
          VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
          VecCreateMPI(), VecCreateGhostWithArray(), VecViennaCLPlaceArray(),
          VecPlaceArray(), VecCreateMPICUDAWithArrays(),
          VecViennaCLAllocateCheckHost()
@*/
PetscErrorCode VecCreateMPIViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar cpuarray[],const ViennaCLVector *viennaclvec,Vec *vv)
{
  VecCreateMPIViennaCLWithArray(comm,bs,n,N,viennaclvec,vv);

  if (cpuarray && viennaclvec) {
    Vec_MPI *s = (Vec_MPI*)((*vv)->data);
    s->array           = (PetscScalar*)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_BOTH;
  } else if (cpuarray) {
    Vec_MPI *s = (Vec_MPI*)((*vv)->data);
    s->array           = (PetscScalar*)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_CPU;
  } else if (viennaclvec) {
    (*vv)->offloadmask = PETSC_OFFLOAD_GPU;
  } else {
    (*vv)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }

  return(0);
}
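
/*
   Illustrative usage sketch for VecCreateMPIViennaCLWithArrays(): providing both a host
   array and a ViennaCL device vector, which the caller must keep identical at creation
   time, as noted in the manual page above.  The variable names, the local length n, and
   the use of std::vector (from <vector>) for the host buffer are arbitrary choices, and
   error checking is omitted for brevity.

     PetscInt                 n = 100;             // local length; must not be PETSC_DECIDE
     std::vector<PetscScalar> host(n,1.0);         // user-managed host storage
     ViennaCLVector           device(n);           // user-managed device storage
     Vec                      x;

     viennacl::copy(host.begin(),host.end(),device.begin());   // make the two copies identical
     VecCreateMPIViennaCLWithArrays(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,host.data(),&device,&x);
     // ... use x ...
     VecDestroy(&x);                               // neither user buffer is freed by PETSc
*/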

PetscErrorCode VecCreate_MPIViennaCL_Private(Vec vv,PetscBool alloc,PetscInt nghost,const ViennaCLVector *array)
{
  Vec_ViennaCL *vecviennacl;

  VecCreate_MPI_Private(vv,PETSC_FALSE,0,0);
  PetscObjectChangeTypeName((PetscObject)vv,VECMPIVIENNACL);

  VecBindToCPU_MPIViennaCL(vv,PETSC_FALSE);
  vv->ops->bindtocpu = VecBindToCPU_MPIViennaCL;

  if (alloc && !array) {
    VecViennaCLAllocateCheck(vv);
    VecViennaCLAllocateCheckHost(vv);
    VecSet(vv,0.0);
    VecSet_Seq(vv,0.0);
    vv->offloadmask = PETSC_OFFLOAD_BOTH;
  }
  if (array) {
    if (!vv->spptr)
      vv->spptr = new Vec_ViennaCL;
    vecviennacl                     = (Vec_ViennaCL*)vv->spptr;
    vecviennacl->GPUarray_allocated = 0;
    vecviennacl->GPUarray           = (ViennaCLVector*)array;
    vv->offloadmask                 = PETSC_OFFLOAD_UNALLOCATED;
  }

  return(0);
}