Actual source code: mpicuda.cu


/*
   This file contains routines for parallel vector operations.
 */
#define PETSC_SKIP_SPINLOCK

#include <petscconf.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>
#include <petsc/private/cudavecimpl.h>

/*MC
   VECCUDA - VECCUDA = "cuda" - A VECSEQCUDA on a single-process communicator, and VECMPICUDA otherwise.

   Options Database Keys:
. -vec_type cuda - sets the vector type to VECCUDA during a call to VecSetFromOptions()

  Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECSEQCUDA, VECMPICUDA, VECSTANDARD, VecType, VecCreateMPI(), VecSetPinnedMemoryMin()
M*/
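/* A minimal usage sketch (assuming PetscInitialize() has already been called):
   the type is taken from the options database, so running with -vec_type cuda
   selects VECSEQCUDA or VECMPICUDA as appropriate.

     Vec x;
     VecCreate(PETSC_COMM_WORLD,&x);
     VecSetSizes(x,PETSC_DECIDE,100);
     VecSetFromOptions(x);              // honors -vec_type cuda
     VecSet(x,1.0);
     VecDestroy(&x);
*/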

PetscErrorCode VecDestroy_MPICUDA(Vec v)
{
  Vec_MPI        *vecmpi = (Vec_MPI*)v->data;
  Vec_CUDA       *veccuda;
  cudaError_t    err;

  if (v->spptr) {
    veccuda = (Vec_CUDA*)v->spptr;
    /* Free the device array and stream owned by this vector */
    if (veccuda->GPUarray_allocated) {
      err = cudaFree(veccuda->GPUarray_allocated);CHKERRCUDA(err);
      veccuda->GPUarray_allocated = NULL;
    }
    if (veccuda->stream) {
      err = cudaStreamDestroy(veccuda->stream);CHKERRCUDA(err);
    }
    /* Pinned host memory must be released through the CUDA host allocator */
    if (v->pinned_memory) {
      PetscMallocSetCUDAHost();
      PetscFree(vecmpi->array_allocated);
      PetscMallocResetCUDAHost();
      v->pinned_memory = PETSC_FALSE;
    }
    PetscFree(v->spptr);
  }
  VecDestroy_MPI(v);
  return(0);
}

PetscErrorCode VecNorm_MPICUDA(Vec xin,NormType type,PetscReal *z)
{
  PetscReal      sum,work = 0.0;

  if (type == NORM_2 || type == NORM_FROBENIUS) {
    VecNorm_SeqCUDA(xin,NORM_2,&work);
    work *= work;
    MPIU_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
    *z    = PetscSqrtReal(sum);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_SeqCUDA(xin,NORM_1,&work);
    /* Find the global sum */
    MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_SeqCUDA(xin,NORM_INFINITY,&work);
    /* Find the global max */
    MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    VecNorm_SeqCUDA(xin,NORM_1,temp);
    VecNorm_SeqCUDA(xin,NORM_2,temp+1);
    temp[1] = temp[1]*temp[1];
    MPIU_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
    z[1] = PetscSqrtReal(z[1]);
  }
  return(0);
}

PetscErrorCode VecDot_MPICUDA(Vec xin,Vec yin,PetscScalar *z)
{
  PetscScalar    sum,work;

  VecDot_SeqCUDA(xin,yin,&work);
  MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
  *z   = sum;
  return(0);
}

PetscErrorCode VecTDot_MPICUDA(Vec xin,Vec yin,PetscScalar *z)
{
  PetscScalar    sum,work;

  VecTDot_SeqCUDA(xin,yin,&work);
  MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
  *z   = sum;
  return(0);
}

PetscErrorCode VecMDot_MPICUDA(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
{
  PetscScalar    awork[128],*work = awork;

  /* Use the stack buffer for up to 128 vectors; fall back to the heap otherwise */
  if (nv > 128) {
    PetscMalloc1(nv,&work);
  }
  VecMDot_SeqCUDA(xin,nv,y,work);
  MPIU_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}

/*MC
   VECMPICUDA - VECMPICUDA = "mpicuda" - The basic parallel vector, modified to use CUDA

   Options Database Keys:
. -vec_type mpicuda - sets the vector type to VECMPICUDA during a call to VecSetFromOptions()

  Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECMPI, VecType, VecCreateMPI(), VecSetPinnedMemoryMin()
M*/
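/* A minimal usage sketch selecting the type explicitly rather than through the
   options database (assuming PetscInitialize() has already been called):

     Vec x;
     VecCreate(PETSC_COMM_WORLD,&x);
     VecSetSizes(x,PETSC_DECIDE,100);
     VecSetType(x,VECMPICUDA);
     VecDestroy(&x);
*/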

PetscErrorCode VecDuplicate_MPICUDA(Vec win,Vec *v)
{
  Vec_MPI        *vw,*w = (Vec_MPI*)win->data;
  PetscScalar    *array;

  VecCreate(PetscObjectComm((PetscObject)win),v);
  PetscLayoutReference(win->map,&(*v)->map);

  VecCreate_MPICUDA_Private(*v,PETSC_TRUE,w->nghost,0);
  vw   = (Vec_MPI*)(*v)->data;
  PetscMemcpy((*v)->ops,win->ops,sizeof(struct _VecOps));

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    VecGetArray(*v,&array);
    VecCreateSeqWithArray(PETSC_COMM_SELF,1,win->map->n+w->nghost,array,&vw->localrep);
    PetscMemcpy(vw->localrep->ops,w->localrep->ops,sizeof(struct _VecOps));
    VecRestoreArray(*v,&array);
    PetscLogObjectParent((PetscObject)*v,(PetscObject)vw->localrep);
    vw->localupdate = w->localupdate;
    if (vw->localupdate) {
      PetscObjectReference((PetscObject)vw->localupdate);
    }
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash   = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  /* change type_name appropriately */
  VecCUDAAllocateCheck(*v);
  PetscObjectChangeTypeName((PetscObject)(*v),VECMPICUDA);

  PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*v))->olist);
  PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*v))->qlist);
  (*v)->map->bs   = PetscAbs(win->map->bs);
  (*v)->bstash.bs = win->bstash.bs;
  return(0);
}

PetscErrorCode VecDotNorm2_MPICUDA(Vec s,Vec t,PetscScalar *dp,PetscScalar *nm)
{
  PetscScalar    work[2],sum[2];

  VecDotNorm2_SeqCUDA(s,t,work,work+1);
  MPIU_Allreduce(work,sum,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)s));
  *dp  = sum[0];
  *nm  = sum[1];
  return(0);
}

PetscErrorCode VecCreate_MPICUDA(Vec vv)
{
  PetscCUDAInitializeCheck();
  PetscLayoutSetUp(vv->map);
  VecCUDAAllocateCheck(vv);
  VecCreate_MPICUDA_Private(vv,PETSC_FALSE,0,((Vec_CUDA*)vv->spptr)->GPUarray_allocated);
  VecCUDAAllocateCheckHost(vv);
  /* Zero both the device and host copies so the two sides start synchronized */
  VecSet(vv,0.0);
  VecSet_Seq(vv,0.0);
  vv->offloadmask = PETSC_OFFLOAD_BOTH;
  return(0);
}

PetscErrorCode VecCreate_CUDA(Vec v)
{
  PetscMPIInt    size;

  MPI_Comm_size(PetscObjectComm((PetscObject)v),&size);
  if (size == 1) {
    VecSetType(v,VECSEQCUDA);
  } else {
    VecSetType(v,VECMPICUDA);
  }
  return(0);
}

/*@
   VecCreateMPICUDA - Creates a standard, parallel array-style vector for CUDA devices.

   Collective

   Input Parameters:
+  comm - the MPI communicator to use
.  n    - local vector length (or PETSC_DECIDE to have it calculated if N is given)
-  N    - global vector length (or PETSC_DETERMINE to have it calculated if n is given)

   Output Parameter:
.  v - the vector

   Notes:
   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector. See the usage sketch following this routine.

   Level: intermediate

.seealso: VecCreateMPICUDAWithArray(), VecCreateMPICUDAWithArrays(), VecCreateSeqCUDA(), VecCreateSeq(),
          VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
          VecCreateMPIWithArray(), VecCreateGhostWithArray(), VecMPISetGhost()

@*/
PetscErrorCode VecCreateMPICUDA(MPI_Comm comm,PetscInt n,PetscInt N,Vec *v)
{
  VecCreate(comm,v);
  VecSetSizes(*v,n,N);
  VecSetType(*v,VECMPICUDA);
  return(0);
}
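/* A minimal usage sketch for the convenience constructor above (assuming
   PetscInitialize() has already been called; error checking omitted):

     Vec v;
     VecCreateMPICUDA(PETSC_COMM_WORLD,PETSC_DECIDE,100,&v);
     VecSet(v,2.0);
     VecDestroy(&v);
*/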

/*@C
   VecCreateMPICUDAWithArray - Creates a parallel, array-style vector,
   where the user provides the GPU array space to store the vector values.

   Collective

   Input Parameters:
+  comm  - the MPI communicator to use
.  bs    - block size, same meaning as VecSetBlockSize()
.  n     - local vector length, cannot be PETSC_DECIDE
.  N     - global vector length (or PETSC_DECIDE to have it calculated)
-  array - the user provided GPU array to store the vector values

   Output Parameter:
.  vv - the vector

   Notes:
   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   If the user-provided array is NULL, then VecCUDAPlaceArray() can be used
   at a later stage to SET the array for storing the vector values.

   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
   The user should not free the array until the vector is destroyed. See the
   usage sketch following this routine.

   Level: intermediate

.seealso: VecCreateMPICUDA(), VecCreateSeqCUDAWithArray(), VecCreateMPIWithArray(), VecCreateSeqWithArray(),
          VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
          VecCreateMPI(), VecCreateGhostWithArray(), VecPlaceArray()

@*/
PetscErrorCode VecCreateMPICUDAWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar array[],Vec *vv)
{
  if (n == PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must set local size of vector");
  PetscCUDAInitializeCheck();
  VecCreate(comm,vv);
  VecSetSizes(*vv,n,N);
  VecSetBlockSize(*vv,bs);
  VecCreate_MPICUDA_Private(*vv,PETSC_FALSE,0,array);
  return(0);
}
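/* A usage sketch wrapping a caller-owned device buffer (n is a caller-chosen
   local length; error checking abbreviated):

     PetscScalar *gpuarray;
     Vec          v;
     cudaError_t  err;

     err = cudaMalloc((void**)&gpuarray,n*sizeof(PetscScalar));CHKERRCUDA(err);
     VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,gpuarray,&v);
     ...
     VecDestroy(&v);                    // does NOT free gpuarray
     err = cudaFree(gpuarray);CHKERRCUDA(err);
*/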

/*@C
   VecCreateMPICUDAWithArrays - Creates a parallel, array-style vector,
   where the user provides the CPU and GPU array space to store the vector values.

   Collective

   Input Parameters:
+  comm  - the MPI communicator to use
.  bs    - block size, same meaning as VecSetBlockSize()
.  n     - local vector length, cannot be PETSC_DECIDE
.  N     - global vector length (or PETSC_DECIDE to have it calculated)
.  cpuarray - the user provided CPU array to store the vector values
-  gpuarray - the user provided GPU array to store the vector values

   Output Parameter:
.  vv - the vector

   Notes:
   If both cpuarray and gpuarray are provided, the caller must ensure that
   the provided arrays have identical values.

   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   PETSc does NOT free the provided arrays when the vector is destroyed via
   VecDestroy(). The user should not free the arrays until the vector is
   destroyed. See the usage sketch following this routine.

   Level: intermediate

.seealso: VecCreateSeqCUDAWithArrays(), VecCreateMPIWithArray(), VecCreateSeqWithArray(),
          VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
          VecCreateMPI(), VecCreateGhostWithArray(), VecCUDAPlaceArray(), VecPlaceArray(),
          VecCUDAAllocateCheckHost()
@*/
PetscErrorCode VecCreateMPICUDAWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar cpuarray[],const PetscScalar gpuarray[],Vec *vv)
{
  VecCreateMPICUDAWithArray(comm,bs,n,N,gpuarray,vv);

  /* Record which copies of the data are currently valid */
  if (cpuarray && gpuarray) {
    Vec_MPI *s         = (Vec_MPI*)((*vv)->data);
    s->array           = (PetscScalar*)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_BOTH;
  } else if (cpuarray) {
    Vec_MPI *s         = (Vec_MPI*)((*vv)->data);
    s->array           = (PetscScalar*)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_CPU;
  } else if (gpuarray) {
    (*vv)->offloadmask = PETSC_OFFLOAD_GPU;
  } else {
    (*vv)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }

  return(0);
}
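/* A usage sketch providing matching host and device buffers (n is a
   caller-chosen local length; both arrays must hold identical values):

     PetscScalar *cpuarray,*gpuarray;
     Vec          v;

     PetscMalloc1(n,&cpuarray);
     cudaMalloc((void**)&gpuarray,n*sizeof(PetscScalar));
     ... fill cpuarray and gpuarray with the same values ...
     VecCreateMPICUDAWithArrays(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,cpuarray,gpuarray,&v);
*/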

PetscErrorCode VecMax_MPICUDA(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscReal      work;

  VecMax_SeqCUDA(xin,idx,&work);
#if defined(PETSC_HAVE_MPIUNI)
  *z = work;
#else
  if (!idx) {
    MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
  } else {
    /* Reduce (value,index) pairs so the global max arrives with its global index */
    struct { PetscReal v; PetscInt i; } in,out;

    in.v  = work;
    in.i  = *idx + xin->map->rstart;
    MPIU_Allreduce(&in,&out,1,MPIU_REAL_INT,MPIU_MAXLOC,PetscObjectComm((PetscObject)xin));
    *z    = out.v;
    *idx  = out.i;
  }
#endif
  return(0);
}

PetscErrorCode VecMin_MPICUDA(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscReal      work;

  VecMin_SeqCUDA(xin,idx,&work);
#if defined(PETSC_HAVE_MPIUNI)
  *z = work;
#else
  if (!idx) {
    MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)xin));
  } else {
    /* Reduce (value,index) pairs so the global min arrives with its global index */
    struct { PetscReal v; PetscInt i; } in,out;

    in.v  = work;
    in.i  = *idx + xin->map->rstart;
    MPIU_Allreduce(&in,&out,1,MPIU_REAL_INT,MPIU_MINLOC,PetscObjectComm((PetscObject)xin));
    *z    = out.v;
    *idx  = out.i;
  }
#endif
  return(0);
}

PetscErrorCode VecBindToCPU_MPICUDA(Vec V,PetscBool pin)
{
  V->boundtocpu = pin;
  if (pin) {
    VecCUDACopyFromGPU(V);
    V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
    V->ops->dotnorm2               = NULL;
    V->ops->waxpy                  = VecWAXPY_Seq;
    V->ops->dot                    = VecDot_MPI;
    V->ops->mdot                   = VecMDot_MPI;
    V->ops->tdot                   = VecTDot_MPI;
    V->ops->norm                   = VecNorm_MPI;
    V->ops->scale                  = VecScale_Seq;
    V->ops->copy                   = VecCopy_Seq;
    V->ops->set                    = VecSet_Seq;
    V->ops->swap                   = VecSwap_Seq;
    V->ops->axpy                   = VecAXPY_Seq;
    V->ops->axpby                  = VecAXPBY_Seq;
    V->ops->maxpy                  = VecMAXPY_Seq;
    V->ops->aypx                   = VecAYPX_Seq;
    V->ops->axpbypcz               = VecAXPBYPCZ_Seq;
    V->ops->pointwisemult          = VecPointwiseMult_Seq;
    V->ops->setrandom              = VecSetRandom_Seq;
    V->ops->placearray             = VecPlaceArray_Seq;
    V->ops->replacearray           = VecReplaceArray_SeqCUDA;
    V->ops->resetarray             = VecResetArray_Seq;
    V->ops->dot_local              = VecDot_Seq;
    V->ops->tdot_local             = VecTDot_Seq;
    V->ops->norm_local             = VecNorm_Seq;
    V->ops->mdot_local             = VecMDot_Seq;
    V->ops->pointwisedivide        = VecPointwiseDivide_Seq;
    V->ops->getlocalvector         = NULL;
    V->ops->restorelocalvector     = NULL;
    V->ops->getlocalvectorread     = NULL;
    V->ops->restorelocalvectorread = NULL;
    V->ops->getarraywrite          = NULL;
    V->ops->max                    = VecMax_MPI;
    V->ops->min                    = VecMin_MPI;
    V->ops->reciprocal             = VecReciprocal_Default;
    V->ops->sum                    = NULL;
    V->ops->shift                  = NULL;
    /* default random number generator */
    PetscFree(V->defaultrandtype);
    PetscStrallocpy(PETSCRANDER48,&V->defaultrandtype);
  } else {
    V->ops->dotnorm2               = VecDotNorm2_MPICUDA;
    V->ops->waxpy                  = VecWAXPY_SeqCUDA;
    V->ops->duplicate              = VecDuplicate_MPICUDA;
    V->ops->dot                    = VecDot_MPICUDA;
    V->ops->mdot                   = VecMDot_MPICUDA;
    V->ops->tdot                   = VecTDot_MPICUDA;
    V->ops->norm                   = VecNorm_MPICUDA;
    V->ops->scale                  = VecScale_SeqCUDA;
    V->ops->copy                   = VecCopy_SeqCUDA;
    V->ops->set                    = VecSet_SeqCUDA;
    V->ops->swap                   = VecSwap_SeqCUDA;
    V->ops->axpy                   = VecAXPY_SeqCUDA;
    V->ops->axpby                  = VecAXPBY_SeqCUDA;
    V->ops->maxpy                  = VecMAXPY_SeqCUDA;
    V->ops->aypx                   = VecAYPX_SeqCUDA;
    V->ops->axpbypcz               = VecAXPBYPCZ_SeqCUDA;
    V->ops->pointwisemult          = VecPointwiseMult_SeqCUDA;
    V->ops->setrandom              = VecSetRandom_SeqCUDA;
    V->ops->placearray             = VecPlaceArray_SeqCUDA;
    V->ops->replacearray           = VecReplaceArray_SeqCUDA;
    V->ops->resetarray             = VecResetArray_SeqCUDA;
    V->ops->dot_local              = VecDot_SeqCUDA;
    V->ops->tdot_local             = VecTDot_SeqCUDA;
    V->ops->norm_local             = VecNorm_SeqCUDA;
    V->ops->mdot_local             = VecMDot_SeqCUDA;
    V->ops->destroy                = VecDestroy_MPICUDA;
    V->ops->pointwisedivide        = VecPointwiseDivide_SeqCUDA;
    V->ops->getlocalvector         = VecGetLocalVector_SeqCUDA;
    V->ops->restorelocalvector     = VecRestoreLocalVector_SeqCUDA;
    V->ops->getlocalvectorread     = VecGetLocalVectorRead_SeqCUDA;
    V->ops->restorelocalvectorread = VecRestoreLocalVectorRead_SeqCUDA;
    V->ops->getarraywrite          = VecGetArrayWrite_SeqCUDA;
    V->ops->getarray               = VecGetArray_SeqCUDA;
    V->ops->restorearray           = VecRestoreArray_SeqCUDA;
    V->ops->getarrayandmemtype     = VecGetArrayAndMemType_SeqCUDA;
    V->ops->restorearrayandmemtype = VecRestoreArrayAndMemType_SeqCUDA;
    V->ops->max                    = VecMax_MPICUDA;
    V->ops->min                    = VecMin_MPICUDA;
    V->ops->reciprocal             = VecReciprocal_SeqCUDA;
    V->ops->sum                    = VecSum_SeqCUDA;
    V->ops->shift                  = VecShift_SeqCUDA;
    /* default random number generator */
    PetscFree(V->defaultrandtype);
    PetscStrallocpy(PETSCCURAND,&V->defaultrandtype);
  }
  return(0);
}
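/* A usage sketch: VecBindToCPU() is the public entry point, which dispatches
   to the routine above for VECMPICUDA vectors.

     VecBindToCPU(v,PETSC_TRUE);    // run subsequent operations on the host
     ...
     VecBindToCPU(v,PETSC_FALSE);   // resume GPU execution
*/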

PetscErrorCode VecCreate_MPICUDA_Private(Vec vv,PetscBool alloc,PetscInt nghost,const PetscScalar array[])
{
  Vec_CUDA       *veccuda;

  VecCreate_MPI_Private(vv,PETSC_FALSE,0,0);
  PetscObjectChangeTypeName((PetscObject)vv,VECMPICUDA);

  VecBindToCPU_MPICUDA(vv,PETSC_FALSE);
  vv->ops->bindtocpu = VecBindToCPU_MPICUDA;

  /* Later code checks for the existence of the Vec_CUDA structure, so only create it when allocating or when an array is provided */
  if (alloc && !array) {
    VecCUDAAllocateCheck(vv);
    VecCUDAAllocateCheckHost(vv);
    VecSet(vv,0.0);
    VecSet_Seq(vv,0.0);
    vv->offloadmask = PETSC_OFFLOAD_BOTH;
  }
  if (array) {
    if (!vv->spptr) {
      PetscReal pinned_memory_min;
      PetscBool flag;
      /* Cannot use PetscNew() here because spptr is void* */
      PetscCalloc(sizeof(Vec_CUDA),&vv->spptr);
      veccuda = (Vec_CUDA*)vv->spptr;
      vv->minimum_bytes_pinned_memory = 0;

      /* Parse the command line here for the minimum allocation size that uses pinned memory on the host.
         Note: this same code is duplicated in VecCreate_SeqCUDA_Private() and VecCUDAAllocateCheck(). Is there a good way to avoid this? */
      PetscOptionsBegin(PetscObjectComm((PetscObject)vv),((PetscObject)vv)->prefix,"VECCUDA Options","Vec");
      pinned_memory_min = vv->minimum_bytes_pinned_memory;
      PetscOptionsReal("-vec_pinned_memory_min","Minimum size (in bytes) for an allocation to use pinned memory on host","VecSetPinnedMemoryMin",pinned_memory_min,&pinned_memory_min,&flag);
      if (flag) vv->minimum_bytes_pinned_memory = pinned_memory_min;
      PetscOptionsEnd();
    }
    veccuda = (Vec_CUDA*)vv->spptr;
    veccuda->GPUarray = (PetscScalar*)array;
    vv->offloadmask = PETSC_OFFLOAD_GPU;
  }
  return(0);
}