Actual source code: vechip.hip.cpp

/*
 Implementation of the sequential HIP vectors.

 This file contains the code that can be compiled with a C
 compiler.  The companion file vechip2.hip.cpp contains the code that
 must be compiled with the hipcc compiler.
 */

  9: #define PETSC_SKIP_SPINLOCK

 11: #include <petscconf.h>
 12: #include <petsc/private/vecimpl.h>
 13: #include <../src/vec/vec/impls/dvecimpl.h>
 14: #include <petsc/private/hipvecimpl.h>

 16: PetscErrorCode VecHIPGetArrays_Private(Vec v,const PetscScalar** x,const PetscScalar** x_d,PetscOffloadMask* flg)
 17: {
 20:   if (x) {
 21:     Vec_Seq *h = (Vec_Seq*)v->data;

 23:     *x = h->array;
 24:   }
 25:   if (x_d) {
 26:     Vec_HIP *d = (Vec_HIP*)v->spptr;

 28:     *x_d = d ? d->GPUarray : NULL;
 29:   }
 30:   if (flg) *flg = v->offloadmask;
 31:   return(0);
 32: }

 34: /*
 35:     Allocates space for the vector array on the Host if it does not exist.
 36:     Does NOT change the PetscHIPFlag for the vector
 37:     Does NOT zero the HIP array
 38:  */
 39: PetscErrorCode VecHIPAllocateCheckHost(Vec v)
 40: {
 42:   PetscScalar    *array;
 43:   Vec_Seq        *s = (Vec_Seq*)v->data;
 44:   PetscInt       n = v->map->n;

 47:   if (!s) {
 48:     PetscNewLog((PetscObject)v,&s);
 49:     v->data = s;
 50:   }
 51:   if (!s->array) {
 52:     if (n*sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
 53:       PetscMallocSetHIPHost();
 54:       v->pinned_memory = PETSC_TRUE;
 55:     }
 56:     PetscMalloc1(n,&array);
 57:     PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
 58:     s->array           = array;
 59:     s->array_allocated = array;
 60:     if (n*sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
 61:       PetscMallocResetHIPHost();
 62:     }
 63:     if (v->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
 64:       v->offloadmask = PETSC_OFFLOAD_CPU;
 65:     }
 66:   }
 67:   return(0);
 68: }

 70: PetscErrorCode VecCopy_SeqHIP_Private(Vec xin,Vec yin)
 71: {
 72:   PetscScalar       *ya;
 73:   const PetscScalar *xa;
 74:   PetscErrorCode    ierr;

 77:   VecHIPAllocateCheckHost(xin);
 78:   VecHIPAllocateCheckHost(yin);
 79:   if (xin != yin) {
 80:     VecGetArrayRead(xin,&xa);
 81:     VecGetArray(yin,&ya);
 82:     PetscArraycpy(ya,xa,xin->map->n);
 83:     VecRestoreArrayRead(xin,&xa);
 84:     VecRestoreArray(yin,&ya);
 85:   }
 86:   return(0);
 87: }

 89: PetscErrorCode VecSetRandom_SeqHIP(Vec xin,PetscRandom r)
 90: {
 92:   PetscInt       n = xin->map->n;
 93:   PetscScalar    *xx;

 96:   VecGetArrayWrite(xin,&xx);
 97:   PetscRandomGetValues(r,n,xx);
 98:   VecRestoreArrayWrite(xin,&xx);
 99:   return(0);
100: }

102: PetscErrorCode VecDestroy_SeqHIP_Private(Vec v)
103: {
104:   Vec_Seq        *vs = (Vec_Seq*)v->data;

108:   PetscObjectSAWsViewOff(v);
109: #if defined(PETSC_USE_LOG)
110:   PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
111: #endif
112:   if (vs) {
113:     if (vs->array_allocated) {
114:       if (v->pinned_memory) {
115:         PetscMallocSetHIPHost();
116:       }
117:       PetscFree(vs->array_allocated);
118:       if (v->pinned_memory) {
119:         PetscMallocResetHIPHost();
120:         v->pinned_memory = PETSC_FALSE;
121:       }
122:     }
123:     PetscFree(vs);
124:   }
125:   return(0);
126: }

128: PetscErrorCode VecResetArray_SeqHIP_Private(Vec vin)
129: {
130:   Vec_Seq *v = (Vec_Seq*)vin->data;

133:   v->array         = v->unplacedarray;
134:   v->unplacedarray = 0;
135:   return(0);
136: }

138: PetscErrorCode VecResetArray_SeqHIP(Vec vin)
139: {

143:   VecHIPCopyFromGPU(vin);
144:   VecResetArray_SeqHIP_Private(vin);
145:   vin->offloadmask = PETSC_OFFLOAD_CPU;
146:   return(0);
147: }

149: PetscErrorCode VecPlaceArray_SeqHIP(Vec vin,const PetscScalar *a)
150: {

154:   VecHIPCopyFromGPU(vin);
155:   VecPlaceArray_Seq(vin,a);
156:   vin->offloadmask = PETSC_OFFLOAD_CPU;
157:   return(0);
158: }

160: PetscErrorCode VecReplaceArray_SeqHIP(Vec vin,const PetscScalar *a)
161: {
163:   Vec_Seq        *vs = (Vec_Seq*)vin->data;

166:   if (vs->array != vs->array_allocated) {
167:     /* make sure the users array has the latest values */
168:     VecHIPCopyFromGPU(vin);
169:   }
170:   if (vs->array_allocated) {
171:     if (vin->pinned_memory) {
172:       PetscMallocSetHIPHost();
173:     }
174:     PetscFree(vs->array_allocated);
175:     if (vin->pinned_memory) {
176:       PetscMallocResetHIPHost();
177:     }
178:   }
179:   vin->pinned_memory = PETSC_FALSE;
180:   vs->array_allocated = vs->array = (PetscScalar*)a;
181:   vin->offloadmask = PETSC_OFFLOAD_CPU;
182:   return(0);
183: }

185: /*@
186:  VecCreateSeqHIP - Creates a standard, sequential array-style vector.

188:  Collective

190:  Input Parameter:
191:  +  comm - the communicator, should be PETSC_COMM_SELF
192:  -  n - the vector length

194:  Output Parameter:
195:  .  v - the vector

197:  Notes:
198:  Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
199:  same type as an existing vector.

201:  Level: intermediate

203:  .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
204:  @*/
205: PetscErrorCode VecCreateSeqHIP(MPI_Comm comm,PetscInt n,Vec *v)
206: {

210:   VecCreate(comm,v);
211:   VecSetSizes(*v,n,n);
212:   VecSetType(*v,VECSEQHIP);
213:   return(0);
214: }

216: PetscErrorCode VecDuplicate_SeqHIP(Vec win,Vec *V)
217: {

221:   VecCreateSeqHIP(PetscObjectComm((PetscObject)win),win->map->n,V);
222:   PetscLayoutReference(win->map,&(*V)->map);
223:   PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
224:   PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
225:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
226:   return(0);
227: }

229: PetscErrorCode VecCreate_SeqHIP(Vec V)
230: {

234:   PetscHIPInitializeCheck();
235:   PetscLayoutSetUp(V->map);
236:   VecHIPAllocateCheck(V);
237:   VecCreate_SeqHIP_Private(V,((Vec_HIP*)V->spptr)->GPUarray_allocated);
238:   VecHIPAllocateCheckHost(V);
239:   VecSet(V,0.0);
240:   VecSet_Seq(V,0.0);
241:   V->offloadmask = PETSC_OFFLOAD_BOTH;
242:   return(0);
243: }

245: /*@C
246:    VecCreateSeqHIPWithArray - Creates a HIP sequential array-style vector,
247:    where the user provides the array space to store the vector values. The array
248:    provided must be a GPU array.

250:    Collective

252:    Input Parameters:
253: +  comm - the communicator, should be PETSC_COMM_SELF
254: .  bs - the block size
255: .  n - the vector length
256: -  array - GPU memory where the vector elements are to be stored.

258:    Output Parameter:
259: .  V - the vector

261:    Notes:
262:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
263:    same type as an existing vector.

265:    If the user-provided array is NULL, then VecHIPPlaceArray() can be used
266:    at a later stage to SET the array for storing the vector values.

268:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
269:    The user should not free the array until the vector is destroyed.

271:    Level: intermediate

273: .seealso: VecCreateMPIHIPWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
274:           VecCreateGhost(), VecCreateSeq(), VecHIPPlaceArray(), VecCreateSeqWithArray(),
275:           VecCreateMPIWithArray()
276: @*/
277: PetscErrorCode  VecCreateSeqHIPWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar array[],Vec *V)
278: {

282:   PetscHIPInitializeCheck();
283:   VecCreate(comm,V);
284:   VecSetSizes(*V,n,n);
285:   VecSetBlockSize(*V,bs);
286:   VecCreate_SeqHIP_Private(*V,array);
287:   return(0);
288: }

290: /*@C
291:    VecCreateSeqHIPWithArrays - Creates a HIP sequential array-style vector,
292:    where the user provides the array space to store the vector values.

294:    Collective

296:    Input Parameters:
297: +  comm - the communicator, should be PETSC_COMM_SELF
298: .  bs - the block size
299: .  n - the vector length
300: -  cpuarray - CPU memory where the vector elements are to be stored.
301: -  gpuarray - GPU memory where the vector elements are to be stored.

303:    Output Parameter:
304: .  V - the vector

306:    Notes:
307:    If both cpuarray and gpuarray are provided, the caller must ensure that
308:    the provided arrays have identical values.

310:    PETSc does NOT free the provided arrays when the vector is destroyed via
311:    VecDestroy(). The user should not free the array until the vector is
312:    destroyed.

314:    Level: intermediate

316: .seealso: VecCreateMPIHIPWithArrays(), VecCreate(), VecCreateSeqWithArray(),
317:           VecHIPPlaceArray(), VecCreateSeqHIPWithArray(),
318:           VecHIPAllocateCheckHost()
319: @*/
320: PetscErrorCode  VecCreateSeqHIPWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const PetscScalar gpuarray[],Vec *V)
321: {

325:   // set V's gpuarray to be gpuarray, do not allocate memory on host yet.
326:   VecCreateSeqHIPWithArray(comm,bs,n,gpuarray,V);

328:   if (cpuarray && gpuarray) {
329:     Vec_Seq *s = (Vec_Seq*)((*V)->data);
330:     s->array = (PetscScalar*)cpuarray;
331:     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
332:   } else if (cpuarray) {
333:     Vec_Seq *s = (Vec_Seq*)((*V)->data);
334:     s->array = (PetscScalar*)cpuarray;
335:     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
336:   } else if (gpuarray) {
337:     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
338:   } else {
339:     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
340:   }

342:   return(0);
343: }

345: PetscErrorCode VecGetArray_SeqHIP(Vec v,PetscScalar **a)
346: {

350:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
351:     VecHIPCopyFromGPU(v);
352:   } else {
353:     VecHIPAllocateCheckHost(v);
354:   }
355:   *a = *((PetscScalar**)v->data);
356:   return(0);
357: }

359: PetscErrorCode VecRestoreArray_SeqHIP(Vec v,PetscScalar **a)
360: {
362:   v->offloadmask = PETSC_OFFLOAD_CPU;
363:   return(0);
364: }

366: PetscErrorCode VecGetArrayWrite_SeqHIP(Vec v,PetscScalar **a)
367: {

371:   VecHIPAllocateCheckHost(v);
372:   *a   = *((PetscScalar**)v->data);
373:   return(0);
374: }

376: PetscErrorCode VecGetArrayAndMemType_SeqHIP(Vec v,PetscScalar** a,PetscMemType *mtype)
377: {

381:   if (v->offloadmask & PETSC_OFFLOAD_GPU) { /* Prefer working on GPU when offloadmask is PETSC_OFFLOAD_BOTH */
382:     *a = ((Vec_HIP*)v->spptr)->GPUarray;
383:     v->offloadmask    = PETSC_OFFLOAD_GPU; /* Change the mask once GPU gets write access, don't wait until restore array */
384:     if (mtype) *mtype = PETSC_MEMTYPE_HIP;
385:   } else {
386:     VecHIPAllocateCheckHost(v);
387:     *a = *((PetscScalar**)v->data);
388:     if (mtype) *mtype = PETSC_MEMTYPE_HOST;
389:   }
390:   return(0);
391: }

393: PetscErrorCode VecRestoreArrayAndMemType_SeqHIP(Vec v,PetscScalar** a)
394: {
396:   if (v->offloadmask & PETSC_OFFLOAD_GPU) {
397:     v->offloadmask = PETSC_OFFLOAD_GPU;
398:   } else {
399:     v->offloadmask = PETSC_OFFLOAD_CPU;
400:   }
401:   return(0);
402: }

404: PetscErrorCode VecBindToCPU_SeqHIP(Vec V,PetscBool pin)
405: {

409:   V->boundtocpu = pin;
410:   if (pin) {
411:     VecHIPCopyFromGPU(V);
412:     V->offloadmask                 = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
413:     V->ops->dot                    = VecDot_Seq;
414:     V->ops->norm                   = VecNorm_Seq;
415:     V->ops->tdot                   = VecTDot_Seq;
416:     V->ops->scale                  = VecScale_Seq;
417:     V->ops->copy                   = VecCopy_Seq;
418:     V->ops->set                    = VecSet_Seq;
419:     V->ops->swap                   = VecSwap_Seq;
420:     V->ops->axpy                   = VecAXPY_Seq;
421:     V->ops->axpby                  = VecAXPBY_Seq;
422:     V->ops->axpbypcz               = VecAXPBYPCZ_Seq;
423:     V->ops->pointwisemult          = VecPointwiseMult_Seq;
424:     V->ops->pointwisedivide        = VecPointwiseDivide_Seq;
425:     V->ops->setrandom              = VecSetRandom_Seq;
426:     V->ops->dot_local              = VecDot_Seq;
427:     V->ops->tdot_local             = VecTDot_Seq;
428:     V->ops->norm_local             = VecNorm_Seq;
429:     V->ops->mdot_local             = VecMDot_Seq;
430:     V->ops->mtdot_local            = VecMTDot_Seq;
431:     V->ops->maxpy                  = VecMAXPY_Seq;
432:     V->ops->mdot                   = VecMDot_Seq;
433:     V->ops->mtdot                  = VecMTDot_Seq;
434:     V->ops->aypx                   = VecAYPX_Seq;
435:     V->ops->waxpy                  = VecWAXPY_Seq;
436:     V->ops->dotnorm2               = NULL;
437:     V->ops->placearray             = VecPlaceArray_Seq;
438:     V->ops->replacearray           = VecReplaceArray_SeqHIP;
439:     V->ops->resetarray             = VecResetArray_Seq;
440:     V->ops->duplicate              = VecDuplicate_Seq;
441:     V->ops->conjugate              = VecConjugate_Seq;
442:     V->ops->getlocalvector         = NULL;
443:     V->ops->restorelocalvector     = NULL;
444:     V->ops->getlocalvectorread     = NULL;
445:     V->ops->restorelocalvectorread = NULL;
446:     V->ops->getarraywrite          = NULL;
447:     V->ops->max                    = VecMax_Seq;
448:     V->ops->min                    = VecMin_Seq;
449:     V->ops->reciprocal             = VecReciprocal_Default;
450:     V->ops->sum                    = NULL;
451:     V->ops->shift                  = NULL;
452:   } else {
453:     V->ops->dot                    = VecDot_SeqHIP;
454:     V->ops->norm                   = VecNorm_SeqHIP;
455:     V->ops->tdot                   = VecTDot_SeqHIP;
456:     V->ops->scale                  = VecScale_SeqHIP;
457:     V->ops->copy                   = VecCopy_SeqHIP;
458:     V->ops->set                    = VecSet_SeqHIP;
459:     V->ops->swap                   = VecSwap_SeqHIP;
460:     V->ops->axpy                   = VecAXPY_SeqHIP;
461:     V->ops->axpby                  = VecAXPBY_SeqHIP;
462:     V->ops->axpbypcz               = VecAXPBYPCZ_SeqHIP;
463:     V->ops->pointwisemult          = VecPointwiseMult_SeqHIP;
464:     V->ops->pointwisedivide        = VecPointwiseDivide_SeqHIP;
465:     V->ops->setrandom              = VecSetRandom_SeqHIP;
466:     V->ops->dot_local              = VecDot_SeqHIP;
467:     V->ops->tdot_local             = VecTDot_SeqHIP;
468:     V->ops->norm_local             = VecNorm_SeqHIP;
469:     V->ops->mdot_local             = VecMDot_SeqHIP;
470:     V->ops->maxpy                  = VecMAXPY_SeqHIP;
471:     V->ops->mdot                   = VecMDot_SeqHIP;
472:     V->ops->aypx                   = VecAYPX_SeqHIP;
473:     V->ops->waxpy                  = VecWAXPY_SeqHIP;
474:     V->ops->dotnorm2               = VecDotNorm2_SeqHIP;
475:     V->ops->placearray             = VecPlaceArray_SeqHIP;
476:     V->ops->replacearray           = VecReplaceArray_SeqHIP;
477:     V->ops->resetarray             = VecResetArray_SeqHIP;
478:     V->ops->destroy                = VecDestroy_SeqHIP;
479:     V->ops->duplicate              = VecDuplicate_SeqHIP;
480:     V->ops->conjugate              = VecConjugate_SeqHIP;
481:     V->ops->getlocalvector         = VecGetLocalVector_SeqHIP;
482:     V->ops->restorelocalvector     = VecRestoreLocalVector_SeqHIP;
483:     V->ops->getlocalvectorread     = VecGetLocalVectorRead_SeqHIP;
484:     V->ops->restorelocalvectorread = VecRestoreLocalVectorRead_SeqHIP;
485:     V->ops->getarraywrite          = VecGetArrayWrite_SeqHIP;
486:     V->ops->getarray               = VecGetArray_SeqHIP;
487:     V->ops->restorearray           = VecRestoreArray_SeqHIP;
488:     V->ops->getarrayandmemtype     = VecGetArrayAndMemType_SeqHIP;
489:     V->ops->restorearrayandmemtype = VecRestoreArrayAndMemType_SeqHIP;
490:     V->ops->max                    = VecMax_SeqHIP;
491:     V->ops->min                    = VecMin_SeqHIP;
492:     V->ops->reciprocal             = VecReciprocal_SeqHIP;
493:     V->ops->sum                    = VecSum_SeqHIP;
494:     V->ops->shift                  = VecShift_SeqHIP;
495:   }
496:   return(0);
497: }

499: PetscErrorCode VecCreate_SeqHIP_Private(Vec V,const PetscScalar *array)
500: {
502:   Vec_HIP       *vechip;
503:   PetscMPIInt    size;
504:   PetscBool      option_set;

507:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
508:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQHIP on more than one process");
509:   VecCreate_Seq_Private(V,0);
510:   PetscObjectChangeTypeName((PetscObject)V,VECSEQHIP);
511:   VecBindToCPU_SeqHIP(V,PETSC_FALSE);
512:   V->ops->bindtocpu = VecBindToCPU_SeqHIP;

514:   /* Later, functions check for the Vec_HIP structure existence, so do not create it without array */
515:   if (array) {
516:     if (!V->spptr) {
517:       PetscReal pinned_memory_min;
518:       PetscCalloc(sizeof(Vec_HIP),&V->spptr);
519:       vechip = (Vec_HIP*)V->spptr;
520:       V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

522:       pinned_memory_min = 0;
523:       /* Need to parse command line for minimum size to use for pinned memory allocations on host here.
524:          Note: This same code duplicated in VecHIPAllocateCheck() and VecCreate_MPIHIP_Private(). Is there a good way to avoid this? */
525:       PetscOptionsBegin(PetscObjectComm((PetscObject)V),((PetscObject)V)->prefix,"VECHIP Options","Vec");
526:       PetscOptionsReal("-vec_pinned_memory_min","Minimum size (in bytes) for an allocation to use pinned memory on host","VecSetPinnedMemoryMin",pinned_memory_min,&pinned_memory_min,&option_set);
527:       if (option_set) V->minimum_bytes_pinned_memory = pinned_memory_min;
528:       PetscOptionsEnd();
529:     }
530:     vechip = (Vec_HIP*)V->spptr;
531:     vechip->GPUarray = (PetscScalar*)array;
532:     V->offloadmask = PETSC_OFFLOAD_GPU;

534:   }
535:   return(0);
536: }