Actual source code: flow.c


  2: static char help[] = "FUN3D - 3-D, Unstructured Incompressible Euler Solver.\n\
  3: originally written by W. K. Anderson of NASA Langley, \n\
  4: and ported into PETSc by D. K. Kaushik, ODU and ICASE.\n\n";

  6: #include <petscsnes.h>
  7: #include <petsctime.h>
  8: #include <petscao.h>
  9: #include "user.h"
 10: #if defined(_OPENMP)
 11: #include "omp.h"
 12: #if !defined(HAVE_REDUNDANT_WORK)
 13: #include "metis.h"
 14: #endif
 15: #endif

 17: #define ICALLOC(size,y) PetscMalloc1(PetscMax(size,1),y);
 18: #define FCALLOC(size,y) PetscMalloc1(PetscMax(size,1),y);
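      /* Both macros wrap PetscMalloc1() and clamp the request to at least one entry,
         so a zero-sized array still yields a valid pointer that can later be handed to
         PetscFree().  A minimal usage sketch (the work-array names below are only
         illustrative, not part of this file):

             int         *iwork;
             PetscScalar *fwork;
             ICALLOC(nedgeLoc,&iwork);       integer work array
             FCALLOC(4*nedgeLoc,&fwork);     PetscScalar work array
             ...
             PetscFree(iwork);
             PetscFree(fwork);
      */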

 20: typedef struct {
 21:   Vec    qnew,qold,func;
 22:   double fnorm_ini,dt_ini,cfl_ini;
 23:   double ptime;
 24:   double cfl_max,max_time;
 25:   double fnorm,dt,cfl;
 26:   double fnorm_ratio;
 27:   int    ires,iramp,itstep;
 28:   int    max_steps,print_freq;
 29:   int    LocalTimeStepping;
 30: } TstepCtx;

 32: typedef struct {                               /*============================*/
 33:   GRID      *grid;                                 /* Pointer to Grid info       */
 34:   TstepCtx  *tsCtx;                                /* Pointer to Time Stepping Context */
 35:   PetscBool PreLoading;
 36: } AppCtx;                                      /*============================*/

 38: extern int  FormJacobian(SNES,Vec,Mat,Mat,void*),
 39:             FormFunction(SNES,Vec,Vec,void*),
 40:             FormInitialGuess(SNES,GRID*),
 41:             Update(SNES,void*),
 42:             ComputeTimeStep(SNES,int,void*),
 43:             GetLocalOrdering(GRID*),
 44:             SetPetscDS(GRID *,TstepCtx*);
 45: static PetscErrorCode WritePVTU(AppCtx*,const char*,PetscBool);
 46: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
 47: int EdgeColoring(int nnodes,int nedge,int *e2n,int *eperm,int *ncolor,int *ncount);
 48: #endif
 49: /* Global Variables */

 51:                                                /*============================*/
 52: CINFO  *c_info;                                /* Pointer to COMMON INFO     */
 53: CRUNGE *c_runge;                               /* Pointer to COMMON RUNGE    */
 54: CGMCOM *c_gmcom;                               /* Pointer to COMMON GMCOM    */
 55:                                                /*============================*/
 56: int  rank,size,rstart;
 57: REAL memSize = 0.0,grad_time = 0.0;
 58: #if defined(_OPENMP)
 59: int max_threads = 2,tot_threads,my_thread_id;
 60: #endif

 62: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
 63: int       event0,event1;
 64: Scalar    time_counters;
 65: long long counter0,counter1;
 66: #endif
 67: int  ntran[max_nbtran];        /* transition stuff put here to make global */
 68: REAL dxtran[max_nbtran];

 70: /* ======================== MAIN ROUTINE =================================== */
 71: /*                                                                           */
 72: /* Finite volume flux split solver for general polygons                      */
 73: /*                                                                           */
 74: /*===========================================================================*/

 76: int main(int argc,char **args)
 77: {
 78:   AppCtx      user;
 79:   GRID        f_pntr;
 80:   TstepCtx    tsCtx;
 81:   SNES        snes;                    /* nonlinear solver context */
 82:   Mat         Jpc;                     /* Jacobian and Preconditioner matrices */
 83:   PetscScalar *qnode;
 84:   int         ierr;
 85:   PetscBool   flg,write_pvtu,pvtu_base64;
 86:   MPI_Comm    comm;
 87:   PetscInt    maxfails                       = 10000;
 88:   char        pvtu_fname[PETSC_MAX_PATH_LEN] = "incomp";

 90:   ierr = PetscInitialize(&argc,&args,NULL,help);if (ierr) return ierr;
 91:   PetscInitializeFortran();
 92:   PetscOptionsInsertFile(PETSC_COMM_WORLD,"petsc.opt",PETSC_FALSE);

 94:   comm = PETSC_COMM_WORLD;
 95:   f77FORLINK();                               /* Link FORTRAN and C COMMONS */

 97:   MPI_Comm_rank(comm,&rank);
 98:   MPI_Comm_size(comm,&size);

100:   flg  = PETSC_FALSE;
101:   PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
102:   if (flg) {PetscMemorySetGetMaximumUsage();}

104:   /*======================================================================*/
 105:   /* Initialize stuff related to time stepping */
106:   /*======================================================================*/
107:   tsCtx.fnorm_ini         = 0.0;  tsCtx.cfl_ini     = 50.0;    tsCtx.cfl_max = 1.0e+05;
108:   tsCtx.max_steps         = 50;   tsCtx.max_time    = 1.0e+12; tsCtx.iramp   = -50;
109:   tsCtx.dt                = -5.0; tsCtx.fnorm_ratio = 1.0e+10;
110:   tsCtx.LocalTimeStepping = 1;
111:   PetscOptionsGetInt(NULL,NULL,"-max_st",&tsCtx.max_steps,NULL);
 112:   PetscOptionsGetReal(NULL,NULL,"-ts_rtol",&tsCtx.fnorm_ratio,NULL);
 113:   PetscOptionsGetReal(NULL,NULL,"-cfl_ini",&tsCtx.cfl_ini,NULL);
 114:   PetscOptionsGetReal(NULL,NULL,"-cfl_max",&tsCtx.cfl_max,NULL);
115:   tsCtx.print_freq        = tsCtx.max_steps;
116:   PetscOptionsGetInt(NULL,NULL,"-print_freq",&tsCtx.print_freq,&flg);
117:   PetscOptionsGetString(NULL,NULL,"-pvtu",pvtu_fname,sizeof(pvtu_fname),&write_pvtu);
118:   pvtu_base64             = PETSC_FALSE;
119:   PetscOptionsGetBool(NULL,NULL,"-pvtu_base64",&pvtu_base64,NULL);

121:   c_info->alpha = 3.0;
122:   c_info->beta  = 15.0;
123:   c_info->ivisc = 0;

125:   c_gmcom->ilu0  = 1;
126:   c_gmcom->nsrch = 10;

128:   c_runge->nitfo = 0;

130:   PetscMemzero(&f_pntr,sizeof(f_pntr));
131:   f_pntr.jvisc  = c_info->ivisc;
132:   f_pntr.ileast = 4;
 133:   PetscOptionsGetReal(NULL,NULL,"-alpha",&c_info->alpha,NULL);
 134:   PetscOptionsGetReal(NULL,NULL,"-beta",&c_info->beta,NULL);

136:   /*======================================================================*/

138:   /*Set the maximum number of threads for OpenMP */
139: #if defined(_OPENMP)
140:   PetscOptionsGetInt(NULL,NULL,"-max_threads",&max_threads,&flg);
141:   omp_set_num_threads(max_threads);
142:   PetscPrintf(comm,"Using %d threads for each MPI process\n",max_threads);
143: #endif

145:   /* Get the grid information into local ordering */
146:   GetLocalOrdering(&f_pntr);

148:   /* Allocate Memory for Some Other Grid Arrays */
149:   set_up_grid(&f_pntr);

151:   /* If using least squares for the gradients,calculate the r's */
152:   if (f_pntr.ileast == 4) f77SUMGS(&f_pntr.nnodesLoc,&f_pntr.nedgeLoc,f_pntr.eptr,f_pntr.xyz,f_pntr.rxy,&rank,&f_pntr.nvertices);

154:   user.grid  = &f_pntr;
155:   user.tsCtx = &tsCtx;

157:   /* SAWs Stuff */

159:   /*
160:     Preload the executable to get accurate timings. This runs the following chunk of
161:     code twice, first to get the executable pages into memory and the second time for
162:     accurate timings.
163:   */
164:   PetscPreLoadBegin(PETSC_TRUE,"Time integration");
165:   user.PreLoading = PetscPreLoading;
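        /* PetscPreLoadBegin()/PetscPreLoadEnd() bracket a block that is executed twice
           when the first argument is PETSC_TRUE: a warm-up pass that pulls the executable
           pages into memory, then the pass whose timings are reported.  The PetscPreLoading
           flag distinguishes the two passes; Update() uses it to limit the warm-up pass to
           a single time step. */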

167:   /* Create nonlinear solver */
168:   SetPetscDS(&f_pntr,&tsCtx);
169:   SNESCreate(comm,&snes);
170:   SNESSetType(snes,"newtonls");

172:   /* Set various routines and options */
173:   SNESSetFunction(snes,user.grid->res,FormFunction,&user);
174:   flg  = PETSC_FALSE;
175:   PetscOptionsGetBool(NULL,NULL,"-matrix_free",&flg,NULL);
176:   if (flg) {
177:     /* Use matrix-free to define Newton system; use explicit (approx) Jacobian for preconditioner */
178:     MatCreateSNESMF(snes,&Jpc);
179:     SNESSetJacobian(snes,Jpc,user.grid->A,FormJacobian,&user);
180:   } else {
181:     /* Use explicit (approx) Jacobian to define Newton system and preconditioner */
182:     SNESSetJacobian(snes,user.grid->A,user.grid->A,FormJacobian,&user);
183:   }
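        /* With -matrix_free the Newton system is applied matrix-free (MatCreateSNESMF()
           differences FormFunction() to form Jacobian-vector products) while the assembled
           matrix user.grid->A is still used to build the preconditioner; without the option
           the same assembled matrix serves both roles. */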

185:   SNESSetMaxLinearSolveFailures(snes,maxfails);
186:   SNESSetFromOptions(snes);

188:   /* Initialize the flowfield */
189:   FormInitialGuess(snes,user.grid);

191:   /* Solve nonlinear system */
192:   Update(snes,&user);

194:   /* Write restart file */
195:   VecGetArray(user.grid->qnode,&qnode);
196:   /*f77WREST(&user.grid->nnodes,qnode,user.grid->turbre,user.grid->amut);*/

198:   /* Write Tecplot solution file */
199: #if 0
200:   if (rank == 0)
201:     f77TECFLO(&user.grid->nnodes,
202:               &user.grid->nnbound,&user.grid->nvbound,&user.grid->nfbound,
203:               &user.grid->nnfacet,&user.grid->nvfacet,&user.grid->nffacet,
204:               &user.grid->nsnode, &user.grid->nvnode, &user.grid->nfnode,
205:               c_info->title,
206:               user.grid->x,       user.grid->y,       user.grid->z,
207:               qnode,
208:               user.grid->nnpts,   user.grid->nntet,   user.grid->nvpts,
209:               user.grid->nvtet,   user.grid->nfpts,   user.grid->nftet,
210:               user.grid->f2ntn,   user.grid->f2ntv,   user.grid->f2ntf,
211:               user.grid->isnode,  user.grid->ivnode,  user.grid->ifnode,
212:               &rank);
213: #endif
214:   if (write_pvtu) {WritePVTU(&user,pvtu_fname,pvtu_base64);}

216:   /* Write residual,lift,drag,and moment history file */
217:   /*
218:     if (rank == 0) f77PLLAN(&user.grid->nnodes,&rank);
219:   */

221:   VecRestoreArray(user.grid->qnode,&qnode);
222:   flg  = PETSC_FALSE;
223:   PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
224:   if (flg) {
225:     PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage before destroying\n");
226:   }

228:   VecDestroy(&user.grid->qnode);
229:   VecDestroy(&user.grid->qnodeLoc);
230:   VecDestroy(&user.tsCtx->qold);
231:   VecDestroy(&user.tsCtx->func);
232:   VecDestroy(&user.grid->res);
233:   VecDestroy(&user.grid->grad);
234:   VecDestroy(&user.grid->gradLoc);
235:   MatDestroy(&user.grid->A);
236:   flg  = PETSC_FALSE;
237:   PetscOptionsGetBool(NULL,NULL,"-matrix_free",&flg,NULL);
238:   if (flg) { MatDestroy(&Jpc);}
239:   SNESDestroy(&snes);
240:   VecScatterDestroy(&user.grid->scatter);
241:   VecScatterDestroy(&user.grid->gradScatter);
242:   flg  = PETSC_FALSE;
243:   PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
244:   if (flg) {
245:     PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after destroying\n");
246:   }
247:   PetscPreLoadEnd();

249:   /* allocated in set_up_grid() */
250:   PetscFree(user.grid->isface);
251:   PetscFree(user.grid->ivface);
252:   PetscFree(user.grid->ifface);
253:   PetscFree(user.grid->us);
254:   PetscFree(user.grid->vs);
255:   PetscFree(user.grid->as);

257:   /* Allocated in GetLocalOrdering() */
258:   PetscFree(user.grid->eptr);
259:   PetscFree(user.grid->ia);
260:   PetscFree(user.grid->ja);
261:   PetscFree(user.grid->loc2glo);
262:   PetscFree(user.grid->loc2pet);
263:   PetscFree(user.grid->xyzn);
264: #if defined(_OPENMP)
265: #  if defined(HAVE_REDUNDANT_WORK)
266:   PetscFree(user.grid->resd);
267: #  else
268:   PetscFree(user.grid->part_thr);
269:   PetscFree(user.grid->nedge_thr);
270:   PetscFree(user.grid->edge_thr);
271:   PetscFree(user.grid->xyzn_thr);
272: #  endif
273: #endif
274:   PetscFree(user.grid->xyz);
275:   PetscFree(user.grid->area);

277:   PetscFree(user.grid->nntet);
278:   PetscFree(user.grid->nnpts);
279:   PetscFree(user.grid->f2ntn);
280:   PetscFree(user.grid->isnode);
281:   PetscFree(user.grid->sxn);
282:   PetscFree(user.grid->syn);
283:   PetscFree(user.grid->szn);
284:   PetscFree(user.grid->sa);
285:   PetscFree(user.grid->sface_bit);

287:   PetscFree(user.grid->nvtet);
288:   PetscFree(user.grid->nvpts);
289:   PetscFree(user.grid->f2ntv);
290:   PetscFree(user.grid->ivnode);
291:   PetscFree(user.grid->vxn);
292:   PetscFree(user.grid->vyn);
293:   PetscFree(user.grid->vzn);
294:   PetscFree(user.grid->va);
295:   PetscFree(user.grid->vface_bit);

297:   PetscFree(user.grid->nftet);
298:   PetscFree(user.grid->nfpts);
299:   PetscFree(user.grid->f2ntf);
300:   PetscFree(user.grid->ifnode);
301:   PetscFree(user.grid->fxn);
302:   PetscFree(user.grid->fyn);
303:   PetscFree(user.grid->fzn);
304:   PetscFree(user.grid->fa);
305:   PetscFree(user.grid->cdt);
306:   PetscFree(user.grid->phi);
307:   PetscFree(user.grid->rxy);

309:   PetscPrintf(comm,"Time taken in gradient calculation %g sec.\n",grad_time);

311:   PetscFinalize();
312:   return ierr;
313: }

315: /*---------------------------------------------------------------------*/
316: /* ---------------------  Form initial approximation ----------------- */
317: int FormInitialGuess(SNES snes,GRID *grid)
318: /*---------------------------------------------------------------------*/
319: {
320:   int         ierr;
321:   PetscScalar *qnode;

324:   VecGetArray(grid->qnode,&qnode);
325:   f77INIT(&grid->nnodesLoc,qnode,grid->turbre,grid->amut,&grid->nvnodeLoc,grid->ivnode,&rank);
326:   VecRestoreArray(grid->qnode,&qnode);
327:   return(0);
328: }

330: /*---------------------------------------------------------------------*/
331: /* ---------------------  Evaluate Function F(x) --------------------- */
332: int FormFunction(SNES snes,Vec x,Vec f,void *dummy)
333: /*---------------------------------------------------------------------*/
334: {
335:   AppCtx      *user  = (AppCtx*) dummy;
336:   GRID        *grid  = user->grid;
337:   TstepCtx    *tsCtx = user->tsCtx;
338:   PetscScalar *qnode,*res,*qold;
339:   PetscScalar *grad;
340:   PetscScalar temp;
341:   VecScatter  scatter     = grid->scatter;
342:   VecScatter  gradScatter = grid->gradScatter;
343:   Vec         localX      = grid->qnodeLoc;
344:   Vec         localGrad   = grid->gradLoc;
345:   int         i,j,in,ierr;
346:   int         nbface,ires;
347:   PetscScalar time_ini,time_fin;

350:   /* Get X into the local work vector */
351:   VecScatterBegin(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
352:   VecScatterEnd(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
353:   /* VecCopy(x,localX); */
354:   /* access the local work f,grad,and input */
355:   VecGetArray(f,&res);
356:   VecGetArray(grid->grad,&grad);
357:   VecGetArray(localX,&qnode);
358:   ires = tsCtx->ires;

360:   PetscTime(&time_ini);
361:   f77LSTGS(&grid->nnodesLoc,&grid->nedgeLoc,grid->eptr,qnode,grad,grid->xyz,grid->rxy,
362:            &rank,&grid->nvertices);
363:   PetscTime(&time_fin);
364:   grad_time += time_fin - time_ini;
365:   VecRestoreArray(grid->grad,&grad);

367:   VecScatterBegin(gradScatter,grid->grad,localGrad,INSERT_VALUES,SCATTER_FORWARD);
368:   VecScatterEnd(gradScatter,grid->grad,localGrad,INSERT_VALUES,SCATTER_FORWARD);
369:   /*VecCopy(grid->grad,localGrad);*/

371:   VecGetArray(localGrad,&grad);
372:   nbface = grid->nsface + grid->nvface + grid->nfface;
373:   f77GETRES(&grid->nnodesLoc,&grid->ncell,  &grid->nedgeLoc,  &grid->nsface,
374:             &grid->nvface,&grid->nfface, &nbface,
375:             &grid->nsnodeLoc,&grid->nvnodeLoc, &grid->nfnodeLoc,
376:             grid->isface, grid->ivface,  grid->ifface, &grid->ileast,
377:             grid->isnode, grid->ivnode,  grid->ifnode,
378:             &grid->nnfacetLoc,grid->f2ntn,  &grid->nnbound,
379:             &grid->nvfacetLoc,grid->f2ntv,  &grid->nvbound,
380:             &grid->nffacetLoc,grid->f2ntf,  &grid->nfbound,
381:             grid->eptr,
382:             grid->sxn,    grid->syn,     grid->szn,
383:             grid->vxn,    grid->vyn,     grid->vzn,
384:             grid->fxn,    grid->fyn,     grid->fzn,
385:             grid->xyzn,
386:             qnode,        grid->cdt,
387:             grid->xyz,    grid->area,
388:             grad, res,
389:             grid->turbre,
390:             grid->slen,   grid->c2n,
391:             grid->c2e,
392:             grid->us,     grid->vs,      grid->as,
393:             grid->phi,
394:             grid->amut,   &ires,
395: #if defined(_OPENMP)
396:             &max_threads,
397: #if defined(HAVE_EDGE_COLORING)
398:             &grid->ncolor, grid->ncount,
399: #elif defined(HAVE_REDUNDANT_WORK)
400:             grid->resd,
401: #else
402:             &grid->nedgeAllThr,
403:             grid->part_thr,grid->nedge_thr,grid->edge_thr,grid->xyzn_thr,
404: #endif
405: #endif
406:             &tsCtx->LocalTimeStepping,&rank,&grid->nvertices);

408: /* Add the contribution due to time stepping */
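      /* A sketch of the pseudo-time term added below: for each of the 4 unknowns at
         local node i,

             res += (area[i] / (cfl * cdt[i])) * (qnode - qold)

         i.e. the dual-cell "volume" over the local pseudo-time step scales the change
         from the previous time level.  As the CFL number is ramped up this term
         vanishes and the iteration approaches a plain Newton solve of the steady
         residual. */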
409:   if (ires == 1) {
410:     VecGetArray(tsCtx->qold,&qold);
411: #if defined(INTERLACING)
412:     for (i = 0; i < grid->nnodesLoc; i++) {
413:       temp = grid->area[i]/(tsCtx->cfl*grid->cdt[i]);
414:       for (j = 0; j < 4; j++) {
415:         in       = 4*i + j;
416:         res[in] += temp*(qnode[in] - qold[in]);
417:       }
418:     }
419: #else
420:     for (j = 0; j < 4; j++) {
421:       for (i = 0; i < grid->nnodesLoc; i++) {
422:         temp     = grid->area[i]/(tsCtx->cfl*grid->cdt[i]);
423:         in       = grid->nnodesLoc*j + i;
424:         res[in] += temp*(qnode[in] - qold[in]);
425:       }
426:     }
427: #endif
428:     VecRestoreArray(tsCtx->qold,&qold);
429:   }
430:   VecRestoreArray(localX,&qnode);
431:   VecRestoreArray(f,&res);
432:   VecRestoreArray(localGrad,&grad);
433:   return(0);
434: }

436: /*---------------------------------------------------------------------*/
437: /* --------------------  Evaluate Jacobian F'(x) -------------------- */

439: int FormJacobian(SNES snes,Vec x,Mat Jac,Mat pc_mat,void *dummy)
440: /*---------------------------------------------------------------------*/
441: {
442:   AppCtx      *user  = (AppCtx*) dummy;
443:   GRID        *grid  = user->grid;
444:   TstepCtx    *tsCtx = user->tsCtx;
445:   Vec         localX = grid->qnodeLoc;
446:   PetscScalar *qnode;
447:   int         ierr;

450:   /*  VecScatterBegin(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
451:       VecScatterEnd(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD); */
452:   /* VecCopy(x,localX); */
453:   MatSetUnfactored(pc_mat);

455:   VecGetArray(localX,&qnode);
456:   f77FILLA(&grid->nnodesLoc,&grid->nedgeLoc,grid->eptr,
457:            &grid->nsface,
458:             grid->isface,grid->fxn,grid->fyn,grid->fzn,
459:             grid->sxn,grid->syn,grid->szn,
460:            &grid->nsnodeLoc,&grid->nvnodeLoc,&grid->nfnodeLoc,grid->isnode,
461:             grid->ivnode,grid->ifnode,qnode,&pc_mat,grid->cdt,
462:             grid->area,grid->xyzn,&tsCtx->cfl,
463:            &rank,&grid->nvertices);
464:   VecRestoreArray(localX,&qnode);
465:   MatAssemblyBegin(Jac,MAT_FINAL_ASSEMBLY);
466:   MatAssemblyEnd(Jac,MAT_FINAL_ASSEMBLY);
467: #if defined(MATRIX_VIEW)
468:   if ((tsCtx->itstep != 0) &&(tsCtx->itstep % tsCtx->print_freq) == 0) {
469:     PetscViewer viewer;
470:     char mat_file[PETSC_MAX_PATH_LEN];
471:     sprintf(mat_file,"mat_bin.%d",tsCtx->itstep);
472:     PetscViewerBinaryOpen(MPI_COMM_WORLD,mat_file,FILE_MODE_WRITE,&viewer);
473:     MatView(pc_mat,viewer);
474:     PetscViewerDestroy(&viewer);
475:   }
476: #endif
477:   return(0);
478: }

480: /*---------------------------------------------------------------------*/
481: int Update(SNES snes,void *ctx)
482: /*---------------------------------------------------------------------*/
483: {
484:   AppCtx      *user   = (AppCtx*) ctx;
485:   GRID        *grid   = user->grid;
486:   TstepCtx    *tsCtx  = user->tsCtx;
487:   VecScatter  scatter = grid->scatter;
488:   Vec         localX  = grid->qnodeLoc;
489:   PetscScalar *qnode,*res;
490:   PetscScalar clift,cdrag,cmom;
491:   int         ierr,its;
492:   PetscScalar fratio;
493:   PetscScalar time1,time2,cpuloc,cpuglo;
494:   int         max_steps;
495:   PetscBool   print_flag = PETSC_FALSE;
496:   FILE        *fptr      = 0;
497:   int         nfailsCum  = 0,nfails = 0;
498:   /*Scalar         cpu_ini,cpu_fin,cpu_time;*/
499:   /*int            event0 = 14,event1 = 25,gen_start,gen_read;
500:   PetscScalar    time_start_counters,time_read_counters;
501:   long long      counter0,counter1;*/

504:   PetscOptionsGetBool(NULL,NULL,"-print",&print_flag,NULL);
505:   if (print_flag) {
506:     PetscFOpen(PETSC_COMM_WORLD,"history.out","w",&fptr);
507:     PetscFPrintf(PETSC_COMM_WORLD,fptr,"VARIABLES = iter,cfl,fnorm,clift,cdrag,cmom,cpu\n");
508:   }
509:   if (user->PreLoading) max_steps = 1;
510:   else max_steps = tsCtx->max_steps;
511:   fratio = 1.0;
512:   /*tsCtx->ptime = 0.0;*/
513:   VecCopy(grid->qnode,tsCtx->qold);
514:   PetscTime(&time1);
515: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
516:   /* if (!user->PreLoading) {
517:     PetscBool  flg = PETSC_FALSE;
518:     PetscOptionsGetInt(NULL,NULL,"-e0",&event0,&flg);
519:     PetscOptionsGetInt(NULL,NULL,"-e1",&event1,&flg);
520:     PetscTime(&time_start_counters);
521:     if ((gen_start = start_counters(event0,event1)) < 0)
 522:     SETERRQ(PETSC_COMM_SELF,1,"Error in start_counters\n");
523:   }*/
524: #endif
525:   /*cpu_ini = PetscGetCPUTime();*/
526:   for (tsCtx->itstep = 0; (tsCtx->itstep < max_steps) &&
527:         (fratio <= tsCtx->fnorm_ratio); tsCtx->itstep++) {
528:     ComputeTimeStep(snes,tsCtx->itstep,user);
529:     /*tsCtx->ptime +=  tsCtx->dt;*/

531:     SNESSolve(snes,NULL,grid->qnode);
532:     SNESGetIterationNumber(snes,&its);

534:     SNESGetNonlinearStepFailures(snes,&nfails);
535:     nfailsCum += nfails; nfails = 0;
536:     if (nfailsCum >= 2) SETERRQ(PETSC_COMM_SELF,1,"Unable to find a Newton Step");
537:     if (print_flag) {
538:       PetscPrintf(PETSC_COMM_WORLD,"At Time Step %d cfl = %g and fnorm = %g\n",
539:                          tsCtx->itstep,tsCtx->cfl,tsCtx->fnorm);
540:     }
541:     VecCopy(grid->qnode,tsCtx->qold);

543:     c_info->ntt = tsCtx->itstep+1;
544:     PetscTime(&time2);
545:     cpuloc      = time2-time1;
546:     cpuglo      = 0.0;
547:     MPI_Allreduce(&cpuloc,&cpuglo,1,MPIU_REAL,MPIU_MAX,PETSC_COMM_WORLD);
 548:     c_info->tot = cpuglo;    /* Total CPU time used up to this time step */

550:     VecScatterBegin(scatter,grid->qnode,localX,INSERT_VALUES,SCATTER_FORWARD);
551:     VecScatterEnd(scatter,grid->qnode,localX,INSERT_VALUES,SCATTER_FORWARD);
552:     /* VecCopy(grid->qnode,localX); */

554:     VecGetArray(grid->res,&res);
555:     VecGetArray(localX,&qnode);

557:     f77FORCE(&grid->nnodesLoc,&grid->nedgeLoc,
558:               grid->isnode, grid->ivnode,
559:              &grid->nnfacetLoc,grid->f2ntn,&grid->nnbound,
560:              &grid->nvfacetLoc,grid->f2ntv,&grid->nvbound,
561:               grid->eptr,   qnode,
562:               grid->xyz,
563:               grid->sface_bit,grid->vface_bit,
564:               &clift,&cdrag,&cmom,&rank,&grid->nvertices);
565:     if (print_flag) {
566:       PetscPrintf(PETSC_COMM_WORLD,"%d\t%g\t%g\t%g\t%g\t%g\n",tsCtx->itstep,
567:                         tsCtx->cfl,tsCtx->fnorm,clift,cdrag,cmom);
568:       PetscPrintf(PETSC_COMM_WORLD,"Wall clock time needed %g seconds for %d time steps\n",
569:                         cpuglo,tsCtx->itstep);
570:       PetscFPrintf(PETSC_COMM_WORLD,fptr,"%d\t%g\t%g\t%g\t%g\t%g\t%g\n",
571:                           tsCtx->itstep,tsCtx->cfl,tsCtx->fnorm,clift,cdrag,cmom,cpuglo);
572:     }
573:     VecRestoreArray(localX,&qnode);
574:     VecRestoreArray(grid->res,&res);
575:     fratio = tsCtx->fnorm_ini/tsCtx->fnorm;
576:     MPI_Barrier(PETSC_COMM_WORLD);

578:   } /* End of time step loop */

580: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
581:   if (!user->PreLoading) {
582:     int  eve0,eve1;
583:     FILE *cfp0,*cfp1;
584:     char str[256];
585:     /* if ((gen_read = read_counters(event0,&counter0,event1,&counter1)) < 0)
586:     SETERRQ(PETSC_COMM_SELF,1,"Error in read_counter\n");
587:     PetscTime(&time_read_counters);
588:     if (gen_read != gen_start) {
589:     SETERRQ(PETSC_COMM_SELF,1,"Lost Counters!! Aborting ...\n");
590:     }*/
591:     /*sprintf(str,"counters%d_and_%d",event0,event1);
592:     cfp0 = fopen(str,"a");*/
593:     /*print_counters(event0,counter0,event1,counter1);*/
594:     /*fprintf(cfp0,"%lld %lld %g\n",counter0,counter1,
595:                   time_counters);
596:     fclose(cfp0);*/
597:   }
598: #endif
599:   PetscPrintf(PETSC_COMM_WORLD,"Total wall clock time needed %g seconds for %d time steps\n",
600:                      cpuglo,tsCtx->itstep);
601:   PetscPrintf(PETSC_COMM_WORLD,"cfl = %g fnorm = %g\n",tsCtx->cfl,tsCtx->fnorm);
602:   PetscPrintf(PETSC_COMM_WORLD,"clift = %g cdrag = %g cmom = %g\n",clift,cdrag,cmom);

604:   if (rank == 0 && print_flag) fclose(fptr);
605:   if (user->PreLoading) {
606:     tsCtx->fnorm_ini = 0.0;
607:     PetscPrintf(PETSC_COMM_WORLD,"Preloading done ...\n");
608:   }
609:   return(0);
610: }

612: /*---------------------------------------------------------------------*/
613: int ComputeTimeStep(SNES snes,int iter,void *ctx)
614: /*---------------------------------------------------------------------*/
615: {
616:   AppCtx      *user  = (AppCtx*) ctx;
617:   TstepCtx    *tsCtx = user->tsCtx;
618:   Vec         func   = tsCtx->func;
619:   PetscScalar inc    = 1.1;
620:   PetscScalar newcfl;
621:   int         ierr;
622:   /*int       iramp = tsCtx->iramp;*/

625:   tsCtx->ires = 0;
626:   FormFunction(snes,tsCtx->qold,func,user);
627:   tsCtx->ires = 1;
628:   VecNorm(func,NORM_2,&tsCtx->fnorm);
629:   /* first time through so compute initial function norm */
630:   if (tsCtx->fnorm_ini == 0.0) {
631:     tsCtx->fnorm_ini = tsCtx->fnorm;
632:     tsCtx->cfl       = tsCtx->cfl_ini;
633:   } else {
634:     newcfl     = inc*tsCtx->cfl_ini*tsCtx->fnorm_ini/tsCtx->fnorm;
635:     tsCtx->cfl = PetscMin(newcfl,tsCtx->cfl_max);
636:   }
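        /* The ramp above grows the CFL number in proportion to the overall residual
           reduction, newcfl = inc*cfl_ini*||F_0||/||F_k||, capped at cfl_max; this
           residual-ratio strategy is commonly described as switched evolution
           relaxation (SER), though the code itself does not use that name. */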

638:   /* if (iramp < 0) {
639:    newcfl = inc*tsCtx->cfl_ini*tsCtx->fnorm_ini/tsCtx->fnorm;
640:   } else {
641:    if (tsCtx->dt < 0 && iramp > 0)
642:     if (iter > iramp) newcfl = tsCtx->cfl_max;
643:     else newcfl = tsCtx->cfl_ini + (tsCtx->cfl_max - tsCtx->cfl_ini)*
644:                                 (double) iter/(double) iramp;
645:   }
646:   tsCtx->cfl = MIN(newcfl,tsCtx->cfl_max);*/
647:   /*printf("In ComputeTime Step - fnorm is %f\n",tsCtx->fnorm);*/
648:   /*VecDestroy(&func);*/
649:   return(0);
650: }

652: /*---------------------------------------------------------------------*/
653: int GetLocalOrdering(GRID *grid)
654: /*---------------------------------------------------------------------*/
655: {
656:   int         ierr,i,j,k,inode,isurf,nte,nb,node1,node2,node3;
657:   int         nnodes,nedge,nnz,jstart,jend;
658:   int         nnodesLoc,nvertices,nedgeLoc,nnodesLocEst;
659:   int         nedgeLocEst,remEdges,readEdges,remNodes,readNodes;
660:   int         nnfacet,nvfacet,nffacet;
661:   int         nnfacetLoc,nvfacetLoc,nffacetLoc;
662:   int         nsnode,nvnode,nfnode;
663:   int         nsnodeLoc,nvnodeLoc,nfnodeLoc;
664:   int         nnbound,nvbound,nfbound;
665:   int         bs = 4;
666:   int         fdes = 0;
667:   off_t       currentPos  = 0,newPos = 0;
668:   int         grid_param  = 13;
669:   int         cross_edges = 0;
670:   int         *edge_bit,*pordering;
671:   int         *l2p,*l2a,*p2l,*a2l,*v2p,*eperm;
672:   int         *tmp,*tmp1,*tmp2;
673:   PetscScalar time_ini,time_fin;
674:   PetscScalar *ftmp,*ftmp1;
675:   char        mesh_file[PETSC_MAX_PATH_LEN] = "";
676:   AO          ao;
677:   FILE        *fptr,*fptr1;
678:   PetscBool   flg;
679:   MPI_Comm    comm = PETSC_COMM_WORLD;

682:   /* Read the integer grid parameters */
683:   ICALLOC(grid_param,&tmp);
684:   if (rank == 0) {
685:     PetscBool exists;
686:     PetscOptionsGetString(NULL,NULL,"-mesh",mesh_file,sizeof(mesh_file),&flg);
687:     PetscTestFile(mesh_file,'r',&exists);
688:     if (!exists) { /* try uns3d.msh as the file name */
689:       PetscStrcpy(mesh_file,"uns3d.msh");
690:     }
691:     PetscBinaryOpen(mesh_file,FILE_MODE_READ,&fdes);
692:   }
693:   PetscBinarySynchronizedRead(comm,fdes,tmp,grid_param,PETSC_INT);
694:   grid->ncell   = tmp[0];
695:   grid->nnodes  = tmp[1];
696:   grid->nedge   = tmp[2];
697:   grid->nnbound = tmp[3];
698:   grid->nvbound = tmp[4];
699:   grid->nfbound = tmp[5];
700:   grid->nnfacet = tmp[6];
701:   grid->nvfacet = tmp[7];
702:   grid->nffacet = tmp[8];
703:   grid->nsnode  = tmp[9];
704:   grid->nvnode  = tmp[10];
705:   grid->nfnode  = tmp[11];
706:   grid->ntte    = tmp[12];
707:   grid->nsface  = 0;
708:   grid->nvface  = 0;
709:   grid->nfface  = 0;
710:   PetscFree(tmp);
711:   PetscPrintf(comm,"nnodes = %d,nedge = %d,nnfacet = %d,nsnode = %d,nfnode = %d\n",
712:                               grid->nnodes,grid->nedge,grid->nnfacet,grid->nsnode,grid->nfnode);

714:   nnodes  = grid->nnodes;
715:   nedge   = grid->nedge;
716:   nnfacet = grid->nnfacet;
717:   nvfacet = grid->nvfacet;
718:   nffacet = grid->nffacet;
719:   nnbound = grid->nnbound;
720:   nvbound = grid->nvbound;
721:   nfbound = grid->nfbound;
722:   nsnode  = grid->nsnode;
723:   nvnode  = grid->nvnode;
724:   nfnode  = grid->nfnode;

726:   /* Read the partitioning vector generated by MeTiS */
727:   ICALLOC(nnodes,&l2a);
728:   ICALLOC(nnodes,&v2p);
729:   ICALLOC(nnodes,&a2l);
730:   nnodesLoc = 0;

732:   for (i = 0; i < nnodes; i++) a2l[i] = -1;
733:   PetscTime(&time_ini);

735:   if (rank == 0) {
736:     if (size == 1) {
737:       PetscMemzero(v2p,nnodes*sizeof(int));
738:     } else {
739:       char      spart_file[PETSC_MAX_PATH_LEN],part_file[PETSC_MAX_PATH_LEN];
740:       PetscBool exists;

742:       PetscOptionsGetString(NULL,NULL,"-partition",spart_file,sizeof(spart_file),&flg);
743:       PetscTestFile(spart_file,'r',&exists);
744:       if (!exists) { /* try appending the number of processors */
745:         sprintf(part_file,"part_vec.part.%d",size);
746:         PetscStrcpy(spart_file,part_file);
747:       }
748:       fptr = fopen(spart_file,"r");
 749:       if (!fptr) SETERRQ1(PETSC_COMM_SELF,1,"Cannot open file %s\n",spart_file);
750:       for (inode = 0; inode < nnodes; inode++) {
751:         fscanf(fptr,"%d\n",&node1);
752:         v2p[inode] = node1;
753:       }
754:       fclose(fptr);
755:     }
756:   }
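        /* The partition file read above (default name "part_vec.part.<size>") is plain
           text with one integer per line: line i gives the MPI rank that owns global
           node i, in the same node ordering as the mesh file. */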
757:   MPI_Bcast(v2p,nnodes,MPI_INT,0,comm);
758:   for (inode = 0; inode < nnodes; inode++) {
759:     if (v2p[inode] == rank) {
760:       l2a[nnodesLoc] = inode;
761:       a2l[inode]     = nnodesLoc;
762:       nnodesLoc++;
763:     }
764:   }

766:   PetscTime(&time_fin);
767:   time_fin -= time_ini;
768:   PetscPrintf(comm,"Partition Vector read successfully\n");
769:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);

771:   MPI_Scan(&nnodesLoc,&rstart,1,MPI_INT,MPI_SUM,comm);
772:   rstart -= nnodesLoc;
773:   ICALLOC(nnodesLoc,&pordering);
774:   for (i=0; i < nnodesLoc; i++) pordering[i] = rstart + i;
775:   AOCreateBasic(comm,nnodesLoc,l2a,pordering,&ao);
776:   PetscFree(pordering);

 778:   /* Now count the local number of edges - this includes edges with
 779:    ghost nodes, but edges between two ghost nodes are NOT counted */
780:   nedgeLoc  = 0;
781:   nvertices = nnodesLoc;
782:   /* Choose an estimated number of local edges. The choice
783:    nedgeLocEst = 1000000 looks reasonable as it will read
784:    the edge and edge normal arrays in 8 MB chunks */
785:   /*nedgeLocEst = nedge/size;*/
786:   nedgeLocEst = PetscMin(nedge,1000000);
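      /* Rough arithmetic behind the "8 MB" figure above: 1,000,000 edge-normal values
         read as PetscScalar at 8 bytes each is 8 MB per chunk; the integer edge reads
         are 4 MB per chunk, assuming 4-byte PetscInt and 8-byte PetscScalar. */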
787:   remEdges    = nedge;
788:   ICALLOC(2*nedgeLocEst,&tmp);
789:   PetscBinarySynchronizedSeek(comm,fdes,0,PETSC_BINARY_SEEK_CUR,&currentPos);
790:   PetscTime(&time_ini);
791:   while (remEdges > 0) {
792:     readEdges = PetscMin(remEdges,nedgeLocEst);
793:     /*time_ini = PetscTime();*/
794:     PetscBinarySynchronizedRead(comm,fdes,tmp,readEdges,PETSC_INT);
795:     PetscBinarySynchronizedSeek(comm,fdes,(nedge-readEdges)*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
796:     PetscBinarySynchronizedRead(comm,fdes,tmp+readEdges,readEdges,PETSC_INT);
797:     PetscBinarySynchronizedSeek(comm,fdes,-nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
798:     /*time_fin += PetscTime()-time_ini;*/
799:     for (j = 0; j < readEdges; j++) {
800:       node1 = tmp[j]-1;
801:       node2 = tmp[j+readEdges]-1;
802:       if ((v2p[node1] == rank) || (v2p[node2] == rank)) {
803:         nedgeLoc++;
804:         if (a2l[node1] == -1) {
805:           l2a[nvertices] = node1;
806:           a2l[node1]     = nvertices;
807:           nvertices++;
808:         }
809:         if (a2l[node2] == -1) {
810:           l2a[nvertices] = node2;
811:           a2l[node2]     = nvertices;
812:           nvertices++;
813:         }
814:       }
815:     }
816:     remEdges = remEdges - readEdges;
817:     MPI_Barrier(comm);
818:   }
819:   PetscTime(&time_fin);
820:   time_fin -= time_ini;
821:   PetscPrintf(comm,"Local edges counted with MPI_Bcast %d\n",nedgeLoc);
822:   PetscPrintf(comm,"Local vertices counted %d\n",nvertices);
823:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);

825:   /* Now store the local edges */
826:   ICALLOC(2*nedgeLoc,&grid->eptr);
827:   ICALLOC(nedgeLoc,&edge_bit);
828:   ICALLOC(nedgeLoc,&eperm);
829:   i = 0; j = 0; k = 0;
830:   remEdges   = nedge;
831:   PetscBinarySynchronizedSeek(comm,fdes,currentPos,PETSC_BINARY_SEEK_SET,&newPos);
832:   currentPos = newPos;

834:   PetscTime(&time_ini);
835:   while (remEdges > 0) {
836:     readEdges = PetscMin(remEdges,nedgeLocEst);
837:     PetscBinarySynchronizedRead(comm,fdes,tmp,readEdges,PETSC_INT);
838:     PetscBinarySynchronizedSeek(comm,fdes,(nedge-readEdges)*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
839:     PetscBinarySynchronizedRead(comm,fdes,tmp+readEdges,readEdges,PETSC_INT);
840:     PetscBinarySynchronizedSeek(comm,fdes,-nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
841:     for (j = 0; j < readEdges; j++) {
842:       node1 = tmp[j]-1;
843:       node2 = tmp[j+readEdges]-1;
844:       if ((v2p[node1] == rank) || (v2p[node2] == rank)) {
845:         grid->eptr[k]          = a2l[node1];
846:         grid->eptr[k+nedgeLoc] = a2l[node2];
847:         edge_bit[k]            = i; /* Record global file index of the edge */
848:         eperm[k]               = k;
849:         k++;
850:       }
851:       i++;
852:     }
853:     remEdges = remEdges - readEdges;
854:     MPI_Barrier(comm);
855:   }
856:   PetscBinarySynchronizedSeek(comm,fdes,currentPos+2*nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_SET,&newPos);
857:   PetscTime(&time_fin);
858:   time_fin -= time_ini;
859:   PetscPrintf(comm,"Local edges stored\n");
860:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);

862:   PetscFree(tmp);
863:   ICALLOC(2*nedgeLoc,&tmp);
864:   PetscMemcpy(tmp,grid->eptr,2*nedgeLoc*sizeof(int));
865: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
866:   EdgeColoring(nvertices,nedgeLoc,grid->eptr,eperm,&grid->ncolor,grid->ncount);
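        /* The coloring is intended to group edges so that no two edges of the same
           color share an end node; the flux loop can then be threaded one color at a
           time without write conflicts on the nodal residual (see the ncolor/ncount
           arguments passed to f77GETRES()). */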
867: #else
868:   /* Now reorder the edges for better cache locality */
869:   /*
870:   tmp[0]=7;tmp[1]=6;tmp[2]=3;tmp[3]=9;tmp[4]=2;tmp[5]=0;
871:   PetscSortIntWithPermutation(6,tmp,eperm);
872:   for (i=0; i<6; i++)
873:    printf("%d %d %d\n",i,tmp[i],eperm[i]);
874:   */
875:   flg  = PETSC_FALSE;
 876:   PetscOptionsGetBool(NULL,NULL,"-no_edge_reordering",&flg,NULL);
877:   if (!flg) {
878:     PetscSortIntWithPermutation(nedgeLoc,tmp,eperm);
879:   }
880: #endif
881:   PetscMallocValidate(__LINE__,PETSC_FUNCTION_NAME,__FILE__);
882:   k    = 0;
883:   for (i = 0; i < nedgeLoc; i++) {
884:     int cross_node=nnodesLoc/2;
885:     node1 = tmp[eperm[i]] + 1;
886:     node2 = tmp[nedgeLoc+eperm[i]] + 1;
887: #if defined(INTERLACING)
888:     grid->eptr[k++] = node1;
889:     grid->eptr[k++] = node2;
890: #else
891:     grid->eptr[i]          = node1;
892:     grid->eptr[nedgeLoc+i] = node2;
893: #endif
894:     /* if (node1 > node2)
895:      printf("On processor %d, for edge %d node1 = %d, node2 = %d\n",
896:             rank,i,node1,node2);*/
897:     if ((node1 <= cross_node) && (node2 > cross_node)) cross_edges++;
898:   }
899:   PetscPrintf(comm,"Number of cross edges %d\n", cross_edges);
900:   PetscFree(tmp);
901: #if defined(_OPENMP) && !defined(HAVE_REDUNDANT_WORK) && !defined(HAVE_EDGE_COLORING)
902:   /* Now make the local 'ia' and 'ja' arrays */
903:   ICALLOC(nvertices+1,&grid->ia);
904:   /* Use tmp for a work array */
905:   ICALLOC(nvertices,&tmp);
906:   f77GETIA(&nvertices,&nedgeLoc,grid->eptr,grid->ia,tmp,&rank);
907:   nnz = grid->ia[nvertices] - 1;
908:   ICALLOC(nnz,&grid->ja);
909:   f77GETJA(&nvertices,&nedgeLoc,grid->eptr,grid->ia,grid->ja,tmp,&rank);
910:   PetscFree(tmp);
911: #else
912:   /* Now make the local 'ia' and 'ja' arrays */
913:   ICALLOC(nnodesLoc+1,&grid->ia);
914:   /* Use tmp for a work array */
915:   ICALLOC(nnodesLoc,&tmp);
916:   f77GETIA(&nnodesLoc,&nedgeLoc,grid->eptr,grid->ia,tmp,&rank);
917:   nnz = grid->ia[nnodesLoc] - 1;
918: #if defined(BLOCKING)
919:   PetscPrintf(comm,"The Jacobian has %d non-zero blocks with block size = %d\n",nnz,bs);
920: #else
921:   PetscPrintf(comm,"The Jacobian has %d non-zeros\n",nnz);
922: #endif
923:   ICALLOC(nnz,&grid->ja);
924:   f77GETJA(&nnodesLoc,&nedgeLoc,grid->eptr,grid->ia,grid->ja,tmp,&rank);
925:   PetscFree(tmp);
926: #endif
927:   ICALLOC(nvertices,&grid->loc2glo);
928:   PetscMemcpy(grid->loc2glo,l2a,nvertices*sizeof(int));
929:   PetscFree(l2a);
930:   l2a  = grid->loc2glo;
931:   ICALLOC(nvertices,&grid->loc2pet);
932:   l2p  = grid->loc2pet;
933:   PetscMemcpy(l2p,l2a,nvertices*sizeof(int));
934:   AOApplicationToPetsc(ao,nvertices,l2p);

936:   /* Renumber unit normals of dual face (from node1 to node2)
937:       and the area of the dual mesh face */
938:   FCALLOC(nedgeLocEst,&ftmp);
939:   FCALLOC(nedgeLoc,&ftmp1);
940:   FCALLOC(4*nedgeLoc,&grid->xyzn);
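      /* Layout of grid->xyzn built below: with INTERLACING, entries 4*i .. 4*i+3 hold
         (n_x, n_y, n_z, dual-face area) for reordered local edge i; otherwise the four
         components are stored in separate blocks of length nedgeLoc. */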
941:   /* Do the x-component */
942:   i = 0; k = 0;
943:   remEdges = nedge;
944:   PetscTime(&time_ini);
945:   while (remEdges > 0) {
946:     readEdges = PetscMin(remEdges,nedgeLocEst);
947:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
948:     for (j = 0; j < readEdges; j++)
949:       if (edge_bit[k] == (i+j)) {
950:         ftmp1[k] = ftmp[j];
951:         k++;
952:       }
953:     i       += readEdges;
954:     remEdges = remEdges - readEdges;
955:     MPI_Barrier(comm);
956:   }
957:   for (i = 0; i < nedgeLoc; i++)
958: #if defined(INTERLACING)
959:     grid->xyzn[4*i] = ftmp1[eperm[i]];
960: #else
961:     grid->xyzn[i] = ftmp1[eperm[i]];
962: #endif
963:   /* Do the y-component */
964:   i = 0; k = 0;
965:   remEdges = nedge;
966:   while (remEdges > 0) {
967:     readEdges = PetscMin(remEdges,nedgeLocEst);
968:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
969:     for (j = 0; j < readEdges; j++)
970:       if (edge_bit[k] == (i+j)) {
971:         ftmp1[k] = ftmp[j];
972:         k++;
973:       }
974:     i       += readEdges;
975:     remEdges = remEdges - readEdges;
976:     MPI_Barrier(comm);
977:   }
978:   for (i = 0; i < nedgeLoc; i++)
979: #if defined(INTERLACING)
980:     grid->xyzn[4*i+1] = ftmp1[eperm[i]];
981: #else
982:     grid->xyzn[nedgeLoc+i] = ftmp1[eperm[i]];
983: #endif
984:   /* Do the z-component */
985:   i = 0; k = 0;
986:   remEdges = nedge;
987:   while (remEdges > 0) {
988:     readEdges = PetscMin(remEdges,nedgeLocEst);
989:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
990:     for (j = 0; j < readEdges; j++)
991:       if (edge_bit[k] == (i+j)) {
992:         ftmp1[k] = ftmp[j];
993:         k++;
994:       }
995:     i       += readEdges;
996:     remEdges = remEdges - readEdges;
997:     MPI_Barrier(comm);
998:   }
999:   for (i = 0; i < nedgeLoc; i++)
1000: #if defined(INTERLACING)
1001:     grid->xyzn[4*i+2] = ftmp1[eperm[i]];
1002: #else
1003:     grid->xyzn[2*nedgeLoc+i] = ftmp1[eperm[i]];
1004: #endif
1005:   /* Do the area */
1006:   i = 0; k = 0;
1007:   remEdges = nedge;
1008:   while (remEdges > 0) {
1009:     readEdges = PetscMin(remEdges,nedgeLocEst);
1010:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
1011:     for (j = 0; j < readEdges; j++)
1012:       if (edge_bit[k] == (i+j)) {
1013:         ftmp1[k] = ftmp[j];
1014:         k++;
1015:       }
1016:     i       += readEdges;
1017:     remEdges = remEdges - readEdges;
1018:     MPI_Barrier(comm);
1019:   }
1020:   for (i = 0; i < nedgeLoc; i++)
1021: #if defined(INTERLACING)
1022:     grid->xyzn[4*i+3] = ftmp1[eperm[i]];
1023: #else
1024:     grid->xyzn[3*nedgeLoc+i] = ftmp1[eperm[i]];
1025: #endif

1027:   PetscFree(edge_bit);
1028:   PetscFree(eperm);
1029:   PetscFree(ftmp);
1030:   PetscFree(ftmp1);
1031:   PetscTime(&time_fin);
1032:   time_fin -= time_ini;
1033:   PetscPrintf(comm,"Edge normals partitioned\n");
1034:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
1035: #if defined(_OPENMP)
1036:   /*Arrange for the division of work among threads*/
1037: #if defined(HAVE_EDGE_COLORING)
1038: #elif defined(HAVE_REDUNDANT_WORK)
1039:   FCALLOC(4*nnodesLoc,   &grid->resd);
1040: #else
1041:   {
1042:     /* Get the local adjacency structure of the graph for partitioning the local
1043:       graph into max_threads pieces */
1044:     int *ia,*ja,*vwtg=0,*adjwgt=0,options[5];
1045:     int numflag = 0, wgtflag = 0, edgecut;
1046:     int thr1,thr2,nedgeAllThreads,ned1,ned2;
1047:     ICALLOC((nvertices+1),&ia);
1048:     ICALLOC((2*nedgeLoc),&ja);
1049:     ia[0] = 0;
1050:     for (i = 1; i <= nvertices; i++) ia[i] = grid->ia[i]-i-1;
1051:     for (i = 0; i < nvertices; i++) {
1052:       int jstart,jend;
1053:       jstart = grid->ia[i]-1;
1054:       jend   = grid->ia[i+1]-1;
1055:       k      = ia[i];
1056:       for (j=jstart; j < jend; j++) {
1057:         inode = grid->ja[j]-1;
1058:         if (inode != i) ja[k++] = inode;
1059:       }
1060:     }
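            /* At this point ia[]/ja[] hold a 0-based CSR adjacency of the local graph
               with the self-connections removed, which is the form expected by
               METIS_PartGraphRecursive() with numflag = 0. */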
1061:     ICALLOC(nvertices,&grid->part_thr);
1062:     PetscMemzero(grid->part_thr,nvertices*sizeof(int));
1063:     options[0] = 0;
 1064:     /* Call the METIS library routine */
1065:     if (max_threads > 1)
1066:       METIS_PartGraphRecursive(&nvertices,ia,ja,vwtg,adjwgt,
1067:                                &wgtflag,&numflag,&max_threads,options,&edgecut,grid->part_thr);
1068:     PetscPrintf(MPI_COMM_WORLD,"The number of cut edges is %d\n", edgecut);
1069:     /* Write the partition vector to disk */
1070:     flg  = PETSC_FALSE;
 1071:     PetscOptionsGetBool(NULL,NULL,"-omp_partitioning",&flg,NULL);
1072:     if (flg) {
1073:       int  *partv_loc, *partv_glo;
1074:       int  *disp,*counts,*loc2glo_glo;
1075:       char part_file[PETSC_MAX_PATH_LEN];
1076:       FILE *fp;

1078:       ICALLOC(nnodes, &partv_glo);
1079:       ICALLOC(nnodesLoc, &partv_loc);
1080:       for (i = 0; i < nnodesLoc; i++)
1081:         /*partv_loc[i] = grid->part_thr[i]*size + rank;*/
1082:         partv_loc[i] = grid->part_thr[i] + max_threads*rank;
1083:       ICALLOC(size,&disp);
1084:       ICALLOC(size,&counts);
1085:       MPI_Allgather(&nnodesLoc,1,MPI_INT,counts,1,MPI_INT,MPI_COMM_WORLD);
1086:       disp[0] = 0;
1087:       for (i = 1; i < size; i++) disp[i] = counts[i-1] + disp[i-1];
1088:       ICALLOC(nnodes, &loc2glo_glo);
1089:       MPI_Gatherv(grid->loc2glo,nnodesLoc,MPI_INT,loc2glo_glo,counts,disp,MPI_INT,0,MPI_COMM_WORLD);
1090:       MPI_Gatherv(partv_loc,nnodesLoc,MPI_INT,partv_glo,counts,disp,MPI_INT,0,MPI_COMM_WORLD);
1091:       if (rank == 0) {
1092:         PetscSortIntWithArray(nnodes,loc2glo_glo,partv_glo);
1093:         sprintf(part_file,"hyb_part_vec.%d",2*size);
1094:         fp = fopen(part_file,"w");
1095:         for (i = 0; i < nnodes; i++) fprintf(fp,"%d\n",partv_glo[i]);
1096:         fclose(fp);
1097:       }
1098:       PetscFree(partv_loc);
1099:       PetscFree(partv_glo);
1100:       PetscFree(disp);
1101:       PetscFree(counts);
1102:       PetscFree(loc2glo_glo);
1103:     }

1105:     /* Divide the work among threads */
1106:     k = 0;
1107:     ICALLOC((max_threads+1),&grid->nedge_thr);
1108:     PetscMemzero(grid->nedge_thr,(max_threads+1)*sizeof(int));
1109:     cross_edges = 0;
1110:     for (i = 0; i < nedgeLoc; i++) {
1111:       node1 = grid->eptr[k++]-1;
1112:       node2 = grid->eptr[k++]-1;
1113:       thr1  = grid->part_thr[node1];
1114:       thr2  = grid->part_thr[node2];
1115:       grid->nedge_thr[thr1]+=1;
1116:       if (thr1 != thr2) {
1117:         grid->nedge_thr[thr2]+=1;
1118:         cross_edges++;
1119:       }
1120:     }
1121:     PetscPrintf(MPI_COMM_WORLD,"The number of cross edges after Metis partitioning is %d\n",cross_edges);
1122:     ned1 = grid->nedge_thr[0];
1123:     grid->nedge_thr[0] = 1;
1124:     for (i = 1; i <= max_threads; i++) {
1125:       ned2 = grid->nedge_thr[i];
1126:       grid->nedge_thr[i] = grid->nedge_thr[i-1]+ned1;
1127:       ned1 = ned2;
1128:     }
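          /* grid->nedge_thr[] now holds 1-based running offsets (prefix sums of the
             per-thread counts), so thread t owns entries nedge_thr[t] .. nedge_thr[t+1]-1
             of the shared edge array built next. */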
 1129:     /* Allocate a shared edge array. Note that a cut edge is evaluated
 1130:         by both threads, but updates are done only for the locally
 1131:         owned node */
1132:     grid->nedgeAllThr = nedgeAllThreads = grid->nedge_thr[max_threads]-1;
1133:     ICALLOC(2*nedgeAllThreads, &grid->edge_thr);
1134:     ICALLOC(max_threads,&tmp);
1135:     FCALLOC(4*nedgeAllThreads,&grid->xyzn_thr);
1136:     for (i = 0; i < max_threads; i++) tmp[i] = grid->nedge_thr[i]-1;
1137:     k = 0;
1138:     for (i = 0; i < nedgeLoc; i++) {
1139:       int ie1,ie2,ie3;
1140:       node1 = grid->eptr[k++];
1141:       node2 = grid->eptr[k++];
1142:       thr1  = grid->part_thr[node1-1];
1143:       thr2  = grid->part_thr[node2-1];
1144:       ie1   = 2*tmp[thr1];
1145:       ie2   = 4*tmp[thr1];
1146:       ie3   = 4*i;

1148:       grid->edge_thr[ie1]   = node1;
1149:       grid->edge_thr[ie1+1] = node2;
1150:       grid->xyzn_thr[ie2]   = grid->xyzn[ie3];
1151:       grid->xyzn_thr[ie2+1] = grid->xyzn[ie3+1];
1152:       grid->xyzn_thr[ie2+2] = grid->xyzn[ie3+2];
1153:       grid->xyzn_thr[ie2+3] = grid->xyzn[ie3+3];

1155:       tmp[thr1]+=1;
1156:       if (thr1 != thr2) {
1157:         ie1 = 2*tmp[thr2];
1158:         ie2 = 4*tmp[thr2];

1160:         grid->edge_thr[ie1]   = node1;
1161:         grid->edge_thr[ie1+1] = node2;
1162:         grid->xyzn_thr[ie2]   = grid->xyzn[ie3];
1163:         grid->xyzn_thr[ie2+1] = grid->xyzn[ie3+1];
1164:         grid->xyzn_thr[ie2+2] = grid->xyzn[ie3+2];
1165:         grid->xyzn_thr[ie2+3] = grid->xyzn[ie3+3];

1167:         tmp[thr2]+=1;
1168:       }
1169:     }
1170:   }
1171: #endif
1172: #endif

1174:   /* Remap coordinates */
1175:   /*nnodesLocEst = nnodes/size;*/
1176:   nnodesLocEst = PetscMin(nnodes,500000);
1177:   FCALLOC(nnodesLocEst,&ftmp);
1178:   FCALLOC(3*nvertices,&grid->xyz);
1179:   remNodes = nnodes;
1180:   i        = 0;
1181:   PetscTime(&time_ini);
1182:   while (remNodes > 0) {
1183:     readNodes = PetscMin(remNodes,nnodesLocEst);
1184:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1185:     for (j = 0; j < readNodes; j++) {
1186:       if (a2l[i+j] >= 0) {
1187: #if defined(INTERLACING)
1188:         grid->xyz[3*a2l[i+j]] = ftmp[j];
1189: #else
1190:         grid->xyz[a2l[i+j]] = ftmp[j];
1191: #endif
1192:       }
1193:     }
1194:     i        += nnodesLocEst;
1195:     remNodes -= nnodesLocEst;
1196:     MPI_Barrier(comm);
1197:   }

1199:   remNodes = nnodes;
1200:   i = 0;
1201:   while (remNodes > 0) {
1202:     readNodes = PetscMin(remNodes,nnodesLocEst);
1203:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1204:     for (j = 0; j < readNodes; j++) {
1205:       if (a2l[i+j] >= 0) {
1206: #if defined(INTERLACING)
1207:         grid->xyz[3*a2l[i+j]+1] = ftmp[j];
1208: #else
1209:         grid->xyz[nnodesLoc+a2l[i+j]] = ftmp[j];
1210: #endif
1211:       }
1212:     }
1213:     i        += nnodesLocEst;
1214:     remNodes -= nnodesLocEst;
1215:     MPI_Barrier(comm);
1216:   }

1218:   remNodes = nnodes;
1219:   i        = 0;
1220:   while (remNodes > 0) {
1221:     readNodes = PetscMin(remNodes,nnodesLocEst);
1222:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1223:     for (j = 0; j < readNodes; j++) {
1224:       if (a2l[i+j] >= 0) {
1225: #if defined(INTERLACING)
1226:         grid->xyz[3*a2l[i+j]+2] = ftmp[j];
1227: #else
1228:         grid->xyz[2*nnodesLoc+a2l[i+j]] = ftmp[j];
1229: #endif
1230:       }
1231:     }
1232:     i        += nnodesLocEst;
1233:     remNodes -= nnodesLocEst;
1234:     MPI_Barrier(comm);
1235:   }

1237:   /* Renumber dual volume "area" */
1238:   FCALLOC(nvertices,&grid->area);
1239:   remNodes = nnodes;
1240:   i        = 0;
1241:   while (remNodes > 0) {
1242:     readNodes = PetscMin(remNodes,nnodesLocEst);
1243:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1244:     for (j = 0; j < readNodes; j++)
1245:       if (a2l[i+j] >= 0)
1246:         grid->area[a2l[i+j]] = ftmp[j];
1247:     i        += nnodesLocEst;
1248:     remNodes -= nnodesLocEst;
1249:     MPI_Barrier(comm);
1250:   }

1252:   PetscFree(ftmp);
1253:   PetscTime(&time_fin);
1254:   time_fin -= time_ini;
1255:   PetscPrintf(comm,"Coordinates remapped\n");
1256:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);

1258: /* Now,handle all the solid boundaries - things to be done :
1259:  * 1. Identify the nodes belonging to the solid
1260:  *    boundaries and count them.
1261:  * 2. Put proper indices into f2ntn array,after making it
1262:  *    of suitable size.
1263:  * 3. Remap the normals and areas of solid faces (sxn,syn,szn,
1264:  *    and sa arrays).
1265:  */
1266:   ICALLOC(nnbound,  &grid->nntet);
1267:   ICALLOC(nnbound,  &grid->nnpts);
1268:   ICALLOC(4*nnfacet,&grid->f2ntn);
1269:   ICALLOC(nsnode,&grid->isnode);
1270:   FCALLOC(nsnode,&grid->sxn);
1271:   FCALLOC(nsnode,&grid->syn);
1272:   FCALLOC(nsnode,&grid->szn);
1273:   FCALLOC(nsnode,&grid->sa);
1274:   PetscBinarySynchronizedRead(comm,fdes,grid->nntet,nnbound,PETSC_INT);
1275:   PetscBinarySynchronizedRead(comm,fdes,grid->nnpts,nnbound,PETSC_INT);
1276:   PetscBinarySynchronizedRead(comm,fdes,grid->f2ntn,4*nnfacet,PETSC_INT);
1277:   PetscBinarySynchronizedRead(comm,fdes,grid->isnode,nsnode,PETSC_INT);
1278:   PetscBinarySynchronizedRead(comm,fdes,grid->sxn,nsnode,PETSC_SCALAR);
1279:   PetscBinarySynchronizedRead(comm,fdes,grid->syn,nsnode,PETSC_SCALAR);
1280:   PetscBinarySynchronizedRead(comm,fdes,grid->szn,nsnode,PETSC_SCALAR);

1282:   isurf      = 0;
1283:   nsnodeLoc  = 0;
1284:   nnfacetLoc = 0;
1285:   nb         = 0;
1286:   nte        = 0;
1287:   ICALLOC(3*nnfacet,&tmp);
1288:   ICALLOC(nsnode,&tmp1);
1289:   ICALLOC(nnodes,&tmp2);
1290:   FCALLOC(4*nsnode,&ftmp);
1291:   PetscMemzero(tmp,3*nnfacet*sizeof(int));
1292:   PetscMemzero(tmp1,nsnode*sizeof(int));
1293:   PetscMemzero(tmp2,nnodes*sizeof(int));

1295:   j = 0;
1296:   for (i = 0; i < nsnode; i++) {
1297:     node1 = a2l[grid->isnode[i] - 1];
1298:     if (node1 >= 0) {
1299:       tmp1[nsnodeLoc] = node1;
1300:       tmp2[node1]     = nsnodeLoc;
1301:       ftmp[j++]       = grid->sxn[i];
1302:       ftmp[j++]       = grid->syn[i];
1303:       ftmp[j++]       = grid->szn[i];
1304:       ftmp[j++]       = grid->sa[i];
1305:       nsnodeLoc++;
1306:     }
1307:   }
1308:   for (i = 0; i < nnbound; i++) {
1309:     for (j = isurf; j < isurf + grid->nntet[i]; j++) {
1310:       node1 = a2l[grid->isnode[grid->f2ntn[j] - 1] - 1];
1311:       node2 = a2l[grid->isnode[grid->f2ntn[nnfacet + j] - 1] - 1];
1312:       node3 = a2l[grid->isnode[grid->f2ntn[2*nnfacet + j] - 1] - 1];

1314:       if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1315:         nnfacetLoc++;
1316:         nte++;
1317:         tmp[nb++] = tmp2[node1];
1318:         tmp[nb++] = tmp2[node2];
1319:         tmp[nb++] = tmp2[node3];
1320:       }
1321:     }
1322:     isurf += grid->nntet[i];
1323:     /*printf("grid->nntet[%d] before reordering is %d\n",i,grid->nntet[i]);*/
1324:     grid->nntet[i] = nte;
1325:     /*printf("grid->nntet[%d] after reordering is %d\n",i,grid->nntet[i]);*/
1326:     nte = 0;
1327:   }
1328:   PetscFree(grid->f2ntn);
1329:   PetscFree(grid->isnode);
1330:   PetscFree(grid->sxn);
1331:   PetscFree(grid->syn);
1332:   PetscFree(grid->szn);
1333:   PetscFree(grid->sa);
1334:   ICALLOC(4*nnfacetLoc,&grid->f2ntn);
1335:   ICALLOC(nsnodeLoc,&grid->isnode);
1336:   FCALLOC(nsnodeLoc,&grid->sxn);
1337:   FCALLOC(nsnodeLoc,&grid->syn);
1338:   FCALLOC(nsnodeLoc,&grid->szn);
1339:   FCALLOC(nsnodeLoc,&grid->sa);
1340:   j = 0;
1341:   for (i = 0; i < nsnodeLoc; i++) {
1342:     grid->isnode[i] = tmp1[i] + 1;
1343:     grid->sxn[i]    = ftmp[j++];
1344:     grid->syn[i]    = ftmp[j++];
1345:     grid->szn[i]    = ftmp[j++];
1346:     grid->sa[i]     = ftmp[j++];
1347:   }
1348:   j = 0;
1349:   for (i = 0; i < nnfacetLoc; i++) {
1350:     grid->f2ntn[i]              = tmp[j++] + 1;
1351:     grid->f2ntn[nnfacetLoc+i]   = tmp[j++] + 1;
1352:     grid->f2ntn[2*nnfacetLoc+i] = tmp[j++] + 1;
1353:   }
1354:   PetscFree(tmp);
1355:   PetscFree(tmp1);
1356:   PetscFree(tmp2);
1357:   PetscFree(ftmp);

 1359: /* Now identify the triangles on which the current processor
1360:    would perform force calculation */
1361:   ICALLOC(nnfacetLoc,&grid->sface_bit);
1362:   PetscMemzero(grid->sface_bit,nnfacetLoc*sizeof(int));
1363:   for (i = 0; i < nnfacetLoc; i++) {
1364:     node1 = l2a[grid->isnode[grid->f2ntn[i] - 1] - 1];
1365:     node2 = l2a[grid->isnode[grid->f2ntn[nnfacetLoc + i] - 1] - 1];
1366:     node3 = l2a[grid->isnode[grid->f2ntn[2*nnfacetLoc + i] - 1] - 1];
1367:     if (((v2p[node1] >= rank) && (v2p[node2] >= rank)
1368:          && (v2p[node3] >= rank)) &&
1369:         ((v2p[node1] == rank) || (v2p[node2] == rank)
1370:          || (v2p[node3] == rank)))
1371:       grid->sface_bit[i] = 1;
1372:   }
1373:   /*printf("On processor %d total solid triangles = %d,locally owned = %d alpha = %d\n",rank,totTr,myTr,alpha);*/
1374:   PetscPrintf(comm,"Solid boundaries partitioned\n");

1376: /* Now,handle all the viscous boundaries - things to be done :
1377:  * 1. Identify the nodes belonging to the viscous
1378:  *    boundaries and count them.
1379:  * 2. Put proper indices into f2ntv array,after making it
1380:  *    of suitable size
1381:  * 3. Remap the normals and areas of viscous faces (vxn,vyn,vzn,
1382:  *    and va arrays).
1383:  */
1384:   ICALLOC(nvbound,  &grid->nvtet);
1385:   ICALLOC(nvbound,  &grid->nvpts);
1386:   ICALLOC(4*nvfacet,&grid->f2ntv);
1387:   ICALLOC(nvnode,&grid->ivnode);
1388:   FCALLOC(nvnode,&grid->vxn);
1389:   FCALLOC(nvnode,&grid->vyn);
1390:   FCALLOC(nvnode,&grid->vzn);
1391:   FCALLOC(nvnode,&grid->va);
1392:   PetscBinarySynchronizedRead(comm,fdes,grid->nvtet,nvbound,PETSC_INT);
1393:   PetscBinarySynchronizedRead(comm,fdes,grid->nvpts,nvbound,PETSC_INT);
1394:   PetscBinarySynchronizedRead(comm,fdes,grid->f2ntv,4*nvfacet,PETSC_INT);
1395:   PetscBinarySynchronizedRead(comm,fdes,grid->ivnode,nvnode,PETSC_INT);
1396:   PetscBinarySynchronizedRead(comm,fdes,grid->vxn,nvnode,PETSC_SCALAR);
1397:   PetscBinarySynchronizedRead(comm,fdes,grid->vyn,nvnode,PETSC_SCALAR);
1398:   PetscBinarySynchronizedRead(comm,fdes,grid->vzn,nvnode,PETSC_SCALAR);

1400:   isurf      = 0;
1401:   nvnodeLoc  = 0;
1402:   nvfacetLoc = 0;
1403:   nb         = 0;
1404:   nte        = 0;
1405:   ICALLOC(3*nvfacet,&tmp);
1406:   ICALLOC(nvnode,&tmp1);
1407:   ICALLOC(nnodes,&tmp2);
1408:   FCALLOC(4*nvnode,&ftmp);
1409:   PetscMemzero(tmp,3*nvfacet*sizeof(int));
1410:   PetscMemzero(tmp1,nvnode*sizeof(int));
1411:   PetscMemzero(tmp2,nnodes*sizeof(int));

1413:   j = 0;
1414:   for (i = 0; i < nvnode; i++) {
1415:     node1 = a2l[grid->ivnode[i] - 1];
1416:     if (node1 >= 0) {
1417:       tmp1[nvnodeLoc] = node1;
1418:       tmp2[node1]     = nvnodeLoc;
1419:       ftmp[j++]       = grid->vxn[i];
1420:       ftmp[j++]       = grid->vyn[i];
1421:       ftmp[j++]       = grid->vzn[i];
1422:       ftmp[j++]       = grid->va[i];
1423:       nvnodeLoc++;
1424:     }
1425:   }
1426:   for (i = 0; i < nvbound; i++) {
1427:     for (j = isurf; j < isurf + grid->nvtet[i]; j++) {
1428:       node1 = a2l[grid->ivnode[grid->f2ntv[j] - 1] - 1];
1429:       node2 = a2l[grid->ivnode[grid->f2ntv[nvfacet + j] - 1] - 1];
1430:       node3 = a2l[grid->ivnode[grid->f2ntv[2*nvfacet + j] - 1] - 1];
1431:       if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1432:         nvfacetLoc++;
1433:         nte++;
1434:         tmp[nb++] = tmp2[node1];
1435:         tmp[nb++] = tmp2[node2];
1436:         tmp[nb++] = tmp2[node3];
1437:       }
1438:     }
1439:     isurf         += grid->nvtet[i];
1440:     grid->nvtet[i] = nte;
1441:     nte            = 0;
1442:   }
1443:   PetscFree(grid->f2ntv);
1444:   PetscFree(grid->ivnode);
1445:   PetscFree(grid->vxn);
1446:   PetscFree(grid->vyn);
1447:   PetscFree(grid->vzn);
1448:   PetscFree(grid->va);
1449:   ICALLOC(4*nvfacetLoc,&grid->f2ntv);
1450:   ICALLOC(nvnodeLoc,&grid->ivnode);
1451:   FCALLOC(nvnodeLoc,&grid->vxn);
1452:   FCALLOC(nvnodeLoc,&grid->vyn);
1453:   FCALLOC(nvnodeLoc,&grid->vzn);
1454:   FCALLOC(nvnodeLoc,&grid->va);
1455:   j = 0;
1456:   for (i = 0; i < nvnodeLoc; i++) {
1457:     grid->ivnode[i] = tmp1[i] + 1;
1458:     grid->vxn[i]    = ftmp[j++];
1459:     grid->vyn[i]    = ftmp[j++];
1460:     grid->vzn[i]    = ftmp[j++];
1461:     grid->va[i]     = ftmp[j++];
1462:   }
1463:   j = 0;
1464:   for (i = 0; i < nvfacetLoc; i++) {
1465:     grid->f2ntv[i]              = tmp[j++] + 1;
1466:     grid->f2ntv[nvfacetLoc+i]   = tmp[j++] + 1;
1467:     grid->f2ntv[2*nvfacetLoc+i] = tmp[j++] + 1;
1468:   }
1469:   PetscFree(tmp);
1470:   PetscFree(tmp1);
1471:   PetscFree(tmp2);
1472:   PetscFree(ftmp);

1474: /* Now identify the triangles on which the current processor
1475:    would perform force calculation */
1476:   ICALLOC(nvfacetLoc,&grid->vface_bit);
1477:   PetscMemzero(grid->vface_bit,nvfacetLoc*sizeof(int));
1478:   for (i = 0; i < nvfacetLoc; i++) {
1479:     node1 = l2a[grid->ivnode[grid->f2ntv[i] - 1] - 1];
1480:     node2 = l2a[grid->ivnode[grid->f2ntv[nvfacetLoc + i] - 1] - 1];
1481:     node3 = l2a[grid->ivnode[grid->f2ntv[2*nvfacetLoc + i] - 1] - 1];
1482:     if (((v2p[node1] >= rank) && (v2p[node2] >= rank)
1483:          && (v2p[node3] >= rank)) &&
1484:         ((v2p[node1] == rank) || (v2p[node2] == rank)
1485:         || (v2p[node3] == rank))) {
1486:          grid->vface_bit[i] = 1;
1487:     }
1488:   }
1489:   PetscFree(v2p);
1490:   PetscPrintf(comm,"Viscous boundaries partitioned\n");

1492: /* Now, handle all the free boundaries - things to be done:
1493:  * 1. Identify the nodes belonging to the free
1494:  *    boundaries and count them.
1495:  * 2. Put proper indices into the f2ntf array, after making it
1496:  *    of suitable size.
1497:  * 3. Remap the normals and areas of the free boundary faces (fxn,fyn,fzn,
1498:  *    and fa arrays).
1499:  */

1501:   ICALLOC(nfbound,  &grid->nftet);
1502:   ICALLOC(nfbound,  &grid->nfpts);
1503:   ICALLOC(4*nffacet,&grid->f2ntf);
1504:   ICALLOC(nfnode,&grid->ifnode);
1505:   FCALLOC(nfnode,&grid->fxn);
1506:   FCALLOC(nfnode,&grid->fyn);
1507:   FCALLOC(nfnode,&grid->fzn);
1508:   FCALLOC(nfnode,&grid->fa);
1509:   PetscBinarySynchronizedRead(comm,fdes,grid->nftet,nfbound,PETSC_INT);
1510:   PetscBinarySynchronizedRead(comm,fdes,grid->nfpts,nfbound,PETSC_INT);
1511:   PetscBinarySynchronizedRead(comm,fdes,grid->f2ntf,4*nffacet,PETSC_INT);
1512:   PetscBinarySynchronizedRead(comm,fdes,grid->ifnode,nfnode,PETSC_INT);
1513:   PetscBinarySynchronizedRead(comm,fdes,grid->fxn,nfnode,PETSC_SCALAR);
1514:   PetscBinarySynchronizedRead(comm,fdes,grid->fyn,nfnode,PETSC_SCALAR);
1515:   PetscBinarySynchronizedRead(comm,fdes,grid->fzn,nfnode,PETSC_SCALAR);

1517:   isurf      = 0;
1518:   nfnodeLoc  = 0;
1519:   nffacetLoc = 0;
1520:   nb         = 0;
1521:   nte        = 0;
1522:   ICALLOC(3*nffacet,&tmp);
1523:   ICALLOC(nfnode,&tmp1);
1524:   ICALLOC(nnodes,&tmp2);
1525:   FCALLOC(4*nfnode,&ftmp);
1526:   PetscMemzero(tmp,3*nffacet*sizeof(int));
1527:   PetscMemzero(tmp1,nfnode*sizeof(int));
1528:   PetscMemzero(tmp2,nnodes*sizeof(int));

1530:   j = 0;
1531:   for (i = 0; i < nfnode; i++) {
1532:     node1 = a2l[grid->ifnode[i] - 1];
1533:     if (node1 >= 0) {
1534:       tmp1[nfnodeLoc] = node1;
1535:       tmp2[node1]     = nfnodeLoc;
1536:       ftmp[j++]       = grid->fxn[i];
1537:       ftmp[j++]       = grid->fyn[i];
1538:       ftmp[j++]       = grid->fzn[i];
1539:       ftmp[j++]       = grid->fa[i];
1540:       nfnodeLoc++;
1541:     }
1542:   }
1543:   for (i = 0; i < nfbound; i++) {
1544:     for (j = isurf; j < isurf + grid->nftet[i]; j++) {
1545:       node1 = a2l[grid->ifnode[grid->f2ntf[j] - 1] - 1];
1546:       node2 = a2l[grid->ifnode[grid->f2ntf[nffacet + j] - 1] - 1];
1547:       node3 = a2l[grid->ifnode[grid->f2ntf[2*nffacet + j] - 1] - 1];
1548:       if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1549:         nffacetLoc++;
1550:         nte++;
1551:         tmp[nb++] = tmp2[node1];
1552:         tmp[nb++] = tmp2[node2];
1553:         tmp[nb++] = tmp2[node3];
1554:       }
1555:     }
1556:     isurf         += grid->nftet[i];
1557:     grid->nftet[i] = nte;
1558:     nte            = 0;
1559:   }
1560:   PetscFree(grid->f2ntf);
1561:   PetscFree(grid->ifnode);
1562:   PetscFree(grid->fxn);
1563:   PetscFree(grid->fyn);
1564:   PetscFree(grid->fzn);
1565:   PetscFree(grid->fa);
1566:   ICALLOC(4*nffacetLoc,&grid->f2ntf);
1567:   ICALLOC(nfnodeLoc,&grid->ifnode);
1568:   FCALLOC(nfnodeLoc,&grid->fxn);
1569:   FCALLOC(nfnodeLoc,&grid->fyn);
1570:   FCALLOC(nfnodeLoc,&grid->fzn);
1571:   FCALLOC(nfnodeLoc,&grid->fa);
1572:   j = 0;
1573:   for (i = 0; i < nfnodeLoc; i++) {
1574:     grid->ifnode[i] = tmp1[i] + 1;
1575:     grid->fxn[i]    = ftmp[j++];
1576:     grid->fyn[i]    = ftmp[j++];
1577:     grid->fzn[i]    = ftmp[j++];
1578:     grid->fa[i]     = ftmp[j++];
1579:   }
1580:   j = 0;
1581:   for (i = 0; i < nffacetLoc; i++) {
1582:     grid->f2ntf[i]              = tmp[j++] + 1;
1583:     grid->f2ntf[nffacetLoc+i]   = tmp[j++] + 1;
1584:     grid->f2ntf[2*nffacetLoc+i] = tmp[j++] + 1;
1585:   }

1587:   PetscFree(tmp);
1588:   PetscFree(tmp1);
1589:   PetscFree(tmp2);
1590:   PetscFree(ftmp);
1591:   PetscPrintf(comm,"Free boundaries partitioned\n");

1593:   flg  = PETSC_FALSE;
1594:   PetscOptionsGetBool(0,"-mem_use",&flg,NULL);
1595:   if (flg) {
1596:     PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after partitioning\n");
1597:   }

1599:   /* Put different mappings and other info into grid */
1600:   /* ICALLOC(nvertices,&grid->loc2pet);
1601:    ICALLOC(nvertices,&grid->loc2glo);
1602:    PetscMemcpy(grid->loc2pet,l2p,nvertices*sizeof(int));
1603:    PetscMemcpy(grid->loc2glo,l2a,nvertices*sizeof(int));
1604:    PetscFree(l2a);
1605:    PetscFree(l2p);*/

1607:   grid->nnodesLoc  = nnodesLoc;
1608:   grid->nedgeLoc   = nedgeLoc;
1609:   grid->nvertices  = nvertices;
1610:   grid->nsnodeLoc  = nsnodeLoc;
1611:   grid->nvnodeLoc  = nvnodeLoc;
1612:   grid->nfnodeLoc  = nfnodeLoc;
1613:   grid->nnfacetLoc = nnfacetLoc;
1614:   grid->nvfacetLoc = nvfacetLoc;
1615:   grid->nffacetLoc = nffacetLoc;
1616: /*
1617:  * FCALLOC(nvertices*4, &grid->gradx);
1618:  * FCALLOC(nvertices*4, &grid->grady);
1619:  * FCALLOC(nvertices*4, &grid->gradz);
1620:  */
1621:   FCALLOC(nvertices,   &grid->cdt);
1622:   FCALLOC(nvertices*4, &grid->phi);
1623: /*
1624:    FCALLOC(nvertices,   &grid->r11);
1625:    FCALLOC(nvertices,   &grid->r12);
1626:    FCALLOC(nvertices,   &grid->r13);
1627:    FCALLOC(nvertices,   &grid->r22);
1628:    FCALLOC(nvertices,   &grid->r23);
1629:    FCALLOC(nvertices,   &grid->r33);
1630: */
1631:   FCALLOC(7*nnodesLoc,   &grid->rxy);

1633: /* Map the 'ja' array in PETSc ordering */
1634:   for (i = 0; i < nnz; i++) grid->ja[i] = l2a[grid->ja[i] - 1];
1635:   AOApplicationToPetsc(ao,nnz,grid->ja);
1636:   AODestroy(&ao);

1638: /* Print the different mappings
1639:  *
1640:  */
1641:   {
1642:     int partLoc[7],partMax[7],partMin[7],partSum[7];
1643:     partLoc[0] = nnodesLoc;
1644:     partLoc[1] = nvertices;
1645:     partLoc[2] = nedgeLoc;
1646:     partLoc[3] = nnfacetLoc;
1647:     partLoc[4] = nffacetLoc;
1648:     partLoc[5] = nsnodeLoc;
1649:     partLoc[6] = nfnodeLoc;
1650:     for (i = 0; i < 7; i++) {
1651:       partMin[i] = 0;
1652:       partMax[i] = 0;
1653:       partSum[i] = 0;
1654:     }

1656:     MPI_Allreduce(partLoc,partMax,7,MPI_INT,MPI_MAX,comm);
1657:     MPI_Allreduce(partLoc,partMin,7,MPI_INT,MPI_MIN,comm);
1658:     MPI_Allreduce(partLoc,partSum,7,MPI_INT,MPI_SUM,comm);
1659:     PetscPrintf(comm,"==============================\n");
1660:     PetscPrintf(comm,"Partitioning quality info ....\n");
1661:     PetscPrintf(comm,"==============================\n");
1662:     PetscPrintf(comm,"------------------------------------------------------------\n");
1663:     PetscPrintf(comm,"Item                    Min        Max    Average      Total\n");
1664:     PetscPrintf(comm,"------------------------------------------------------------\n");
1665:     PetscPrintf(comm,"Local Nodes       %9d  %9d  %9d  %9d\n",
1666:                        partMin[0],partMax[0],partSum[0]/size,partSum[0]);
1667:     PetscPrintf(comm,"Local+Ghost Nodes %9d  %9d  %9d  %9d\n",
1668:                        partMin[1],partMax[1],partSum[1]/size,partSum[1]);
1669:     PetscPrintf(comm,"Local Edges       %9d  %9d  %9d  %9d\n",
1670:                        partMin[2],partMax[2],partSum[2]/size,partSum[2]);
1671:     PetscPrintf(comm,"Local solid faces %9d  %9d  %9d  %9d\n",
1672:                        partMin[3],partMax[3],partSum[3]/size,partSum[3]);
1673:     PetscPrintf(comm,"Local free faces  %9d  %9d  %9d  %9d\n",
1674:                        partMin[4],partMax[4],partSum[4]/size,partSum[4]);
1675:     PetscPrintf(comm,"Local solid nodes %9d  %9d  %9d  %9d\n",
1676:                        partMin[5],partMax[5],partSum[5]/size,partSum[5]);
1677:     PetscPrintf(comm,"Local free nodes  %9d  %9d  %9d  %9d\n",
1678:                        partMin[6],partMax[6],partSum[6]/size,partSum[6]);
1679:     PetscPrintf(comm,"------------------------------------------------------------\n");
1680:   }
1681:   flg  = PETSC_FALSE;
1682:   PetscOptionsGetBool(0,"-partition_info",&flg,NULL);
1683:   if (flg) {
1684:     char part_file[PETSC_MAX_PATH_LEN];
1685:     sprintf(part_file,"output.%d",rank);
1686:     fptr1 = fopen(part_file,"w");

1688:     fprintf(fptr1,"---------------------------------------------\n");
1689:     fprintf(fptr1,"Local and Global Grid Parameters are :\n");
1690:     fprintf(fptr1,"---------------------------------------------\n");
1691:     fprintf(fptr1,"Local\t\t\t\tGlobal\n");
1692:     fprintf(fptr1,"nnodesLoc = %d\t\tnnodes = %d\n",nnodesLoc,nnodes);
1693:     fprintf(fptr1,"nedgeLoc = %d\t\t\tnedge = %d\n",nedgeLoc,nedge);
1694:     fprintf(fptr1,"nnfacetLoc = %d\t\tnnfacet = %d\n",nnfacetLoc,nnfacet);
1695:     fprintf(fptr1,"nvfacetLoc = %d\t\t\tnvfacet = %d\n",nvfacetLoc,nvfacet);
1696:     fprintf(fptr1,"nffacetLoc = %d\t\t\tnffacet = %d\n",nffacetLoc,nffacet);
1697:     fprintf(fptr1,"nsnodeLoc = %d\t\t\tnsnode = %d\n",nsnodeLoc,nsnode);
1698:     fprintf(fptr1,"nvnodeLoc = %d\t\t\tnvnode = %d\n",nvnodeLoc,nvnode);
1699:     fprintf(fptr1,"nfnodeLoc = %d\t\t\tnfnode = %d\n",nfnodeLoc,nfnode);
1700:     fprintf(fptr1,"\n");
1701:     fprintf(fptr1,"nvertices = %d\n",nvertices);
1702:     fprintf(fptr1,"nnbound = %d\n",nnbound);
1703:     fprintf(fptr1,"nvbound = %d\n",nvbound);
1704:     fprintf(fptr1,"nfbound = %d\n",nfbound);
1705:     fprintf(fptr1,"\n");

1707:     fprintf(fptr1,"---------------------------------------------\n");
1708:     fprintf(fptr1,"Different Orderings\n");
1709:     fprintf(fptr1,"---------------------------------------------\n");
1710:     fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1711:     fprintf(fptr1,"---------------------------------------------\n");
1712:     for (i = 0; i < nvertices; i++) fprintf(fptr1,"%d\t\t%d\t\t%d\n",i,grid->loc2pet[i],grid->loc2glo[i]);
1713:     fprintf(fptr1,"\n");

1715:     fprintf(fptr1,"---------------------------------------------\n");
1716:     fprintf(fptr1,"Solid Boundary Nodes\n");
1717:     fprintf(fptr1,"---------------------------------------------\n");
1718:     fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1719:     fprintf(fptr1,"---------------------------------------------\n");
1720:     for (i = 0; i < nsnodeLoc; i++) {
1721:       j = grid->isnode[i]-1;
1722:       fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1723:     }
1724:     fprintf(fptr1,"\n");
1725:     fprintf(fptr1,"---------------------------------------------\n");
1726:     fprintf(fptr1,"f2ntn array\n");
1727:     fprintf(fptr1,"---------------------------------------------\n");
1728:     for (i = 0; i < nnfacetLoc; i++) {
1729:       fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntn[i],
1730:               grid->f2ntn[nnfacetLoc+i],grid->f2ntn[2*nnfacetLoc+i]);
1731:     }
1732:     fprintf(fptr1,"\n");

1734:     fprintf(fptr1,"---------------------------------------------\n");
1735:     fprintf(fptr1,"Viscous Boundary Nodes\n");
1736:     fprintf(fptr1,"---------------------------------------------\n");
1737:     fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1738:     fprintf(fptr1,"---------------------------------------------\n");
1739:     for (i = 0; i < nvnodeLoc; i++) {
1740:       j = grid->ivnode[i]-1;
1741:       fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1742:     }
1743:     fprintf(fptr1,"\n");
1744:     fprintf(fptr1,"---------------------------------------------\n");
1745:     fprintf(fptr1,"f2ntv array\n");
1746:     fprintf(fptr1,"---------------------------------------------\n");
1747:     for (i = 0; i < nvfacetLoc; i++) {
1748:       fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntv[i],
1749:               grid->f2ntv[nvfacetLoc+i],grid->f2ntv[2*nvfacetLoc+i]);
1750:     }
1751:     fprintf(fptr1,"\n");

1753:     fprintf(fptr1,"---------------------------------------------\n");
1754:     fprintf(fptr1,"Free Boundary Nodes\n");
1755:     fprintf(fptr1,"---------------------------------------------\n");
1756:     fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1757:     fprintf(fptr1,"---------------------------------------------\n");
1758:     for (i = 0; i < nfnodeLoc; i++) {
1759:       j = grid->ifnode[i]-1;
1760:       fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1761:     }
1762:     fprintf(fptr1,"\n");
1763:     fprintf(fptr1,"---------------------------------------------\n");
1764:     fprintf(fptr1,"f2ntf array\n");
1765:     fprintf(fptr1,"---------------------------------------------\n");
1766:     for (i = 0; i < nffacetLoc; i++) {
1767:       fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntf[i],
1768:               grid->f2ntf[nffacetLoc+i],grid->f2ntf[2*nffacetLoc+i]);
1769:     }
1770:     fprintf(fptr1,"\n");

1772:     fprintf(fptr1,"---------------------------------------------\n");
1773:     fprintf(fptr1,"Neighborhood Info In Various Ordering\n");
1774:     fprintf(fptr1,"---------------------------------------------\n");
1775:     ICALLOC(nnodes,&p2l);
1776:     for (i = 0; i < nvertices; i++) p2l[grid->loc2pet[i]] = i;
1777:     for (i = 0; i < nnodesLoc; i++) {
1778:       jstart = grid->ia[grid->loc2glo[i]] - 1;
1779:       jend   = grid->ia[grid->loc2glo[i]+1] - 1;
1780:       fprintf(fptr1,"Neighbors of Node %d in Local Ordering are :",i);
1781:       for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",p2l[grid->ja[j]]);
1782:       fprintf(fptr1,"\n");

1784:       fprintf(fptr1,"Neighbors of Node %d in PETSc ordering are :",grid->loc2pet[i]);
1785:       for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",grid->ja[j]);
1786:       fprintf(fptr1,"\n");

1788:       fprintf(fptr1,"Neighbors of Node %d in Global Ordering are :",grid->loc2glo[i]);
1789:       for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",grid->loc2glo[p2l[grid->ja[j]]]);
1790:       fprintf(fptr1,"\n");

1792:     }
1793:     fprintf(fptr1,"\n");
1794:     PetscFree(p2l);
1795:     fclose(fptr1);
1796:   }

1798: /* Free the temporary arrays */
1799:   PetscFree(a2l);
1800:   MPI_Barrier(comm);
1801:   return(0);
1802: }

1804: /*
1805:   encode 3 8-bit binary bytes as 4 '6-bit' characters; len is the number of input bytes remaining, of which at most 3 are used
1806: */
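/* Worked example: the 3 input bytes 0x14 0xfb 0x9c have the bit pattern
   00010100 11111011 10011100, which regrouped into 6-bit values gives
   5, 15, 46, 28 and hence the 4 output characters "FPuc" from the cb64
   table below.  When len is 1 or 2 the missing input bytes are zero-filled
   and the unused output positions become '=' padding, as in standard base64. */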
1807: void *base64_encodeblock(void *vout,const void *vin,int len)
1808: {
1809:   unsigned char *out = (unsigned char*)vout,in[3] = {0,0,0};
1810:   /* Translation Table as described in RFC1113 */
1811:   static const char cb64[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1812:   memcpy(in,vin,PetscMin(len,3));
1813:   out[0] = cb64[in[0] >> 2];
1814:   out[1] = cb64[((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4)];
1815:   out[2] = (unsigned char) (len > 1 ? cb64[((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6)] : '=');
1816:   out[3] = (unsigned char) (len > 2 ? cb64[in[2] & 0x3f] : '=');
1817:   return (void*)(out+4);
1818: }

1820: /* Write binary data; does not do byte swapping. */
1821: static PetscErrorCode PetscFWrite_FUN3D(MPI_Comm comm,FILE *fp,void *data,PetscInt n,PetscDataType dtype,PetscBool base64)
1822: {
1824:   PetscMPIInt    rank;

1827:   if (n < 0) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Trying to write a negative amount of data %D",n);
1828:   if (!n) return(0);
1829:   MPI_Comm_rank(comm,&rank);
1830:   if (rank == 0) {
1831:     size_t count;
1832:     int    bytes;
1833:     switch (dtype) {
1834:     case PETSC_DOUBLE:
1835:       size = sizeof(double);
1836:       break;
1837:     case PETSC_FLOAT:
1838:       size = sizeof(float);
1839:       break;
1840:     case PETSC_INT:
1841:       size = sizeof(PetscInt);
1842:       break;
1843:     case PETSC_CHAR:
1844:       size = sizeof(char);
1845:       break;
1846:     default: SETERRQ(comm,PETSC_ERR_SUP,"Data type not supported");
1847:     }
1848:     bytes = size*n;
1849:     if (base64) {
1850:       unsigned char *buf,*ptr;
1851:       int           i;
1852:       size_t        b64alloc = 9 + (n*size*4) / 3 + (n*size*4) % 3;
1853:       PetscMalloc(b64alloc,&buf);
1854:       ptr  = buf;
1855:       ptr  = (unsigned char*)base64_encodeblock(ptr,&bytes,3);
1856:       ptr  = (unsigned char*)base64_encodeblock(ptr,((char*)&bytes)+3,1);
1857:       for (i=0; i<bytes; i+=3) {
1858:         int left = bytes - i;
1859:         ptr = (unsigned char*)base64_encodeblock(ptr,((char*)data)+i,left);
1860:       }
1861:       *ptr++ = '\n';
1862:       /* printf("encoded 4+%d raw bytes in %zd base64 chars, allocated for %zd\n",bytes,ptr-buf,b64alloc); */
1863:       count = fwrite(buf,1,ptr-buf,fp);
1864:       if (count < (size_t)(ptr-buf)) {
1865:         perror("");
1866:         SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Wrote %D of %D bytes",(PetscInt)count,(PetscInt)(ptr-buf));
1867:       }
1868:       PetscFree(buf);
1869:     } else {
1870:       count = fwrite(&bytes,sizeof(int),1,fp);
1871:       if (count != 1) {
1872:         perror("");
1873:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Error writing byte count");
1874:       }
1875:       count = fwrite(data,size,(size_t)n,fp);
1876:       if ((int)count != n) {
1877:         perror("");
1878:         SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Wrote %D/%D array members of size %D",(PetscInt)count,n,(PetscInt)size);
1879:       }
1880:     }
1881:   }
1882:   return(0);
1883: }

1885: static void SortInt2(PetscInt *a,PetscInt *b)
1886: {
1887:   if (*b < *a) {
1888:     PetscInt c = *b;
1889:     *b = *a;
1890:     *a = c;
1891:   }
1892: }

1894: /* b = intersection(a,b) */
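/* Assumes both a[] and b[] are sorted in increasing order (as produced by
   PetscSortInt in the caller); only the entries of b that also occur in a
   are kept.  For example, a = {1,3,5,7} and b = {2,3,6,7} leaves
   b = {3,7} with *nb = 2. */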
1895: static PetscErrorCode IntersectInt(PetscInt na,const PetscInt *a,PetscInt *nb,PetscInt *b)
1896: {
1897:   PetscInt i,n,j;

1900:   j = 0;
1901:   n = 0;
1902:   for (i=0; i<*nb; i++) {
1903:     while (j<na && a[j]<b[i]) j++;
1904:     if (j<na && a[j]==b[i]) {
1905:       b[n++] = b[i];
1906:       j++;
1907:     }
1908:   }
1909:   *nb = n;
1910:   return(0);
1911: }

1913: /*
1914:   This function currently has a semantic bug: it only produces cells containing all local edges.  Since the local mesh
1915:   does not even store edges between unowned nodes, primal cells that are effectively shared between processes will not
1916:   be constructed. This causes visualization artifacts.

1918:   This issue could be resolved by either (a) storing more edges from the original mesh or (b) communicating an extra
1919:   layer of edges in this function.
1920: */
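/*
  The inference works on the upper-triangular adjacency built below: a
  tetrahedron is reported for every 4-clique node0 < node1 < node2 < node3,
  found by intersecting the forward-neighbor lists of node0, node1 and node2.
  For a mesh consisting of a single tetrahedron on nodes {0,1,2,3} with its
  6 edges, this scan produces exactly one cell (0,1,2,3).
*/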
1921: static PetscErrorCode InferLocalCellConnectivity(PetscInt nnodes,PetscInt nedge,const PetscInt *eptr,PetscInt *incell,PetscInt **iconn)
1922: {
1924:   PetscInt       ncell,acell,(*conn)[4],node0,node1,node2,node3,i,j,k,l,rowmax;
1925:   PetscInt       *ui,*uj,*utmp,*tmp1,*tmp2,*tmp3,ntmp1,ntmp2,ntmp3;
1926: #if defined(INTERLACING)
1927: #  define GetEdge(eptr,i,n1,n2) do { n1 = eptr[i*2+0]-1; n2 = eptr[i*2+1]-1; } while (0)
1928: #else
1929: #  define GetEdge(eptr,i,n1,n2) do { n1 = eptr[i+0*nedge]-1; n2 = eptr[i+1*nedge]-1; } while (0)
1930: #endif

1933:   *incell = -1;
1934:   *iconn  = NULL;
1935:   acell   = 100000;              /* allocate for this many cells */
1936:   PetscMalloc1(acell,&conn);
1937:   PetscMalloc2(nnodes+1,&ui,nedge,&uj);
1938:   PetscCalloc1(nnodes,&utmp);
1939:   /* count the number of edges in the upper-triangular matrix u */
1940:   for (i=0; i<nedge; i++) {     /* count number of nonzeros in upper triangular matrix */
1941:     GetEdge(eptr,i,node0,node1);
1942:     utmp[PetscMin(node0,node1)]++;
1943:   }
1944:   rowmax = 0;
1945:   ui[0]  = 0;
1946:   for (i=0; i<nnodes; i++) {
1947:     rowmax  = PetscMax(rowmax,utmp[i]);
1948:     ui[i+1] = ui[i] + utmp[i]; /* convert from count to row offsets */
1949:     utmp[i] = 0;
1950:   }
1951:   for (i=0; i<nedge; i++) {     /* assemble upper triangular matrix U */
1952:     GetEdge(eptr,i,node0,node1);
1953:     SortInt2(&node0,&node1);
1954:     uj[ui[node0] + utmp[node0]++] = node1;
1955:   }
1956:   PetscFree(utmp);
1957:   for (i=0; i<nnodes; i++) {    /* sort every row */
1958:     PetscInt n = ui[i+1] - ui[i];
1959:     PetscSortInt(n,&uj[ui[i]]);
1960:   }

1962:   /* Infer cells */
1963:   ncell = 0;
1964:   PetscMalloc3(rowmax,&tmp1,rowmax,&tmp2,rowmax,&tmp3);
1965:   for (i=0; i<nnodes; i++) {
1966:     node0 = i;
1967:     ntmp1 = ui[node0+1] - ui[node0]; /* Number of candidates for node1 */
1968:     PetscMemcpy(tmp1,&uj[ui[node0]],ntmp1*sizeof(PetscInt));
1969:     for (j=0; j<ntmp1; j++) {
1970:       node1 = tmp1[j];
1971:       if (node1 < 0 || nnodes <= node1) SETERRQ2(PETSC_COMM_SELF,1,"node index %D out of range [0,%D)",node1,nnodes);
1972:       if (node1 <= node0) SETERRQ2(PETSC_COMM_SELF,1,"forward neighbor of %D is %D, should be larger",node0,node1);
1973:       ntmp2 = ui[node1+1] - ui[node1];
1974:       PetscMemcpy(tmp2,&uj[ui[node1]],ntmp2*sizeof(PetscInt));
1975:       IntersectInt(ntmp1,tmp1,&ntmp2,tmp2);
1976:       for (k=0; k<ntmp2; k++) {
1977:         node2 = tmp2[k];
1978:         if (node2 < 0 || nnodes <= node2) SETERRQ2(PETSC_COMM_SELF,1,"node index %D out of range [0,%D)",node2,nnodes);
1979:         if (node2 <= node1) SETERRQ2(PETSC_COMM_SELF,1,"forward neighbor of %D is %D, should be larger",node1,node2);
1980:         ntmp3 = ui[node2+1] - ui[node2];
1981:         PetscMemcpy(tmp3,&uj[ui[node2]],ntmp3*sizeof(PetscInt));
1982:         IntersectInt(ntmp2,tmp2,&ntmp3,tmp3);
1983:         for (l=0; l<ntmp3; l++) {
1984:           node3 = tmp3[l];
1985:           if (node3 < 0 || nnodes <= node3) SETERRQ2(PETSC_COMM_SELF,1,"node index %D out of range [0,%D)",node3,nnodes);
1986:           if (node3 <= node2) SETERRQ2(PETSC_COMM_SELF,1,"forward neighbor of %D is %D, should be larger",node2,node3);
1987:           if (ncell >= acell) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer exceeded");
1988:           if (ntmp3 < 3) continue;
1989:           conn[ncell][0] = node0;
1990:           conn[ncell][1] = node1;
1991:           conn[ncell][2] = node2;
1992:           conn[ncell][3] = node3;
1993:           if (0) {
1994:             PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
1995:             PetscViewerASCIIPrintf(viewer,"created cell %d: %d %d %d %d\n",ncell,node0,node1,node2,node3);
1996:             PetscIntView(ntmp1,tmp1,viewer);
1997:             PetscIntView(ntmp2,tmp2,viewer);
1998:             PetscIntView(ntmp3,tmp3,viewer);
1999:             /* uns3d.msh has a handful of "tetrahedra" that overlap by violating the following condition. As far as I
2000:              * can tell, that means it is an invalid mesh. I don't know what the intent was. */
2001:             if (ntmp3 > 2) SETERRQ(PETSC_COMM_SELF,1,"More than two ways to complete a tetrahedron using a common triangle");
2002:           }
2003:           ncell++;
2004:         }
2005:       }
2006:     }
2007:   }
2008:   PetscFree3(tmp1,tmp2,tmp3);
2009:   PetscFree2(ui,uj);

2011:   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"Inferred %D cells with nnodes=%D nedge=%D\n",ncell,nnodes,nedge);
2012:   PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);
2013:   *incell = ncell;
2014:   *iconn  = (PetscInt*)conn;
2015:   return(0);
2016: }
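
/*
  Gathers, for every owned+ghosted vertex, the full list of incident edges from
  the ranks that own them (one VecScatter for the per-node count/offset pairs,
  a second for the edge lists themselves), then keeps each such edge exactly
  once with node0 < node1 in local numbering.  With -complete_overlap 0 the
  routine instead returns a copy of the existing local edge list grid->eptr.
*/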

2018: static PetscErrorCode GridCompleteOverlap(GRID *grid,PetscInt *invertices,PetscInt *inedgeOv,PetscInt **ieptrOv)
2019: {
2021:   PetscInt       nedgeLoc,nedgeOv,i,j,cnt,node0,node1,node0p,node1p,nnodes,nnodesLoc,nvertices,rstart,nodeEdgeCountAll,nodeEdgeRstart;
2022:   PetscInt       *nodeEdgeCount,*nodeEdgeOffset,*eIdxOv,*p2l,*eptrOv;
2023:   Vec            VNodeEdge,VNodeEdgeInfo,VNodeEdgeInfoOv,VNodeEdgeOv;
2024:   PetscScalar    *vne,*vnei;
2025:   IS             isglobal,isedgeOv;
2026:   VecScatter     nescat,neiscat;
2027:   PetscBool      flg;

2030:   nnodes    = grid->nnodes;     /* Total number of global nodes */
2031:   nnodesLoc = grid->nnodesLoc;  /* Number of owned nodes */
2032:   nvertices = grid->nvertices;  /* Number of owned+ghosted nodes */
2033:   nedgeLoc  = grid->nedgeLoc;   /* Number of edges connected to owned nodes */

2035:   /* Count the number of neighbors of each owned node */
2036:   MPI_Scan(&nnodesLoc,&rstart,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD);
2037:   rstart -= nnodesLoc;
2038:   PetscMalloc2(nnodesLoc,&nodeEdgeCount,nnodesLoc,&nodeEdgeOffset);
2039:   PetscMemzero(nodeEdgeCount,nnodesLoc*sizeof(*nodeEdgeCount));
2040:   for (i=0; i<nedgeLoc; i++) {
2041:     GetEdge(grid->eptr,i,node0,node1);
2042:     node0p = grid->loc2pet[node0];
2043:     node1p = grid->loc2pet[node1];
2044:     if (rstart <= node0p && node0p < rstart+nnodesLoc) nodeEdgeCount[node0p-rstart]++;
2045:     if (rstart <= node1p && node1p < rstart+nnodesLoc) nodeEdgeCount[node1p-rstart]++;
2046:   }
2047:   /* Get the offset in the node-based edge array */
2048:   nodeEdgeOffset[0] = 0;
2049:   for (i=0; i<nnodesLoc-1; i++) nodeEdgeOffset[i+1] = nodeEdgeOffset[i] + nodeEdgeCount[i];
2050:   nodeEdgeCountAll = nodeEdgeCount[nnodesLoc-1] + nodeEdgeOffset[nnodesLoc-1];

2052:   /* Pack a Vec by node of all the edges for that node. The nodes are stored by global index */
2053:   VecCreateMPI(PETSC_COMM_WORLD,nodeEdgeCountAll,PETSC_DETERMINE,&VNodeEdge);
2054:   PetscMemzero(nodeEdgeCount,nnodesLoc*sizeof(*nodeEdgeCount));
2055:   VecGetArray(VNodeEdge,&vne);
2056:   for (i=0; i<nedgeLoc; i++) {
2057:     GetEdge(grid->eptr,i,node0,node1);
2058:     node0p = grid->loc2pet[node0];
2059:     node1p = grid->loc2pet[node1];
2060:     if (rstart <= node0p && node0p < rstart+nnodesLoc) vne[nodeEdgeOffset[node0p-rstart] + nodeEdgeCount[node0p-rstart]++] = node1p;
2061:     if (rstart <= node1p && node1p < rstart+nnodesLoc) vne[nodeEdgeOffset[node1p-rstart] + nodeEdgeCount[node1p-rstart]++] = node0p;
2062:   }
2063:   VecRestoreArray(VNodeEdge,&vne);
2064:   VecGetOwnershipRange(VNodeEdge,&nodeEdgeRstart,NULL);

2066:   /* Move the count and offset into a Vec so that we can use VecScatter, translating offset from local to global */
2067:   VecCreate(PETSC_COMM_WORLD,&VNodeEdgeInfo);
2068:   VecSetSizes(VNodeEdgeInfo,2*nnodesLoc,2*nnodes);
2069:   VecSetBlockSize(VNodeEdgeInfo,2);
2070:   VecSetType(VNodeEdgeInfo,VECMPI);

2072:   VecGetArray(VNodeEdgeInfo,&vnei);
2073:   for (i=0; i<nnodesLoc; i++) {
2074:     vnei[i*2+0] = nodeEdgeCount[i];                   /* Total number of edges from this vertex */
2075:     vnei[i*2+1] = nodeEdgeOffset[i] + nodeEdgeRstart; /* Now the global index in the next comm round */
2076:   }
2077:   VecRestoreArray(VNodeEdgeInfo,&vnei);
2078:   PetscFree2(nodeEdgeCount,nodeEdgeOffset);

2080:   /* Create a Vec to receive the edge count and global offset for each node in owned+ghosted, get them, and clean up */
2081:   VecCreate(PETSC_COMM_SELF,&VNodeEdgeInfoOv);
2082:   VecSetSizes(VNodeEdgeInfoOv,2*nvertices,2*nvertices);
2083:   VecSetBlockSize(VNodeEdgeInfoOv,2);
2084:   VecSetType(VNodeEdgeInfoOv,VECSEQ);

2086:   ISCreateBlock(PETSC_COMM_WORLD,2,nvertices,grid->loc2pet,PETSC_COPY_VALUES,&isglobal); /* Address the nodes in overlap to get info from */
2087:   VecScatterCreate(VNodeEdgeInfo,isglobal,VNodeEdgeInfoOv,NULL,&neiscat);
2088:   VecScatterBegin(neiscat,VNodeEdgeInfo,VNodeEdgeInfoOv,INSERT_VALUES,SCATTER_FORWARD);
2089:   VecScatterEnd(neiscat,VNodeEdgeInfo,VNodeEdgeInfoOv,INSERT_VALUES,SCATTER_FORWARD);
2090:   VecScatterDestroy(&neiscat);
2091:   VecDestroy(&VNodeEdgeInfo);
2092:   ISDestroy(&isglobal);

2094:   /* Create a Vec to receive the actual edges for all nodes (owned and ghosted), execute the scatter */
2095:   nedgeOv = 0;                  /* First count the number of edges in the complete overlap */
2096:   VecGetArray(VNodeEdgeInfoOv,&vnei);
2097:   for (i=0; i<nvertices; i++) nedgeOv += (PetscInt)vnei[2*i+0];
2098:   /* Allocate for the global indices in VNodeEdge of the edges to receive */
2099:   PetscMalloc1(nedgeOv,&eIdxOv);
2100:   for (i=0,cnt=0; i<nvertices; i++) {
2101:     for (j=0; j<(PetscInt)vnei[2*i+0]; j++) eIdxOv[cnt++] = (PetscInt)vnei[2*i+1] + j;
2102:   }
2103:   VecRestoreArray(VNodeEdgeInfoOv,&vnei);
2104:   ISCreateGeneral(PETSC_COMM_WORLD,nedgeOv,eIdxOv,PETSC_USE_POINTER,&isedgeOv);
2105:   VecCreateSeq(PETSC_COMM_SELF,nedgeOv,&VNodeEdgeOv);
2106:   VecScatterCreate(VNodeEdge,isedgeOv,VNodeEdgeOv,NULL,&nescat);
2107:   VecScatterBegin(nescat,VNodeEdge,VNodeEdgeOv,INSERT_VALUES,SCATTER_FORWARD);
2108:   VecScatterEnd(nescat,VNodeEdge,VNodeEdgeOv,INSERT_VALUES,SCATTER_FORWARD);
2109:   VecScatterDestroy(&nescat);
2110:   VecDestroy(&VNodeEdge);
2111:   ISDestroy(&isedgeOv);
2112:   PetscFree(eIdxOv);

2114:   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: number of edges before pruning: %D, half=%D\n",rank,PETSC_FUNCTION_NAME,nedgeOv,nedgeOv/2);

2116:   /* Create the non-scalable global-to-local index map. Yuck, but it has already been used elsewhere. */
2117:   PetscMalloc1(nnodes,&p2l);
2118:   for (i=0; i<nnodes; i++) p2l[i] = -1;
2119:   for (i=0; i<nvertices; i++) p2l[grid->loc2pet[i]] = i;
2120:   if (1) {
2121:     PetscInt m = 0;
2122:     for (i=0; i<nnodes; i++) m += (p2l[i] >= 0);
2123:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: number of global indices that map to local indices: %D; nvertices=%D nnodesLoc=%D nnodes=%D\n",rank,PETSC_FUNCTION_NAME,m,nvertices,nnodesLoc,nnodes);
2124:   }

2126:   /* Log each edge connecting nodes in owned+ghosted exactly once */
2127:   VecGetArray(VNodeEdgeInfoOv,&vnei);
2128:   VecGetArray(VNodeEdgeOv,&vne);
2129:   /* First count the number of edges to keep */
2130:   nedgeOv = 0;
2131:   for (i=0,cnt=0; i<nvertices; i++) {
2132:     PetscInt n = (PetscInt)vnei[2*i+0]; /* number of nodes connected to i */
2133:     node0 = i;
2134:     for (j=0; j<n; j++) {
2135:       node1p = vne[cnt++];
2136:       node1  = p2l[node1p];
2137:       if (node0 < node1) nedgeOv++;
2138:     }
2139:   }
2140:   /* Array of edges to keep */
2141:   PetscMalloc1(2*nedgeOv,&eptrOv);
2142:   nedgeOv = 0;
2143:   for (i=0,cnt=0; i<nvertices; i++) {
2144:     PetscInt n = (PetscInt)vnei[2*i+0]; /* number of nodes connected to i */
2145:     node0 = i;
2146:     for (j=0; j<n; j++) {
2147:       node1p = vne[cnt++];
2148:       node1  = p2l[node1p];
2149:       if (node0 < node1) {
2150:         eptrOv[2*nedgeOv+0] = node0;
2151:         eptrOv[2*nedgeOv+1] = node1;
2152:         nedgeOv++;
2153:       }
2154:     }
2155:   }
2156:   VecRestoreArray(VNodeEdgeInfoOv,&vnei);
2157:   VecRestoreArray(VNodeEdgeOv,&vne);
2158:   VecDestroy(&VNodeEdgeInfoOv);
2159:   VecDestroy(&VNodeEdgeOv);
2160:   PetscFree(p2l);

2162:   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: nedgeLoc=%D nedgeOv=%D\n",rank,PETSC_FUNCTION_NAME,nedgeLoc,nedgeOv);
2163:   PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

2165:   flg  = PETSC_TRUE;
2166:   PetscOptionsGetBool(NULL,NULL,"-complete_overlap",&flg,NULL);
2167:   if (flg) {
2168:     *invertices = grid->nvertices; /* We did not change the number of vertices */
2169:     *inedgeOv   = nedgeOv;
2170:     *ieptrOv    = eptrOv;
2171:   } else {
2172:     *invertices = grid->nvertices;
2173:     *inedgeOv   = nedgeLoc;
2174:     PetscFree(eptrOv);
2175:     PetscMalloc1(2*nedgeLoc,&eptrOv);
2176:     PetscMemcpy(eptrOv,grid->eptr,2*nedgeLoc*sizeof(PetscInt));
2177:     *ieptrOv    = eptrOv;
2178:   }
2179:   return(0);
2180: }
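
/*
  Writes one master file <fname>-<step>.pvtu plus one <fname>-<step>-<rank>.vtu
  piece per rank.  Each 4-component block of qnode is split into a scalar
  Pressure (component 0) and a 3-component Velocity (components 1-3) before
  being written in VTK appended-data form.
*/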

2182: static PetscErrorCode WritePVTU(AppCtx *user,const char *fname,PetscBool base64)
2183: {
2184:   GRID              *grid  = user->grid;
2185:   TstepCtx          *tsCtx = user->tsCtx;
2186:   FILE              *vtu,*pvtu;
2187:   char              pvtu_fname[PETSC_MAX_PATH_LEN],vtu_fname[PETSC_MAX_PATH_LEN];
2188:   MPI_Comm          comm;
2189:   PetscMPIInt       rank,size;
2190:   PetscInt          i,nvertices = 0,nedgeLoc = 0,ncells,bs,nloc,boffset = 0,*eptr = NULL;
2191:   PetscErrorCode    ierr;
2192:   Vec               Xloc,Xploc,Xuloc;
2193:   unsigned char     *celltype;
2194:   int               *celloffset,*conn,*cellrank;
2195:   const PetscScalar *x;
2196:   PetscScalar       *xu,*xp;
2197:   const char        *byte_order = PetscBinaryBigEndian() ? "BigEndian" : "LittleEndian";

2200:   GridCompleteOverlap(user->grid,&nvertices,&nedgeLoc,&eptr);
2201:   comm = PETSC_COMM_WORLD;
2202:   MPI_Comm_rank(comm,&rank);
2203:   MPI_Comm_size(comm,&size);
2204: #if defined(PETSC_USE_COMPLEX) || !defined(PETSC_USE_REAL_DOUBLE) || defined(PETSC_USE_64BIT_INDICES)
2205:   SETERRQ(comm,PETSC_ERR_SUP,"This function is only implemented for scalar-type=real precision=double, 32-bit indices");
2206: #endif
2207:   PetscSNPrintf(pvtu_fname,sizeof(pvtu_fname),"%s-%D.pvtu",fname,tsCtx->itstep);
2208:   PetscSNPrintf(vtu_fname,sizeof(vtu_fname),"%s-%D-%D.vtu",fname,tsCtx->itstep,rank);
2209:   PetscFOpen(comm,pvtu_fname,"w",&pvtu);
2210:   PetscFPrintf(comm,pvtu,"<?xml version=\"1.0\"?>\n");
2211:   PetscFPrintf(comm,pvtu,"<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"%s\">\n",byte_order);
2212:   PetscFPrintf(comm,pvtu," <PUnstructuredGrid GhostLevel=\"0\">\n");
2213:   PetscFPrintf(comm,pvtu,"  <PPointData Scalars=\"Pressure\" Vectors=\"Velocity\">\n");
2214:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Float64\" Name=\"Pressure\" NumberOfComponents=\"1\" />\n");
2215:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Float64\" Name=\"Velocity\" NumberOfComponents=\"3\" />\n");
2216:   PetscFPrintf(comm,pvtu,"  </PPointData>\n");
2217:   PetscFPrintf(comm,pvtu,"  <PCellData>\n");
2218:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Int32\" Name=\"Rank\" NumberOfComponents=\"1\" />\n");
2219:   PetscFPrintf(comm,pvtu,"  </PCellData>\n");
2220:   PetscFPrintf(comm,pvtu,"  <PPoints>\n");
2221:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Float64\" Name=\"Position\" NumberOfComponents=\"3\" />\n");
2222:   PetscFPrintf(comm,pvtu,"  </PPoints>\n");
2223:   PetscFPrintf(comm,pvtu,"  <PCells>\n");
2224:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Int32\" Name=\"connectivity\" NumberOfComponents=\"1\" />\n");
2225:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Int32\" Name=\"offsets\"      NumberOfComponents=\"1\" />\n");
2226:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"UInt8\" Name=\"types\"        NumberOfComponents=\"1\" />\n");
2227:   PetscFPrintf(comm,pvtu,"  </PCells>\n");
2228:   for (i=0; i<size; i++) {
2229:     PetscFPrintf(comm,pvtu,"  <Piece Source=\"%s-%D-%D.vtu\" />\n",fname,tsCtx->itstep,i);
2230:   }
2231:   PetscFPrintf(comm,pvtu," </PUnstructuredGrid>\n");
2232:   PetscFPrintf(comm,pvtu,"</VTKFile>\n");
2233:   PetscFClose(comm,pvtu);

2235:   Xloc = grid->qnodeLoc;
2236:   VecScatterBegin(grid->scatter,grid->qnode,Xloc,INSERT_VALUES,SCATTER_FORWARD);
2237:   VecScatterEnd(grid->scatter,grid->qnode,Xloc,INSERT_VALUES,SCATTER_FORWARD);
2238:   VecGetBlockSize(Xloc,&bs);
2239:   if (bs != 4) SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"expected block size 4, got %D",bs);
2240:   VecGetSize(Xloc,&nloc);
2241:   if (nloc/bs != nvertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"expected nloc/bs=%D to match nvertices=%D",nloc/bs,nvertices);
2242:   if (nvertices != grid->nvertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"expected nvertices=%D to match grid->nvertices=%D",nvertices,grid->nvertices);
2243:   VecCreateSeq(PETSC_COMM_SELF,nvertices,&Xploc);

2245:   VecCreate(PETSC_COMM_SELF,&Xuloc);
2246:   VecSetSizes(Xuloc,3*nvertices,3*nvertices);
2247:   VecSetBlockSize(Xuloc,3);
2248:   VecSetType(Xuloc,VECSEQ);

2250:   VecGetArrayRead(Xloc,&x);
2251:   VecGetArray(Xploc,&xp);
2252:   VecGetArray(Xuloc,&xu);
2253:   for (i=0; i<nvertices; i++) {
2254:     xp[i]     = x[i*4+0];
2255:     xu[i*3+0] = x[i*4+1];
2256:     xu[i*3+1] = x[i*4+2];
2257:     xu[i*3+2] = x[i*4+3];
2258:   }
2259:   VecRestoreArrayRead(Xloc,&x);

2261:   InferLocalCellConnectivity(nvertices,nedgeLoc,eptr,&ncells,&conn);

2263:   PetscFOpen(PETSC_COMM_SELF,vtu_fname,"w",&vtu);
2264:   PetscFPrintf(PETSC_COMM_SELF,vtu,"<?xml version=\"1.0\"?>\n");
2265:   PetscFPrintf(PETSC_COMM_SELF,vtu,"<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"%s\">\n",byte_order);
2266:   PetscFPrintf(PETSC_COMM_SELF,vtu," <UnstructuredGrid>\n");
2267:   PetscFPrintf(PETSC_COMM_SELF,vtu,"  <Piece NumberOfPoints=\"%D\" NumberOfCells=\"%D\">\n",nvertices,ncells);

2269:   /* Solution fields */
2270:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   <PointData Scalars=\"Pressure\" Vectors=\"Velocity\">\n");
2271:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Float64\" Name=\"Pressure\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2272:   boffset += nvertices*sizeof(PetscScalar) + sizeof(int);
2273:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Float64\" Name=\"Velocity\" NumberOfComponents=\"3\" format=\"appended\" offset=\"%D\" />\n",boffset);
2274:   boffset += nvertices*3*sizeof(PetscScalar) + sizeof(int);
2275:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   </PointData>\n");
2276:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   <CellData>\n");
2277:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Int32\" Name=\"Rank\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2278:   boffset += ncells*sizeof(int) + sizeof(int);
2279:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   </CellData>\n");
2280:   /* Coordinate positions */
2281:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   <Points>\n");
2282:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Float64\" Name=\"Position\" NumberOfComponents=\"3\" format=\"appended\" offset=\"%D\" />\n",boffset);
2283:   boffset += nvertices*3*sizeof(double) + sizeof(int);
2284:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   </Points>\n");
2285:   /* Cell connectivity */
2286:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   <Cells>\n");
2287:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Int32\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2288:   boffset += ncells*4*sizeof(int) + sizeof(int);
2289:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Int32\" Name=\"offsets\"      NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2290:   boffset += ncells*sizeof(int) + sizeof(int);
2291:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"UInt8\" Name=\"types\"        NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2292:   boffset += ncells*sizeof(unsigned char) + sizeof(int);
2293:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   </Cells>\n");
2294:   PetscFPrintf(PETSC_COMM_SELF,vtu,"  </Piece>\n");
2295:   PetscFPrintf(PETSC_COMM_SELF,vtu," </UnstructuredGrid>\n");
2296:   PetscFPrintf(PETSC_COMM_SELF,vtu," <AppendedData encoding=\"%s\">\n",base64 ? "base64" : "raw");
2297:   PetscFPrintf(PETSC_COMM_SELF,vtu,"_");
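  /* Each appended block below is preceded by an int byte count (written by
     PetscFWrite_FUN3D), which is why every boffset increment above includes
     an extra sizeof(int); in base64 mode the same count is encoded into the
     text stream instead of written raw. */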

2299:   /* Write pressure */
2300:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,xp,nvertices,PETSC_SCALAR,base64);

2302:   /* Write velocity */
2303:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,xu,nvertices*3,PETSC_SCALAR,base64);

2305:   /* Label each cell with its owning rank.  This is not a measure of computation, because nothing is actually computed at cells; it is written
2306:    * primarily to aid in debugging.  The partition for computation should label vertices. */
2307:   PetscMalloc1(ncells,&cellrank);
2308:   for (i=0; i<ncells; i++) cellrank[i] = rank;
2309:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,cellrank,ncells,PETSC_INT,base64);
2310:   PetscFree(cellrank);

2312:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,grid->xyz,nvertices*3,PETSC_DOUBLE,base64);
2313:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,conn,ncells*4,PETSC_INT,base64);
2314:   PetscFree(conn);

2316:   PetscMalloc1(ncells,&celloffset);
2317:   for (i=0; i<ncells; i++) celloffset[i] = 4*(i+1);
2318:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,celloffset,ncells,PETSC_INT,base64);
2319:   PetscFree(celloffset);

2321:   PetscMalloc1(ncells,&celltype);
2322:   for (i=0; i<ncells; i++) celltype[i] = 10; /* VTK_TETRA */
2323:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,celltype,ncells,PETSC_CHAR,base64);
2324:   PetscFree(celltype);

2326:   PetscFPrintf(PETSC_COMM_SELF,vtu,"\n </AppendedData>\n");
2327:   PetscFPrintf(PETSC_COMM_SELF,vtu,"</VTKFile>\n");
2328:   PetscFClose(PETSC_COMM_SELF,vtu);

2330:   VecRestoreArray(Xploc,&xp);
2331:   VecRestoreArray(Xuloc,&xu);
2332:   VecDestroy(&Xploc);
2333:   VecDestroy(&Xuloc);
2334:   PetscFree(eptr);
2335:   return(0);
2336: }

2338: /*---------------------------------------------------------------------*/
2339: int SetPetscDS(GRID *grid,TstepCtx *tsCtx)
2340: /*---------------------------------------------------------------------*/
2341: {
2342:   int                    ierr,i,j,bs;
2343:   int                    nnodes,jstart,jend,nbrs_diag,nbrs_offd;
2344:   int                    nnodesLoc,nvertices;
2345:   int                    *val_diag,*val_offd,*svertices,*loc2pet;
2346:   IS                     isglobal,islocal;
2347:   ISLocalToGlobalMapping isl2g;
2348:   PetscBool              flg;
2349:   MPI_Comm               comm = PETSC_COMM_WORLD;

2352:   nnodes    = grid->nnodes;
2353:   nnodesLoc = grid->nnodesLoc;
2354:   nvertices = grid->nvertices;
2355:   loc2pet   = grid->loc2pet;
2356:   bs        = 4;

2358:   /* Set up the PETSc datastructures */

2360:   VecCreate(comm,&grid->qnode);
2361:   VecSetSizes(grid->qnode,bs*nnodesLoc,bs*nnodes);
2362:   VecSetBlockSize(grid->qnode,bs);
2363:   VecSetType(grid->qnode,VECMPI);

2365:   VecDuplicate(grid->qnode,&grid->res);
2366:   VecDuplicate(grid->qnode,&tsCtx->qold);
2367:   VecDuplicate(grid->qnode,&tsCtx->func);

2369:   VecCreate(MPI_COMM_SELF,&grid->qnodeLoc);
2370:   VecSetSizes(grid->qnodeLoc,bs*nvertices,bs*nvertices);
2371:   VecSetBlockSize(grid->qnodeLoc,bs);
2372:   VecSetType(grid->qnodeLoc,VECSEQ);

2374:   VecCreate(comm,&grid->grad);
2375:   VecSetSizes(grid->grad,3*bs*nnodesLoc,3*bs*nnodes);
2376:   VecSetBlockSize(grid->grad,3*bs);
2377:   VecSetType(grid->grad,VECMPI);

2379:   VecCreate(MPI_COMM_SELF,&grid->gradLoc);
2380:   VecSetSizes(grid->gradLoc,3*bs*nvertices,3*bs*nvertices);
2381:   VecSetBlockSize(grid->gradLoc,3*bs);
2382:   VecSetType(grid->gradLoc,VECSEQ);

2384: /* Create Scatter between the local and global vectors */
2385: /* First create scatter for qnode */
2386:   ISCreateStride(MPI_COMM_SELF,bs*nvertices,0,1,&islocal);
2387: #if defined(INTERLACING)
2388: #if defined(BLOCKING)
2389:   ICALLOC(nvertices,&svertices);
2390:   for (i=0; i < nvertices; i++) svertices[i] = loc2pet[i];
2391:   ISCreateBlock(MPI_COMM_SELF,bs,nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2392: #else
2393:   ICALLOC(bs*nvertices,&svertices);
2394:   for (i = 0; i < nvertices; i++)
2395:     for (j = 0; j < bs; j++) svertices[j+bs*i] = j + bs*loc2pet[i];
2396:   ISCreateGeneral(MPI_COMM_SELF,bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2397: #endif
2398: #else
2399:   ICALLOC(bs*nvertices,&svertices);
2400:   for (j = 0; j < bs; j++)
2401:     for (i = 0; i < nvertices; i++) svertices[j*nvertices+i] = j*nvertices + loc2pet[i];
2402:   ISCreateGeneral(MPI_COMM_SELF,bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2403: #endif
2404:   PetscFree(svertices);
2405:   VecScatterCreate(grid->qnode,isglobal,grid->qnodeLoc,islocal,&grid->scatter);
2406:   ISDestroy(&isglobal);
2407:   ISDestroy(&islocal);

2409: /* Now create scatter for gradient vector of qnode */
2410:   ISCreateStride(MPI_COMM_SELF,3*bs*nvertices,0,1,&islocal);
2411: #if defined(INTERLACING)
2412: #if defined(BLOCKING)
2413:   ICALLOC(nvertices,&svertices);
2414:   for (i=0; i < nvertices; i++) svertices[i] = loc2pet[i];
2415:   ISCreateBlock(MPI_COMM_SELF,3*bs,nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2416: #else
2417:   ICALLOC(3*bs*nvertices,&svertices);
2418:   for (i = 0; i < nvertices; i++)
2419:     for (j = 0; j < 3*bs; j++) svertices[j+3*bs*i] = j + 3*bs*loc2pet[i];
2420:   ISCreateGeneral(MPI_COMM_SELF,3*bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2421: #endif
2422: #else
2423:   ICALLOC(3*bs*nvertices,&svertices);
2424:   for (j = 0; j < 3*bs; j++)
2425:     for (i = 0; i < nvertices; i++) svertices[j*nvertices+i] = j*nvertices + loc2pet[i];
2426:   ISCreateGeneral(MPI_COMM_SELF,3*bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2427: #endif
2428:   PetscFree(svertices);
2429:   VecScatterCreate(grid->grad,isglobal,grid->gradLoc,islocal,&grid->gradScatter);
2430:   ISDestroy(&isglobal);
2431:   ISDestroy(&islocal);

2433: /* Store the number of non-zeroes per row */
2434: #if defined(INTERLACING)
2435: #if defined(BLOCKING)
2436:   ICALLOC(nnodesLoc,&val_diag);
2437:   ICALLOC(nnodesLoc,&val_offd);
2438:   for (i = 0; i < nnodesLoc; i++) {
2439:     jstart    = grid->ia[i] - 1;
2440:     jend      = grid->ia[i+1] - 1;
2441:     nbrs_diag = 0;
2442:     nbrs_offd = 0;
2443:     for (j = jstart; j < jend; j++) {
2444:       if ((grid->ja[j] >= rstart) && (grid->ja[j] < (rstart+nnodesLoc))) nbrs_diag++;
2445:       else nbrs_offd++;
2446:     }
2447:     val_diag[i] = nbrs_diag;
2448:     val_offd[i] = nbrs_offd;
2449:   }
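  /* val_diag[i] and val_offd[i] now hold, for block row i, the number of 4x4
     blocks falling in the diagonal (locally owned columns) and off-diagonal
     parts respectively; MatCreateBAIJ uses them for per-block-row
     preallocation, so assembly should not trigger additional mallocs. */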
2450:   MatCreateBAIJ(comm,bs,bs*nnodesLoc,bs*nnodesLoc,
2451:                        bs*nnodes,bs*nnodes,PETSC_DEFAULT,val_diag,
2452:                        PETSC_DEFAULT,val_offd,&grid->A);
2453: #else
2454:   ICALLOC(nnodesLoc*4,&val_diag);
2455:   ICALLOC(nnodesLoc*4,&val_offd);
2456:   for (i = 0; i < nnodesLoc; i++) {
2457:     jstart    = grid->ia[i] - 1;
2458:     jend      = grid->ia[i+1] - 1;
2459:     nbrs_diag = 0;
2460:     nbrs_offd = 0;
2461:     for (j = jstart; j < jend; j++) {
2462:       if ((grid->ja[j] >= rstart) && (grid->ja[j] < (rstart+nnodesLoc))) nbrs_diag++;
2463:       else nbrs_offd++;
2464:     }
2465:     for (j = 0; j < 4; j++) {
2466:       int row       = 4*i + j;
2467:       val_diag[row] = nbrs_diag*4;
2468:       val_offd[row] = nbrs_offd*4;
2469:     }
2470:   }
2471:   MatCreateAIJ(comm,bs*nnodesLoc,bs*nnodesLoc,
2472:                       bs*nnodes,bs*nnodes,NULL,val_diag,
2473:                       NULL,val_offd,&grid->A);
2474: #endif
2475:   PetscFree(val_diag);
2476:   PetscFree(val_offd);

2478: #else
2479:   if (size > 1) SETERRQ(PETSC_COMM_SELF,1,"Parallel case not supported in non-interlaced case\n");
2480:   ICALLOC(nnodes*4,&val_diag);
2481:   ICALLOC(nnodes*4,&val_offd);
2482:   for (j = 0; j < 4; j++)
2483:     for (i = 0; i < nnodes; i++) {
2484:       int row;
2485:       row           = i + j*nnodes;
2486:       jstart        = grid->ia[i] - 1;
2487:       jend          = grid->ia[i+1] - 1;
2488:       nbrs_diag     = jend - jstart;
2489:       val_diag[row] = nbrs_diag*4;
2490:       val_offd[row] = 0;
2491:     }
2492:   /* MatCreateSeqAIJ(MPI_COMM_SELF,nnodes*4,nnodes*4,NULL,
2493:                         val,&grid->A);*/
2494:   MatCreateAIJ(comm,bs*nnodesLoc,bs*nnodesLoc,
2495:                       bs*nnodes,bs*nnodes,NULL,val_diag,
2496:                       NULL,val_offd,&grid->A);
2497:   MatSetBlockSize(grid->A,bs);
2498:   PetscFree(val_diag);
2499:   PetscFree(val_offd);
2500: #endif

2502:   flg  = PETSC_FALSE;
2503:   PetscOptionsGetBool(0,"-mem_use",&flg,NULL);
2504:   if (flg) {
2505:     PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after allocating PETSc data structures\n");
2506:   }

2508: /* Set the local-to-global mapping used when matrix elements are set in
2509:    local ordering: first set the row-by-row mapping
2510: */
2511:   ISLocalToGlobalMappingCreate(MPI_COMM_SELF,bs,nvertices,loc2pet,PETSC_COPY_VALUES,&isl2g);
2512:   MatSetLocalToGlobalMapping(grid->A,isl2g,isl2g);
2513:   ISLocalToGlobalMappingDestroy(&isl2g);
2514:   return(0);
2515: }

2517: /*================================= CLINK ===================================*/
2518: /*                                                                           */
2519: /* Used in establishing the links between FORTRAN common blocks and C        */
2520: /*                                                                           */
2521: /*===========================================================================*/
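/* The Fortran side passes the addresses of its INFO, RUNGE and GMCOM common
   blocks; they are stored in the global pointers c_info, c_runge and c_gmcom
   so the C routines can read the same data. */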
2522: EXTERN_C_BEGIN
2523: void f77CLINK(CINFO *p1,CRUNGE *p2,CGMCOM *p3)
2524: {
2525:   c_info  = p1;
2526:   c_runge = p2;
2527:   c_gmcom = p3;
2528: }
2529: EXTERN_C_END

2531: /*========================== SET_UP_GRID====================================*/
2532: /*                                                                          */
2533: /* Allocates the memory for the fine grid                                   */
2534: /*                                                                          */
2535: /*==========================================================================*/
2536: int set_up_grid(GRID *grid)
2537: {
2538:   int nnodes,nedge;
2539:   int nsface,nvface,nfface,nbface;
2540:   int tnode,ierr;
2541:   /*int vface,lnodes,nnz,ncell,kvisc,ilu0,nsrch,ileast,ifcn,valloc;*/
2542:   /*int nsnode,nvnode,nfnode; */
2543:   /*int mgzero=0;*/ /* Variable so we don't allocate memory for multigrid */
2544:   /*int jalloc;*/  /* If jalloc=1 allocate space for dfp and dfm */
2545:   /*
2546:   * stuff to read in dave's grids
2547:   */
2548:   /*int nnbound,nvbound,nfbound,nnfacet,nvfacet,nffacet,ntte;*/
2549:   /* end of stuff */

2552:   nnodes = grid->nnodes;
2553:   tnode  = grid->nnodes;
2554:   nedge  = grid->nedge;
2555:   nsface = grid->nsface;
2556:   nvface = grid->nvface;
2557:   nfface = grid->nfface;
2558:   nbface = nsface + nvface + nfface;

2560:   /*ncell  = grid->ncell;
2561:   vface  = grid->nedge;
2562:   lnodes = grid->nnodes;
2563:   nsnode = grid->nsnode;
2564:   nvnode = grid->nvnode;
2565:   nfnode = grid->nfnode;
2566:   nsrch  = c_gmcom->nsrch;
2567:   ilu0   = c_gmcom->ilu0;
2568:   ileast = grid->ileast;
2569:   ifcn   = c_gmcom->ifcn;
2570:   jalloc = 0;
2571:   kvisc  = grid->jvisc;*/

2573:   /* if (ilu0 >=1 && ifcn == 1) jalloc=0;*/

2575:   /*
2576:   * stuff to read in dave's grids
2577:   */
2578:   /*nnbound = grid->nnbound;
2579:   nvbound = grid->nvbound;
2580:   nfbound = grid->nfbound;
2581:   nnfacet = grid->nnfacet;
2582:   nvfacet = grid->nvfacet;
2583:   nffacet = grid->nffacet;
2584:   ntte    = grid->ntte;*/
2585:   /* end of stuff */

2587:   /* if (!ileast) lnodes = 1;
2588:     printf("In set_up_grid->jvisc = %d\n",grid->jvisc);

2590:   if (grid->jvisc != 2 && grid->jvisc != 4 && grid->jvisc != 6)vface = 1;
2591:   printf(" vface = %d \n",vface);
2592:   if (grid->jvisc < 3) tnode = 1;
2593:   valloc = 1;
2594:   if (grid->jvisc ==  0)valloc = 0;*/

2596:   /*PetscPrintf(PETSC_COMM_WORLD," nsnode= %d nvnode= %d nfnode= %d\n",nsnode,nvnode,nfnode);*/
2597:   /*PetscPrintf(PETSC_COMM_WORLD," nsface= %d nvface= %d nfface= %d\n",nsface,nvface,nfface);
2598:   PetscPrintf(PETSC_COMM_WORLD," nbface= %d\n",nbface);*/
2599:   /* Now allocate memory for the other grid arrays */
2600:   /* ICALLOC(nedge*2,  &grid->eptr); */
2601:   ICALLOC(nsface,   &grid->isface);
2602:   ICALLOC(nvface,   &grid->ivface);
2603:   ICALLOC(nfface,   &grid->ifface);
2604:   /* ICALLOC(nsnode,   &grid->isnode);
2605:     ICALLOC(nvnode,   &grid->ivnode);
2606:     ICALLOC(nfnode,   &grid->ifnode);*/
2607:   /*ICALLOC(nnodes,   &grid->clist);
2608:   ICALLOC(nnodes,   &grid->iupdate);
2609:   ICALLOC(nsface*2, &grid->sface);
2610:   ICALLOC(nvface*2, &grid->vface);
2611:   ICALLOC(nfface*2, &grid->fface);
2612:   ICALLOC(lnodes,   &grid->icount);*/
2613:   /*FCALLOC(nnodes, &grid->x);
2614:   FCALLOC(nnodes, &grid->y);
2615:   FCALLOC(nnodes, &grid->z);
2616:   FCALLOC(nnodes, &grid->area);*/
2617: /*
2618:  * FCALLOC(nnodes*4, &grid->gradx);
2619:  * FCALLOC(nnodes*4, &grid->grady);
2620:  * FCALLOC(nnodes*4, &grid->gradz);
2621:  * FCALLOC(nnodes, &grid->cdt);
2622:  */
2623: /*
2624:  * FCALLOC(nnodes*4, &grid->qnode);
2625:  * FCALLOC(nnodes*4, &grid->dq);
2626:  * FCALLOC(nnodes*4, &grid->res);
2627:  * FCALLOC(jalloc*nnodes*4*4,&grid->A);
2628:  * FCALLOC(nnodes*4, &grid->B);
2629:  * FCALLOC(jalloc*nedge*4*4,&grid->dfp);
2630:  * FCALLOC(jalloc*nedge*4*4,&grid->dfm);
2631:  */
2632:   /*FCALLOC(nsnode, &grid->sxn);
2633:   FCALLOC(nsnode, &grid->syn);
2634:   FCALLOC(nsnode, &grid->szn);
2635:   FCALLOC(nsnode, &grid->sa);
2636:   FCALLOC(nvnode, &grid->vxn);
2637:   FCALLOC(nvnode, &grid->vyn);
2638:   FCALLOC(nvnode, &grid->vzn);
2639:   FCALLOC(nvnode, &grid->va);
2640:   FCALLOC(nfnode, &grid->fxn);
2641:   FCALLOC(nfnode, &grid->fyn);
2642:   FCALLOC(nfnode, &grid->fzn);
2643:   FCALLOC(nfnode, &grid->fa);
2644:   FCALLOC(nedge, &grid->xn);
2645:   FCALLOC(nedge, &grid->yn);
2646:   FCALLOC(nedge, &grid->zn);
2647:   FCALLOC(nedge, &grid->rl);*/

2649:   FCALLOC(nbface*15,&grid->us);
2650:   FCALLOC(nbface*15,&grid->vs);
2651:   FCALLOC(nbface*15,&grid->as);
2652: /*
2653:  * FCALLOC(nnodes*4, &grid->phi);
2654:  * FCALLOC(nnodes, &grid->r11);
2655:  * FCALLOC(nnodes, &grid->r12);
2656:  * FCALLOC(nnodes, &grid->r13);
2657:  * FCALLOC(nnodes, &grid->r22);
2658:  * FCALLOC(nnodes, &grid->r23);
2659:  * FCALLOC(nnodes, &grid->r33);
2660:  */
2661: /*
2662:  * Allocate memory for viscous length scale if turbulent
2663:  */
2664:   if (grid->jvisc >= 3) {
2665:     FCALLOC(tnode,  &grid->slen);
2666:     FCALLOC(nnodes, &grid->turbre);
2667:     FCALLOC(nnodes, &grid->amut);
2668:     FCALLOC(tnode,  &grid->turbres);
2669:     FCALLOC(nedge,  &grid->dft1);
2670:     FCALLOC(nedge,  &grid->dft2);
2671:   }
2672: /*
2673:  * Allocate memory for MG transfer
2674:  */
2675:   /*
2676:   ICALLOC(mgzero*nsface, &grid->isford);
2677:   ICALLOC(mgzero*nvface, &grid->ivford);
2678:   ICALLOC(mgzero*nfface, &grid->ifford);
2679:   ICALLOC(mgzero*nnodes, &grid->nflag);
2680:   ICALLOC(mgzero*nnodes, &grid->nnext);
2681:   ICALLOC(mgzero*nnodes, &grid->nneigh);
2682:   ICALLOC(mgzero*ncell,  &grid->ctag);
2683:   ICALLOC(mgzero*ncell,  &grid->csearch);
2684:   ICALLOC(valloc*ncell*4, &grid->c2n);
2685:   ICALLOC(valloc*ncell*6, &grid->c2e);
2686:   grid->c2c = (int*)grid->dfp;
2687:   ICALLOC(ncell*4, &grid->c2c);
2688:   ICALLOC(nnodes,  &grid->cenc);
2689:   if (grid->iup == 1) {
2690:     ICALLOC(mgzero*nnodes*3, &grid->icoefup);
2691:     FCALLOC(mgzero*nnodes*3, &grid->rcoefup);
2692:   }
2693:   if (grid->idown == 1) {
2694:     ICALLOC(mgzero*nnodes*3, &grid->icoefdn);
2695:     FCALLOC(mgzero*nnodes*3, &grid->rcoefdn);
2696:   }
2697:   FCALLOC(nnodes*4, &grid->ff);
2698:   FCALLOC(tnode,    &grid->turbff);
2699:   */
2700: /*
2701:  * If using GMRES (nsrch>0) allocate memory
2702:  */
2703:   /* NoEq = 0;
2704:   if (nsrch > 0)NoEq = 4*nnodes;
2705:   if (nsrch < 0)NoEq = nnodes;
2706:   FCALLOC(NoEq, &grid->AP);
2707:   FCALLOC(NoEq, &grid->Xgm);
2708:   FCALLOC(NoEq, &grid->temr);
2709:   FCALLOC((abs(nsrch)+1)*NoEq,&grid->Fgm);
2710:   */
2711: /*
2712:  * stuff to read in dave's grids
2713:  */
2714:   /*
2715:   ICALLOC(nnbound,    &grid->ncolorn);
2716:   ICALLOC(nnbound*100,&grid->countn);
2717:   ICALLOC(nvbound,    &grid->ncolorv);
2718:   ICALLOC(nvbound*100,&grid->countv);
2719:   ICALLOC(nfbound,    &grid->ncolorf);
2720:   ICALLOC(nfbound*100,&grid->countf);
2721:   */
2722:   /*ICALLOC(nnbound, &grid->nntet);
2723:   ICALLOC(nnbound, &grid->nnpts);
2724:   ICALLOC(nvbound, &grid->nvtet);
2725:   ICALLOC(nvbound, &grid->nvpts);
2726:   ICALLOC(nfbound, &grid->nftet);
2727:   ICALLOC(nfbound, &grid->nfpts);
2728:   ICALLOC(nnfacet*4,&grid->f2ntn);
2729:   ICALLOC(nvfacet*4,&grid->f2ntv);
2730:   ICALLOC(nffacet*4,&grid->f2ntf);*/
2731:   return 0;
2732: }

/*========================== WRITE_FINE_GRID ================================*/
/* */
/* Write memory locations and other information for the fine grid */
/* */
/*===========================================================================*/
line2739">2739: int write_fine_grid(GRID *grid) line2740">2740
line2741">2741: FILE *

/* open file for output */
/* call the output frame.out */

line2747">2747: if (!(output = fopen("frame.out","a"))) SETERRQ(PETSC_COMM_SELF,1,"can't open frame.out" line2748">2748: fprintf(output,"information for fine grid\n" line2749">2749: fprintf(output,"\n" line2750">2750: fprintf(output," address of fine grid = %p\n",(void*

line2752">2752: fprintf(output,"grid.nnodes = %d\n",grid->n
line2753">2753: fprintf(output,"grid.ncell = %d\n",grid->
line2754">2754: fprintf(output,"grid.nedge = %d\n",grid->
line2755">2755: fprintf(output,"grid.nsface = %d\n",grid->n
line2756">2756: fprintf(output,"grid.nvface = %d\n",grid->n
line2757">2757: fprintf(output,"grid.nfface = %d\n",grid->n
line2758">2758: fprintf(output,"grid.nsnode = %d\n",grid->n
line2759">2759: fprintf(output,"grid.nvnode = %d\n",grid->n
line2760">2760: fprintf(output,"grid.nfnode = %d\n",grid->n
/*
fprintf(output,"grid.eptr = %p\n",grid->eptr);
fprintf(output,"grid.isface = %p\n",grid->isface);
fprintf(output,"grid.ivface = %p\n",grid->ivface);
fprintf(output,"grid.ifface = %p\n",grid->ifface);
fprintf(output,"grid.isnode = %p\n",grid->isnode);
fprintf(output,"grid.ivnode = %p\n",grid->ivnode);
fprintf(output,"grid.ifnode = %p\n",grid->ifnode);
fprintf(output,"grid.c2n = %p\n",grid->c2n);
fprintf(output,"grid.c2e = %p\n",grid->c2e);
fprintf(output,"grid.xyz = %p\n",grid->xyz);
*/
/*fprintf(output,"grid.y = %p\n",grid->xyz);
fprintf(output,"grid.z = %p\n",grid->z);*/
/*
fprintf(output,"grid.area = %p\n",grid->area);
fprintf(output,"grid.qnode = %p\n",grid->qnode);
*/
/*
fprintf(output,"grid.gradx = %p\n",grid->gradx);
fprintf(output,"grid.grady = %p\n",grid->grady);
fprintf(output,"grid.gradz = %p\n",grid->gradz);
*/
/*
fprintf(output,"grid.cdt = %p\n",grid->cdt);
fprintf(output,"grid.sxn = %p\n",grid->sxn);
fprintf(output,"grid.syn = %p\n",grid->syn);
fprintf(output,"grid.szn = %p\n",grid->szn);
fprintf(output,"grid.vxn = %p\n",grid->vxn);
fprintf(output,"grid.vyn = %p\n",grid->vyn);
fprintf(output,"grid.vzn = %p\n",grid->vzn);
fprintf(output,"grid.fxn = %p\n",grid->fxn);
fprintf(output,"grid.fyn = %p\n",grid->fyn);
fprintf(output,"grid.fzn = %p\n",grid->fzn);
fprintf(output,"grid.xyzn = %p\n",grid->xyzn);
*/
/*fprintf(output,"grid.yn = %p\n",grid->yn);
fprintf(output,"grid.zn = %p\n",grid->zn);
fprintf(output,"grid.rl = %p\n",grid->rl);*/
line2800">2800: fclose(o
line2801">2801: return line2802">2802

line2804">2804: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
line2805">2805: int EdgeColoring(int nnodes,int nedge,int *e2n,int *eperm,int *ncle,int *counte) line2806">2806
line2807">2807: int ncolore = *nc
line2808">2808: int iedg = 0,ib = 0,ie = nedge,ta
line2809">2809: int i
line2810">2810: in
line2811">2811: ICALLOC(nnodes,&am
line2812">2812: while (ib <
line2813">2813: for (i = 0; i < nnodes; i++) tag[
line2814">2814: counte[ncolor
line2815">2815: for (i = ib; i < ie;
line2816">2816: n1 =
line2817">2817: n2 = e2n[i+
line2818">2818: tagcount = tag[n1]+t
/* If tagcount = 0 then this edge belongs in this color */
line2820">2820: if (!tagc
line2821">2821: tag[n1]
line2822">2822: tag[n2]
line2823">2823: e2n[i] = e2n
line2824">2824: e2n[i+nedge] = e2n[iedg+
line2825">2825: e2n[iedg]
line2826">2826: e2n[iedg+nedge
line2827">2827: n1 = ep
line2828">2828: eperm[i] = eperm
line2829">2829: eperm[iedg]
line2830">2830:
line2831">2831: counte[ncolor
line2832">2832:
line2833">2833: line2834">2834: ib
line2835">2835: nco
line2836">2836:
line2837">2837: *ncle = n
line2838">2838: return line2839">2839
line2840">2840: #endif
line2841">2841: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
line2842">2842: int EventCountersBegin(int *gen_start,PetscScalar *time_start_counters) line2843">2843
line2845">2845: if ((*gen_start = start_counters(event0,event1)) < 0) SETERRQ(PETSC_COMM_SELF,1,"Error in start_counters\n" line2846">2846: PetscTime(&time_start_cou
line2847">2847: return line2848">2848

line2850">2850: int EventCountersEnd(int gen_start,PetscScalar time_start_counters) line2851">2851
line2852">2852: int gen_rea
line2853">2853: PetscScalar time_read_co
line2854">2854: long long _counter0,_co

line2856">2856: if ((gen_read = read_counters(event0,&_counter0,event1,&_counter1)) < 0) SETERRQ(PETSC_COMM_SELF,1,"Error in read_counter\n" line2857">2857: PetscTime(&&time_read_cou
line2858">2858: if (gen_read != gen_start) SETERRQ(PETSC_COMM_SELF,1,"Lost Counters!! Aborting ...\n" line2859">2859: counter0 += _co
line2860">2860: counter1 += _co
line2861">2861: time_counters += time_read_counters-time_start_co
line2862">2862: return line2863">2863
line2864">2864: #endif