Actual source code: ex66.c

  1: static char help[] = "Tests MATH2OPUS\n\n";

  3: #include <petscmat.h>
  4: #include <petscsf.h>

  6: static PetscScalar GenEntry_Symm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
  7: {
  8:     PetscInt  d;
  9:     PetscReal clength = sdim == 3 ? 0.2 : 0.1;
 10:     PetscReal dist, diff = 0.0;

 12:     for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
 13:     dist = PetscSqrtReal(diff);
 14:     return PetscExpReal(-dist / clength);
 15: }

 17: static PetscScalar GenEntry_Unsymm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
 18: {
 19:     PetscInt  d;
 20:     PetscReal clength = sdim == 3 ? 0.2 : 0.1;
 21:     PetscReal dist, diff = 0.0, nx = 0.0, ny = 0.0;

 23:     for (d = 0; d < sdim; d++) { nx += x[d]*x[d]; }
 24:     for (d = 0; d < sdim; d++) { ny += y[d]*y[d]; }
 25:     for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
 26:     dist = PetscSqrtReal(diff);
 27:     return nx > ny ? PetscExpReal(-dist / clength) : PetscExpReal(-dist / clength) + 1.;
 28: }

 30: int main(int argc,char **argv)
 31: {
 32:   Mat            A,B,C,D;
 33:   Vec            v,x,y,Ax,Ay,Bx,By;
 34:   PetscRandom    r;
 35:   PetscLayout    map;
 36:   PetscScalar    *Adata = NULL, *Cdata = NULL, scale = 1.0;
 37:   PetscReal      *coords,nA,nD,nB,err,nX,norms[3];
 38:   PetscInt       N, n = 64, dim = 1, i, j, nrhs = 11, lda = 0, ldc = 0, nt, ntrials = 2;
 39:   PetscMPIInt    size,rank;
 40:   PetscBool      testlayout = PETSC_FALSE,flg,symm = PETSC_FALSE, Asymm = PETSC_TRUE, kernel = PETSC_TRUE;
 41:   PetscBool      checkexpl = PETSC_FALSE, agpu = PETSC_FALSE, bgpu = PETSC_FALSE, cgpu = PETSC_FALSE, flgglob;
 42:   PetscBool      testtrans, testnorm, randommat = PETSC_TRUE, testorthog, testcompress;
 43:   void           (*approxnormfunc)(void);
 44:   void           (*Anormfunc)(void);

 47: #if defined(PETSC_HAVE_MPI_INIT_THREAD)
 48:   PETSC_MPI_THREAD_REQUIRED = MPI_THREAD_MULTIPLE;
 49: #endif
 50:   PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr;
 51:   PetscOptionsGetInt(NULL,NULL,"-ng",&N,&flgglob);
 52:   PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
 53:   PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);
 54:   PetscOptionsGetInt(NULL,NULL,"-dim",&dim,NULL);
 55:   PetscOptionsGetInt(NULL,NULL,"-lda",&lda,NULL);
 56:   PetscOptionsGetInt(NULL,NULL,"-ldc",&ldc,NULL);
 57:   PetscOptionsGetInt(NULL,NULL,"-matmattrials",&ntrials,NULL);
 58:   PetscOptionsGetBool(NULL,NULL,"-randommat",&randommat,NULL);
 59:   if (!flgglob) { PetscOptionsGetBool(NULL,NULL,"-testlayout",&testlayout,NULL); }
 60:   PetscOptionsGetBool(NULL,NULL,"-Asymm",&Asymm,NULL);
 61:   PetscOptionsGetBool(NULL,NULL,"-symm",&symm,NULL);
 62:   PetscOptionsGetBool(NULL,NULL,"-kernel",&kernel,NULL);
 63:   PetscOptionsGetBool(NULL,NULL,"-checkexpl",&checkexpl,NULL);
 64:   PetscOptionsGetBool(NULL,NULL,"-agpu",&agpu,NULL);
 65:   PetscOptionsGetBool(NULL,NULL,"-bgpu",&bgpu,NULL);
 66:   PetscOptionsGetBool(NULL,NULL,"-cgpu",&cgpu,NULL);
 67:   PetscOptionsGetScalar(NULL,NULL,"-scale",&scale,NULL);
 68:   if (!Asymm) symm = PETSC_FALSE;

 70:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
 71:   /* MatMultTranspose for nonsymmetric matrices in parallel not implemented */
 72:   testtrans = (PetscBool)(size == 1 || symm);
 73:   testnorm = (PetscBool)(size == 1 || symm);
 74:   testorthog = (PetscBool)(size == 1 || symm);
 75:   testcompress = (PetscBool)(size == 1 || symm);

 77:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
 78:   PetscLayoutCreate(PETSC_COMM_WORLD,&map);
 79:   if (testlayout) {
 80:     if (rank%2) n = PetscMax(2*n-5*rank,0);
 81:     else n = 2*n+rank;
 82:   }
 83:   if (!flgglob) {
 84:     PetscLayoutSetLocalSize(map,n);
 85:     PetscLayoutSetUp(map);
 86:     PetscLayoutGetSize(map,&N);
 87:   } else {
 88:     PetscLayoutSetSize(map,N);
 89:     PetscLayoutSetUp(map);
 90:     PetscLayoutGetLocalSize(map,&n);
 91:   }
 92:   PetscLayoutDestroy(&map);

 94:   if (lda) {
 95:     PetscMalloc1(N*(n+lda),&Adata);
 96:   }
 97:   MatCreateDense(PETSC_COMM_WORLD,n,n,N,N,Adata,&A);
 98:   MatDenseSetLDA(A,n+lda);

100:   /* Create random points; these are replicated in order to populate a dense matrix and to compare sequential and dense runs
101:      The constructor for MATH2OPUS can take as input the distributed coordinates and replicates them internally in case
102:      a kernel construction is requested */
103:   PetscRandomCreate(PETSC_COMM_WORLD,&r);
104:   PetscRandomSetFromOptions(r);
105:   PetscRandomSetSeed(r,123456);
106:   PetscRandomSeed(r);
107:   PetscMalloc1(N*dim,&coords);
108:   PetscRandomGetValuesReal(r,N*dim,coords);
109:   PetscRandomDestroy(&r);

111:   if (kernel || !randommat) {
112:     MatH2OpusKernel k = Asymm ? GenEntry_Symm : GenEntry_Unsymm;
113:     PetscInt        ist,ien;

115:     MatGetOwnershipRange(A,&ist,&ien);
116:     for (i = ist; i < ien; i++) {
117:       for (j = 0; j < N; j++) {
118:         MatSetValue(A,i,j,(*k)(dim,coords + i*dim,coords + j*dim,NULL),INSERT_VALUES);
119:       }
120:     }
121:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
122:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
123:     if (kernel) {
124:       MatCreateH2OpusFromKernel(PETSC_COMM_WORLD,n,n,N,N,dim,coords + ist*dim,PETSC_TRUE,k,NULL,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);
125:     } else {
126:       MatCreateH2OpusFromMat(A,dim,coords + ist*dim,PETSC_TRUE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);
127:     }
128:   } else {
129:     PetscInt ist;

131:     MatGetOwnershipRange(A,&ist,NULL);
132:     MatSetRandom(A,NULL);
133:     if (Asymm) {
134:       MatTranspose(A,MAT_INITIAL_MATRIX,&B);
135:       MatAXPY(A,1.0,B,SAME_NONZERO_PATTERN);
136:       MatDestroy(&B);
137:       MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
138:     }
139:     MatCreateH2OpusFromMat(A,dim,coords + ist*dim,PETSC_TRUE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);
140:   }
141:   PetscFree(coords);
142:   if (agpu) {
143:     MatConvert(A,MATDENSECUDA,MAT_INPLACE_MATRIX,&A);
144:   }
145:   MatViewFromOptions(A,NULL,"-A_view");

147:   MatSetOption(B,MAT_SYMMETRIC,symm);

149:   /* assemble the H-matrix */
150:   MatBindToCPU(B,(PetscBool)!bgpu);
151:   MatSetFromOptions(B);
152:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
153:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
154:   MatViewFromOptions(B,NULL,"-B_view");

156:   /* Test MatScale */
157:   MatScale(A,scale);
158:   MatScale(B,scale);

160:   /* Test MatMult */
161:   MatCreateVecs(A,&Ax,&Ay);
162:   MatCreateVecs(B,&Bx,&By);
163:   VecSetRandom(Ax,NULL);
164:   VecCopy(Ax,Bx);
165:   MatMult(A,Ax,Ay);
166:   MatMult(B,Bx,By);
167:   VecViewFromOptions(Ay,NULL,"-mult_vec_view");
168:   VecViewFromOptions(By,NULL,"-mult_vec_view");
169:   VecNorm(Ay,NORM_INFINITY,&nX);
170:   VecAXPY(Ay,-1.0,By);
171:   VecViewFromOptions(Ay,NULL,"-mult_vec_view");
172:   VecNorm(Ay,NORM_INFINITY,&err);
173:   PetscPrintf(PETSC_COMM_WORLD,"MatMult err %g\n",err/nX);
174:   VecScale(By,-1.0);
175:   MatMultAdd(B,Bx,By,By);
176:   VecNorm(By,NORM_INFINITY,&err);
177:   VecViewFromOptions(By,NULL,"-mult_vec_view");
178:   if (err > PETSC_SMALL) {
179:     PetscPrintf(PETSC_COMM_WORLD,"MatMultAdd err %g\n",err);
180:   }

182:   /* Test MatNorm */
183:   MatNorm(A,NORM_INFINITY,&norms[0]);
184:   MatNorm(A,NORM_1,&norms[1]);
185:   norms[2] = -1.; /* NORM_2 not supported */
186:   PetscPrintf(PETSC_COMM_WORLD,"A Matrix norms:        infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);
187:   MatGetOperation(A,MATOP_NORM,&Anormfunc);
188:   MatGetOperation(B,MATOP_NORM,&approxnormfunc);
189:   MatSetOperation(A,MATOP_NORM,approxnormfunc);
190:   MatNorm(A,NORM_INFINITY,&norms[0]);
191:   MatNorm(A,NORM_1,&norms[1]);
192:   MatNorm(A,NORM_2,&norms[2]);
193:   PetscPrintf(PETSC_COMM_WORLD,"A Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);
194:   if (testnorm) {
195:     MatNorm(B,NORM_INFINITY,&norms[0]);
196:     MatNorm(B,NORM_1,&norms[1]);
197:     MatNorm(B,NORM_2,&norms[2]);
198:   } else {
199:     norms[0] = -1.;
200:     norms[1] = -1.;
201:     norms[2] = -1.;
202:   }
203:   PetscPrintf(PETSC_COMM_WORLD,"B Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);
204:   MatSetOperation(A,MATOP_NORM,Anormfunc);

206:   /* Test MatDuplicate */
207:   MatDuplicate(B,MAT_COPY_VALUES,&D);
208:   MatSetOption(D,MAT_SYMMETRIC,symm);
209:   MatMultEqual(B,D,10,&flg);
210:   if (!flg) {
211:     PetscPrintf(PETSC_COMM_WORLD,"MatMult error after MatDuplicate\n");
212:   }
213:   if (testtrans) {
214:     MatMultTransposeEqual(B,D,10,&flg);
215:     if (!flg) {
216:       PetscPrintf(PETSC_COMM_WORLD,"MatMultTranpose error after MatDuplicate\n");
217:     }
218:   }
219:   MatDestroy(&D);

221:   if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
222:     VecSetRandom(Ay,NULL);
223:     VecCopy(Ay,By);
224:     MatMultTranspose(A,Ay,Ax);
225:     MatMultTranspose(B,By,Bx);
226:     VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");
227:     VecViewFromOptions(Bx,NULL,"-multtrans_vec_view");
228:     VecNorm(Ax,NORM_INFINITY,&nX);
229:     VecAXPY(Ax,-1.0,Bx);
230:     VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");
231:     VecNorm(Ax,NORM_INFINITY,&err);
232:     PetscPrintf(PETSC_COMM_WORLD,"MatMultTranspose err %g\n",err/nX);
233:     VecScale(Bx,-1.0);
234:     MatMultTransposeAdd(B,By,Bx,Bx);
235:     VecNorm(Bx,NORM_INFINITY,&err);
236:     if (err > PETSC_SMALL) {
237:       PetscPrintf(PETSC_COMM_WORLD,"MatMultTransposeAdd err %g\n",err);
238:     }
239:   }
240:   VecDestroy(&Ax);
241:   VecDestroy(&Ay);
242:   VecDestroy(&Bx);
243:   VecDestroy(&By);

245:   /* Test MatMatMult */
246:   if (ldc) {
247:     PetscMalloc1(nrhs*(n+ldc),&Cdata);
248:   }
249:   MatCreateDense(PETSC_COMM_WORLD,n,PETSC_DECIDE,N,nrhs,Cdata,&C);
250:   MatDenseSetLDA(C,n+ldc);
251:   MatSetRandom(C,NULL);
252:   if (cgpu) {
253:     MatConvert(C,MATDENSECUDA,MAT_INPLACE_MATRIX,&C);
254:   }
255:   for (nt = 0; nt < ntrials; nt++) {
256:     MatMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);
257:     MatViewFromOptions(D,NULL,"-bc_view");
258:     PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");
259:     if (flg) {
260:       MatCreateVecs(B,&x,&y);
261:       MatCreateVecs(D,NULL,&v);
262:       for (i = 0; i < nrhs; i++) {
263:         MatGetColumnVector(D,v,i);
264:         MatGetColumnVector(C,x,i);
265:         MatMult(B,x,y);
266:         VecAXPY(y,-1.0,v);
267:         VecNorm(y,NORM_INFINITY,&err);
268:         if (err > PETSC_SMALL) { PetscPrintf(PETSC_COMM_WORLD,"MatMat err %D %g\n",i,err); }
269:       }
270:       VecDestroy(&y);
271:       VecDestroy(&x);
272:       VecDestroy(&v);
273:     }
274:   }
275:   MatDestroy(&D);

277:   /* Test MatTransposeMatMult */
278:   if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
279:     for (nt = 0; nt < ntrials; nt++) {
280:       MatTransposeMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);
281:       MatViewFromOptions(D,NULL,"-btc_view");
282:       PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");
283:       if (flg) {
284:         MatCreateVecs(B,&y,&x);
285:         MatCreateVecs(D,NULL,&v);
286:         for (i = 0; i < nrhs; i++) {
287:           MatGetColumnVector(D,v,i);
288:           MatGetColumnVector(C,x,i);
289:           MatMultTranspose(B,x,y);
290:           VecAXPY(y,-1.0,v);
291:           VecNorm(y,NORM_INFINITY,&err);
292:           if (err > PETSC_SMALL) { PetscPrintf(PETSC_COMM_WORLD,"MatTransMat err %D %g\n",i,err); }
293:         }
294:         VecDestroy(&y);
295:         VecDestroy(&x);
296:         VecDestroy(&v);
297:       }
298:     }
299:     MatDestroy(&D);
300:   }

302:   if (testorthog) {
303:     MatDuplicate(B,MAT_COPY_VALUES,&D);
304:     MatSetOption(D,MAT_SYMMETRIC,symm);
305:     MatH2OpusOrthogonalize(D);
306:     MatMultEqual(B,D,10,&flg);
307:     if (!flg) {
308:       PetscPrintf(PETSC_COMM_WORLD,"MatMult error after basis ortogonalization\n");
309:     }
310:     MatDestroy(&D);
311:   }

313:   if (testcompress) {
314:     MatDuplicate(B,MAT_COPY_VALUES,&D);
315:     MatSetOption(D,MAT_SYMMETRIC,symm);
316:     MatH2OpusCompress(D,PETSC_SMALL);
317:     MatDestroy(&D);
318:   }

320:   /* check explicit operator */
321:   if (checkexpl) {
322:     Mat Be, Bet;

324:     MatComputeOperator(B,MATDENSE,&D);
325:     MatDuplicate(D,MAT_COPY_VALUES,&Be);
326:     MatNorm(D,NORM_FROBENIUS,&nB);
327:     MatViewFromOptions(D,NULL,"-expl_view");
328:     MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);
329:     MatViewFromOptions(D,NULL,"-diff_view");
330:     MatNorm(D,NORM_FROBENIUS,&nD);
331:     MatNorm(A,NORM_FROBENIUS,&nA);
332:     PetscPrintf(PETSC_COMM_WORLD,"Approximation error %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);
333:     MatDestroy(&D);

335:     if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
336:       MatTranspose(A,MAT_INPLACE_MATRIX,&A);
337:       MatComputeOperatorTranspose(B,MATDENSE,&D);
338:       MatDuplicate(D,MAT_COPY_VALUES,&Bet);
339:       MatNorm(D,NORM_FROBENIUS,&nB);
340:       MatViewFromOptions(D,NULL,"-expl_trans_view");
341:       MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);
342:       MatViewFromOptions(D,NULL,"-diff_trans_view");
343:       MatNorm(D,NORM_FROBENIUS,&nD);
344:       MatNorm(A,NORM_FROBENIUS,&nA);
345:       PetscPrintf(PETSC_COMM_WORLD,"Approximation error transpose %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);
346:       MatDestroy(&D);

348:       MatTranspose(Bet,MAT_INPLACE_MATRIX,&Bet);
349:       MatAXPY(Be,-1.0,Bet,SAME_NONZERO_PATTERN);
350:       MatViewFromOptions(Be,NULL,"-diff_expl_view");
351:       MatNorm(Be,NORM_FROBENIUS,&nB);
352:       PetscPrintf(PETSC_COMM_WORLD,"Approximation error B - (B^T)^T %g\n",nB);
353:       MatDestroy(&Be);
354:       MatDestroy(&Bet);
355:     }
356:   }
357:   MatDestroy(&A);
358:   MatDestroy(&B);
359:   MatDestroy(&C);
360:   PetscFree(Cdata);
361:   PetscFree(Adata);
362:   PetscFinalize();
363:   return ierr;
364: }

366: /*TEST

368:    build:
369:      requires: h2opus

371: #tests from kernel
372:    test:
373:      requires: h2opus
374:      nsize: 1
375:      suffix: 1
376:      args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 0

378:    test:
379:      requires: h2opus
380:      nsize: 1
381:      suffix: 1_ld
382:      output_file: output/ex66_1.out
383:      args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 0

385:    test:
386:      requires: h2opus cuda
387:      nsize: 1
388:      suffix: 1_cuda
389:      output_file: output/ex66_1.out
390:      args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 1

392:    test:
393:      requires: h2opus cuda
394:      nsize: 1
395:      suffix: 1_cuda_ld
396:      output_file: output/ex66_1.out
397:      args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 1

399:    test:
400:      requires: h2opus
401:      nsize: {{2 3}}
402:      suffix: 1_par
403:      args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu 0 -cgpu 0

405:    test:
406:      requires: h2opus cuda
407:      nsize: {{2 3}}
408:      suffix: 1_par_cuda
409:      args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu {{0 1}} -cgpu {{0 1}}
410:      output_file: output/ex66_1_par.out

412: #tests from matrix sampling (parallel or unsymmetric not supported)
413:    test:
414:      requires: h2opus
415:      nsize: 1
416:      suffix: 2
417:      args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu 0

419:    test:
420:      requires: h2opus cuda
421:      nsize: 1
422:      suffix: 2_cuda
423:      output_file: output/ex66_2.out
424:      args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu {{0 1}} -agpu {{0 1}}

426: #tests view operation
427:    test:
428:      requires: h2opus !cuda
429:      filter: grep -v "MPI processes" | grep -v "\[" | grep -v "\]"
430:      nsize: {{1 2 3}}
431:      suffix: view
432:      args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2

434:    test:
435:      requires: h2opus cuda
436:      filter: grep -v "MPI processes" | grep -v "\[" | grep -v "\]"
437:      nsize: {{1 2 3}}
438:      suffix: view_cuda
439:      args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -bgpu -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2

441: TEST*/