Actual source code: dvec2.c
2: /*
3: Defines some vector operation functions that are shared by
4: sequential and parallel vectors.
5: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <petsc/private/kernels/petscaxpy.h>
9: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
10: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
11: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
12: {
13: PetscErrorCode ierr;
14: PetscInt i,nv_rem,n = xin->map->n;
15: PetscScalar sum0,sum1,sum2,sum3;
16: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
17: Vec *yy;
20: sum0 = 0.0;
21: sum1 = 0.0;
22: sum2 = 0.0;
24: i = nv;
25: nv_rem = nv&0x3;
26: yy = (Vec*)yin;
27: VecGetArrayRead(xin,&x);
29: switch (nv_rem) {
30: case 3:
31: VecGetArrayRead(yy[0],&yy0);
32: VecGetArrayRead(yy[1],&yy1);
33: VecGetArrayRead(yy[2],&yy2);
34: fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
35: VecRestoreArrayRead(yy[0],&yy0);
36: VecRestoreArrayRead(yy[1],&yy1);
37: VecRestoreArrayRead(yy[2],&yy2);
38: z[0] = sum0;
39: z[1] = sum1;
40: z[2] = sum2;
41: break;
42: case 2:
43: VecGetArrayRead(yy[0],&yy0);
44: VecGetArrayRead(yy[1],&yy1);
45: fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
46: VecRestoreArrayRead(yy[0],&yy0);
47: VecRestoreArrayRead(yy[1],&yy1);
48: z[0] = sum0;
49: z[1] = sum1;
50: break;
51: case 1:
52: VecGetArrayRead(yy[0],&yy0);
53: fortranmdot1_(x,yy0,&n,&sum0);
54: VecRestoreArrayRead(yy[0],&yy0);
55: z[0] = sum0;
56: break;
57: case 0:
58: break;
59: }
60: z += nv_rem;
61: i -= nv_rem;
62: yy += nv_rem;
64: while (i >0) {
65: sum0 = 0.;
66: sum1 = 0.;
67: sum2 = 0.;
68: sum3 = 0.;
69: VecGetArrayRead(yy[0],&yy0);
70: VecGetArrayRead(yy[1],&yy1);
71: VecGetArrayRead(yy[2],&yy2);
72: VecGetArrayRead(yy[3],&yy3);
73: fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
74: VecRestoreArrayRead(yy[0],&yy0);
75: VecRestoreArrayRead(yy[1],&yy1);
76: VecRestoreArrayRead(yy[2],&yy2);
77: VecRestoreArrayRead(yy[3],&yy3);
78: yy += 4;
79: z[0] = sum0;
80: z[1] = sum1;
81: z[2] = sum2;
82: z[3] = sum3;
83: z += 4;
84: i -= 4;
85: }
86: VecRestoreArrayRead(xin,&x);
87: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
88: return(0);
89: }
91: #else
92: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
93: {
94: PetscErrorCode ierr;
95: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
96: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
97: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
98: Vec *yy;
101: sum0 = 0.;
102: sum1 = 0.;
103: sum2 = 0.;
105: i = nv;
106: nv_rem = nv&0x3;
107: yy = (Vec*)yin;
108: j = n;
109: VecGetArrayRead(xin,&xbase);
110: x = xbase;
112: switch (nv_rem) {
113: case 3:
114: VecGetArrayRead(yy[0],&yy0);
115: VecGetArrayRead(yy[1],&yy1);
116: VecGetArrayRead(yy[2],&yy2);
117: switch (j_rem=j&0x3) {
118: case 3:
119: x2 = x[2];
120: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
121: sum2 += x2*PetscConj(yy2[2]);
122: case 2:
123: x1 = x[1];
124: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
125: sum2 += x1*PetscConj(yy2[1]);
126: case 1:
127: x0 = x[0];
128: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
129: sum2 += x0*PetscConj(yy2[0]);
130: case 0:
131: x += j_rem;
132: yy0 += j_rem;
133: yy1 += j_rem;
134: yy2 += j_rem;
135: j -= j_rem;
136: break;
137: }
138: while (j>0) {
139: x0 = x[0];
140: x1 = x[1];
141: x2 = x[2];
142: x3 = x[3];
143: x += 4;
145: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
146: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
147: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
148: j -= 4;
149: }
150: z[0] = sum0;
151: z[1] = sum1;
152: z[2] = sum2;
153: VecRestoreArrayRead(yy[0],&yy0);
154: VecRestoreArrayRead(yy[1],&yy1);
155: VecRestoreArrayRead(yy[2],&yy2);
156: break;
157: case 2:
158: VecGetArrayRead(yy[0],&yy0);
159: VecGetArrayRead(yy[1],&yy1);
160: switch (j_rem=j&0x3) {
161: case 3:
162: x2 = x[2];
163: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
164: case 2:
165: x1 = x[1];
166: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
167: case 1:
168: x0 = x[0];
169: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
170: case 0:
171: x += j_rem;
172: yy0 += j_rem;
173: yy1 += j_rem;
174: j -= j_rem;
175: break;
176: }
177: while (j>0) {
178: x0 = x[0];
179: x1 = x[1];
180: x2 = x[2];
181: x3 = x[3];
182: x += 4;
184: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
185: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
186: j -= 4;
187: }
188: z[0] = sum0;
189: z[1] = sum1;
191: VecRestoreArrayRead(yy[0],&yy0);
192: VecRestoreArrayRead(yy[1],&yy1);
193: break;
194: case 1:
195: VecGetArrayRead(yy[0],&yy0);
196: switch (j_rem=j&0x3) {
197: case 3:
198: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
199: case 2:
200: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
201: case 1:
202: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
203: case 0:
204: x += j_rem;
205: yy0 += j_rem;
206: j -= j_rem;
207: break;
208: }
209: while (j>0) {
210: sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
211: + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
212: yy0 +=4;
213: j -= 4; x+=4;
214: }
215: z[0] = sum0;
217: VecRestoreArrayRead(yy[0],&yy0);
218: break;
219: case 0:
220: break;
221: }
222: z += nv_rem;
223: i -= nv_rem;
224: yy += nv_rem;
226: while (i >0) {
227: sum0 = 0.;
228: sum1 = 0.;
229: sum2 = 0.;
230: sum3 = 0.;
231: VecGetArrayRead(yy[0],&yy0);
232: VecGetArrayRead(yy[1],&yy1);
233: VecGetArrayRead(yy[2],&yy2);
234: VecGetArrayRead(yy[3],&yy3);
236: j = n;
237: x = xbase;
238: switch (j_rem=j&0x3) {
239: case 3:
240: x2 = x[2];
241: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
242: sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
243: case 2:
244: x1 = x[1];
245: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
246: sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
247: case 1:
248: x0 = x[0];
249: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
250: sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
251: case 0:
252: x += j_rem;
253: yy0 += j_rem;
254: yy1 += j_rem;
255: yy2 += j_rem;
256: yy3 += j_rem;
257: j -= j_rem;
258: break;
259: }
260: while (j>0) {
261: x0 = x[0];
262: x1 = x[1];
263: x2 = x[2];
264: x3 = x[3];
265: x += 4;
267: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
268: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
269: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
270: sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
271: j -= 4;
272: }
273: z[0] = sum0;
274: z[1] = sum1;
275: z[2] = sum2;
276: z[3] = sum3;
277: z += 4;
278: i -= 4;
279: VecRestoreArrayRead(yy[0],&yy0);
280: VecRestoreArrayRead(yy[1],&yy1);
281: VecRestoreArrayRead(yy[2],&yy2);
282: VecRestoreArrayRead(yy[3],&yy3);
283: yy += 4;
284: }
285: VecRestoreArrayRead(xin,&xbase);
286: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
287: return(0);
288: }
289: #endif
291: /* ----------------------------------------------------------------------------*/
292: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
293: {
294: PetscErrorCode ierr;
295: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
296: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
297: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
298: Vec *yy;
301: sum0 = 0.;
302: sum1 = 0.;
303: sum2 = 0.;
305: i = nv;
306: nv_rem = nv&0x3;
307: yy = (Vec*)yin;
308: j = n;
309: VecGetArrayRead(xin,&xbase);
310: x = xbase;
312: switch (nv_rem) {
313: case 3:
314: VecGetArrayRead(yy[0],&yy0);
315: VecGetArrayRead(yy[1],&yy1);
316: VecGetArrayRead(yy[2],&yy2);
317: switch (j_rem=j&0x3) {
318: case 3:
319: x2 = x[2];
320: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
321: sum2 += x2*yy2[2];
322: case 2:
323: x1 = x[1];
324: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
325: sum2 += x1*yy2[1];
326: case 1:
327: x0 = x[0];
328: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
329: sum2 += x0*yy2[0];
330: case 0:
331: x += j_rem;
332: yy0 += j_rem;
333: yy1 += j_rem;
334: yy2 += j_rem;
335: j -= j_rem;
336: break;
337: }
338: while (j>0) {
339: x0 = x[0];
340: x1 = x[1];
341: x2 = x[2];
342: x3 = x[3];
343: x += 4;
345: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
346: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
347: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
348: j -= 4;
349: }
350: z[0] = sum0;
351: z[1] = sum1;
352: z[2] = sum2;
353: VecRestoreArrayRead(yy[0],&yy0);
354: VecRestoreArrayRead(yy[1],&yy1);
355: VecRestoreArrayRead(yy[2],&yy2);
356: break;
357: case 2:
358: VecGetArrayRead(yy[0],&yy0);
359: VecGetArrayRead(yy[1],&yy1);
360: switch (j_rem=j&0x3) {
361: case 3:
362: x2 = x[2];
363: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
364: case 2:
365: x1 = x[1];
366: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
367: case 1:
368: x0 = x[0];
369: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
370: case 0:
371: x += j_rem;
372: yy0 += j_rem;
373: yy1 += j_rem;
374: j -= j_rem;
375: break;
376: }
377: while (j>0) {
378: x0 = x[0];
379: x1 = x[1];
380: x2 = x[2];
381: x3 = x[3];
382: x += 4;
384: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
385: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
386: j -= 4;
387: }
388: z[0] = sum0;
389: z[1] = sum1;
391: VecRestoreArrayRead(yy[0],&yy0);
392: VecRestoreArrayRead(yy[1],&yy1);
393: break;
394: case 1:
395: VecGetArrayRead(yy[0],&yy0);
396: switch (j_rem=j&0x3) {
397: case 3:
398: x2 = x[2]; sum0 += x2*yy0[2];
399: case 2:
400: x1 = x[1]; sum0 += x1*yy0[1];
401: case 1:
402: x0 = x[0]; sum0 += x0*yy0[0];
403: case 0:
404: x += j_rem;
405: yy0 += j_rem;
406: j -= j_rem;
407: break;
408: }
409: while (j>0) {
410: sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
411: j -= 4; x+=4;
412: }
413: z[0] = sum0;
415: VecRestoreArrayRead(yy[0],&yy0);
416: break;
417: case 0:
418: break;
419: }
420: z += nv_rem;
421: i -= nv_rem;
422: yy += nv_rem;
424: while (i >0) {
425: sum0 = 0.;
426: sum1 = 0.;
427: sum2 = 0.;
428: sum3 = 0.;
429: VecGetArrayRead(yy[0],&yy0);
430: VecGetArrayRead(yy[1],&yy1);
431: VecGetArrayRead(yy[2],&yy2);
432: VecGetArrayRead(yy[3],&yy3);
433: x = xbase;
435: j = n;
436: switch (j_rem=j&0x3) {
437: case 3:
438: x2 = x[2];
439: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
440: sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
441: case 2:
442: x1 = x[1];
443: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
444: sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
445: case 1:
446: x0 = x[0];
447: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
448: sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
449: case 0:
450: x += j_rem;
451: yy0 += j_rem;
452: yy1 += j_rem;
453: yy2 += j_rem;
454: yy3 += j_rem;
455: j -= j_rem;
456: break;
457: }
458: while (j>0) {
459: x0 = x[0];
460: x1 = x[1];
461: x2 = x[2];
462: x3 = x[3];
463: x += 4;
465: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
466: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
467: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
468: sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
469: j -= 4;
470: }
471: z[0] = sum0;
472: z[1] = sum1;
473: z[2] = sum2;
474: z[3] = sum3;
475: z += 4;
476: i -= 4;
477: VecRestoreArrayRead(yy[0],&yy0);
478: VecRestoreArrayRead(yy[1],&yy1);
479: VecRestoreArrayRead(yy[2],&yy2);
480: VecRestoreArrayRead(yy[3],&yy3);
481: yy += 4;
482: }
483: VecRestoreArrayRead(xin,&xbase);
484: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
485: return(0);
486: }
488: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
489: {
490: PetscInt i,j=0,n = xin->map->n;
491: PetscReal max,tmp;
492: const PetscScalar *xx;
493: PetscErrorCode ierr;
496: VecGetArrayRead(xin,&xx);
497: if (!n) {
498: max = PETSC_MIN_REAL;
499: j = -1;
500: } else {
501: max = PetscRealPart(*xx++); j = 0;
502: for (i=1; i<n; i++) {
503: if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
504: }
505: }
506: *z = max;
507: if (idx) *idx = j;
508: VecRestoreArrayRead(xin,&xx);
509: return(0);
510: }
512: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
513: {
514: PetscInt i,j=0,n = xin->map->n;
515: PetscReal min,tmp;
516: const PetscScalar *xx;
517: PetscErrorCode ierr;
520: VecGetArrayRead(xin,&xx);
521: if (!n) {
522: min = PETSC_MAX_REAL;
523: j = -1;
524: } else {
525: min = PetscRealPart(*xx++); j = 0;
526: for (i=1; i<n; i++) {
527: if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
528: }
529: }
530: *z = min;
531: if (idx) *idx = j;
532: VecRestoreArrayRead(xin,&xx);
533: return(0);
534: }
536: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
537: {
538: PetscInt i,n = xin->map->n;
539: PetscScalar *xx;
543: VecGetArrayWrite(xin,&xx);
544: if (alpha == (PetscScalar)0.0) {
545: PetscArrayzero(xx,n);
546: } else {
547: for (i=0; i<n; i++) xx[i] = alpha;
548: }
549: VecRestoreArrayWrite(xin,&xx);
550: return(0);
551: }
553: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
554: {
555: PetscErrorCode ierr;
556: PetscInt n = xin->map->n,j,j_rem;
557: const PetscScalar *yy0,*yy1,*yy2,*yy3;
558: PetscScalar *xx,alpha0,alpha1,alpha2,alpha3;
560: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
561: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
562: #endif
565: PetscLogFlops(nv*2.0*n);
566: VecGetArray(xin,&xx);
567: switch (j_rem=nv&0x3) {
568: case 3:
569: VecGetArrayRead(y[0],&yy0);
570: VecGetArrayRead(y[1],&yy1);
571: VecGetArrayRead(y[2],&yy2);
572: alpha0 = alpha[0];
573: alpha1 = alpha[1];
574: alpha2 = alpha[2];
575: alpha += 3;
576: PetscKernelAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
577: VecRestoreArrayRead(y[0],&yy0);
578: VecRestoreArrayRead(y[1],&yy1);
579: VecRestoreArrayRead(y[2],&yy2);
580: y += 3;
581: break;
582: case 2:
583: VecGetArrayRead(y[0],&yy0);
584: VecGetArrayRead(y[1],&yy1);
585: alpha0 = alpha[0];
586: alpha1 = alpha[1];
587: alpha +=2;
588: PetscKernelAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
589: VecRestoreArrayRead(y[0],&yy0);
590: VecRestoreArrayRead(y[1],&yy1);
591: y +=2;
592: break;
593: case 1:
594: VecGetArrayRead(y[0],&yy0);
595: alpha0 = *alpha++;
596: PetscKernelAXPY(xx,alpha0,yy0,n);
597: VecRestoreArrayRead(y[0],&yy0);
598: y +=1;
599: break;
600: }
601: for (j=j_rem; j<nv; j+=4) {
602: VecGetArrayRead(y[0],&yy0);
603: VecGetArrayRead(y[1],&yy1);
604: VecGetArrayRead(y[2],&yy2);
605: VecGetArrayRead(y[3],&yy3);
606: alpha0 = alpha[0];
607: alpha1 = alpha[1];
608: alpha2 = alpha[2];
609: alpha3 = alpha[3];
610: alpha += 4;
612: PetscKernelAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
613: VecRestoreArrayRead(y[0],&yy0);
614: VecRestoreArrayRead(y[1],&yy1);
615: VecRestoreArrayRead(y[2],&yy2);
616: VecRestoreArrayRead(y[3],&yy3);
617: y += 4;
618: }
619: VecRestoreArray(xin,&xx);
620: return(0);
621: }
623: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
625: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
626: {
627: PetscErrorCode ierr;
628: PetscInt n = yin->map->n;
629: PetscScalar *yy;
630: const PetscScalar *xx;
633: if (alpha == (PetscScalar)0.0) {
634: VecCopy(xin,yin);
635: } else if (alpha == (PetscScalar)1.0) {
636: VecAXPY_Seq(yin,alpha,xin);
637: } else if (alpha == (PetscScalar)-1.0) {
638: PetscInt i;
639: VecGetArrayRead(xin,&xx);
640: VecGetArray(yin,&yy);
642: for (i=0; i<n; i++) yy[i] = xx[i] - yy[i];
644: VecRestoreArrayRead(xin,&xx);
645: VecRestoreArray(yin,&yy);
646: PetscLogFlops(1.0*n);
647: } else {
648: VecGetArrayRead(xin,&xx);
649: VecGetArray(yin,&yy);
650: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
651: {
652: PetscScalar oalpha = alpha;
653: fortranaypx_(&n,&oalpha,xx,yy);
654: }
655: #else
656: {
657: PetscInt i;
659: for (i=0; i<n; i++) yy[i] = xx[i] + alpha*yy[i];
660: }
661: #endif
662: VecRestoreArrayRead(xin,&xx);
663: VecRestoreArray(yin,&yy);
664: PetscLogFlops(2.0*n);
665: }
666: return(0);
667: }
669: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
670: /*
671: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
672: to be slower than a regular C loop. Hence,we do not include it.
673: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
674: */
676: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
677: {
678: PetscErrorCode ierr;
679: PetscInt i,n = win->map->n;
680: PetscScalar *ww;
681: const PetscScalar *yy,*xx;
684: VecGetArrayRead(xin,&xx);
685: VecGetArrayRead(yin,&yy);
686: VecGetArray(win,&ww);
687: if (alpha == (PetscScalar)1.0) {
688: PetscLogFlops(n);
689: /* could call BLAS axpy after call to memcopy, but may be slower */
690: for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
691: } else if (alpha == (PetscScalar)-1.0) {
692: PetscLogFlops(n);
693: for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
694: } else if (alpha == (PetscScalar)0.0) {
695: PetscArraycpy(ww,yy,n);
696: } else {
697: PetscScalar oalpha = alpha;
698: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
699: fortranwaxpy_(&n,&oalpha,xx,yy,ww);
700: #else
701: for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
702: #endif
703: PetscLogFlops(2.0*n);
704: }
705: VecRestoreArrayRead(xin,&xx);
706: VecRestoreArrayRead(yin,&yy);
707: VecRestoreArray(win,&ww);
708: return(0);
709: }
711: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
712: {
713: PetscErrorCode ierr;
714: PetscInt n = xin->map->n,i;
715: const PetscScalar *xx,*yy;
716: PetscReal m = 0.0;
719: VecGetArrayRead(xin,&xx);
720: VecGetArrayRead(yin,&yy);
721: for (i = 0; i < n; i++) {
722: if (yy[i] != (PetscScalar)0.0) {
723: m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
724: } else {
725: m = PetscMax(PetscAbsScalar(xx[i]), m);
726: }
727: }
728: VecRestoreArrayRead(xin,&xx);
729: VecRestoreArrayRead(yin,&yy);
730: MPIU_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
731: PetscLogFlops(n);
732: return(0);
733: }
735: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
736: {
737: Vec_Seq *v = (Vec_Seq*)vin->data;
740: if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
741: v->unplacedarray = v->array; /* save previous array so reset can bring it back */
742: v->array = (PetscScalar*)a;
743: return(0);
744: }
746: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
747: {
748: Vec_Seq *v = (Vec_Seq*)vin->data;
752: PetscFree(v->array_allocated);
753: v->array_allocated = v->array = (PetscScalar*)a;
754: return(0);
755: }