download the original source code.
1 /*
2 Example 15big
3
4 Interface: Semi-Structured interface (SStruct)
5
6 Compile with: make ex15big
7
8 Sample run: mpirun -np 8 ex15big -n 10
9
10 To see options: ex15big -help
11
12 Description: This example is a slight modification of Example 15 that
13 illustrates the 64-bit integer support in hypre needed to
14 runproblems with more than 2B unknowns.
15
16 Specifically, the changes compared to Example 15 are as
17 follows:
18
19 1) All integer arguments to HYPRE functions should be
20 declared of type HYPRE_Int.
21
22 2) Variables of type HYPRE_Int are 64-bit integers, so
23 they should be printed in the %lld format (not %d).
24
25 To enable the 64-bit integer support, you need to build
26 hypre with the --enable-bigint option of 'configure'.
27 We recommend comparing this example with Example 15.
28 */
29
30 #include <math.h>
31 #include "_hypre_utilities.h"
32 #include "HYPRE_sstruct_mv.h"
33 #include "HYPRE_sstruct_ls.h"
34 #include "_hypre_parcsr_ls.h"
35 #include "HYPRE.h"
36
37
38 int optionAlpha, optionBeta;
39
40 /* Curl-curl coefficient alpha = mu^{-1} */
41 double alpha(double x, double y, double z)
42 {
43 switch (optionAlpha)
44 {
45 case 0: /* uniform coefficient */
46 return 1.0;
47 case 1: /* smooth coefficient */
48 return x*x+exp(y)+sin(z);
49 case 2: /* small outside of an interior cube */
50 if ((fabs(x-0.5) < 0.25) && (fabs(y-0.5) < 0.25) && (fabs(z-0.5) < 0.25))
51 return 1.0;
52 else
53 return 1.0e-6;
54 case 3: /* small outside of an interior ball */
55 if (((x-0.5)*(x-0.5)+(y-0.5)*(y-0.5)+(z-0.5)*(z-0.5)) < 0.0625)
56 return 1.0;
57 else
58 return 1.0e-6;
59 case 4: /* random coefficient */
60 return hypre_Rand();
61 default:
62 return 1.0;
63 }
64 }
65
66 /* Mass coefficient beta = sigma */
67 double beta(double x, double y, double z)
68 {
69 switch (optionBeta)
70 {
71 case 0: /* uniform coefficient */
72 return 1.0;
73 case 1: /* smooth coefficient */
74 return x*x+exp(y)+sin(z);
75 case 2:/* small outside of interior cube */
76 if ((fabs(x-0.5) < 0.25) && (fabs(y-0.5) < 0.25) && (fabs(z-0.5) < 0.25))
77 return 1.0;
78 else
79 return 1.0e-6;
80 case 3: /* small outside of an interior ball */
81 if (((x-0.5)*(x-0.5)+(y-0.5)*(y-0.5)+(z-0.5)*(z-0.5)) < 0.0625)
82 return 1.0;
83 else
84 return 1.0e-6;
85 case 4: /* random coefficient */
86 return hypre_Rand();
87 default:
88 return 1.0;
89 }
90 }
91
92 /*
93 This routine computes the lowest order Nedelec, or "edge" finite element
94 stiffness matrix and load vector on a cube of size h. The 12 edges {e_i}
95 are numbered in terms of the vertices as follows:
96
97 [7]------[6]
98 /| /| e_0 = 01, e_1 = 12, e_2 = 32, e_3 = 03,
99 / | / | e_4 = 45, e_5 = 56, e_6 = 76, e_7 = 47,
100 [4]------[5] | e_8 = 04, e_9 = 15, e_10 = 26, e_11 = 37.
101 | [3]----|-[2]
102 | / | / The edges are oriented from first to the
103 |/ |/ second vertex, e.g. e_0 is from [0] to [1].
104 [0]------[1]
105
106 We allow for different scaling of the curl-curl and the mass parts of the
107 matrix with coefficients alpha and beta respectively:
108
109 S_ij = alpha (curl phi_i,curl phi_j) + beta (phi_i, phi_j).
110
111 The load vector corresponding to a right-hand side of {1,1,1} is
112
113 F_j = (1,phi_j) = h^2/4.
114 */
115 void ComputeFEMND1(double S[12][12], double F[12],
116 double x, double y, double z, double h)
117 {
118 int i, j;
119
120 double h2_4 = h*h/4;
121
122 double cS1 = alpha(x,y,z)/(6.0*h), cS2 = 2*cS1, cS4 = 2*cS2;
123 double cM1 = beta(x,y,z)*h/36.0, cM2 = 2*cM1, cM4 = 2*cM2;
124
125 S[ 0][ 0] = cS4 + cM4; S[ 0][ 1] = cS2; S[ 0][ 2] = -cS1 + cM2;
126 S[ 0][ 3] = -cS2; S[ 0][ 4] = -cS1 + cM2; S[ 0][ 5] = cS1;
127 S[ 0][ 6] = -cS2 + cM1; S[ 0][ 7] = -cS1; S[ 0][ 8] = -cS2;
128 S[ 0][ 9] = cS2; S[ 0][10] = cS1; S[ 0][11] = -cS1;
129
130 S[ 1][ 1] = cS4 + cM4; S[ 1][ 2] = -cS2; S[ 1][ 3] = -cS1 + cM2;
131 S[ 1][ 4] = cS1; S[ 1][ 5] = -cS1 + cM2; S[ 1][ 6] = -cS1;
132 S[ 1][ 7] = -cS2 + cM1; S[ 1][ 8] = -cS1; S[ 1][ 9] = -cS2;
133 S[ 1][10] = cS2; S[ 1][11] = cS1;
134
135 S[ 2][ 2] = cS4 + cM4; S[ 2][ 3] = cS2; S[ 2][ 4] = -cS2 + cM1;
136 S[ 2][ 5] = -cS1; S[ 2][ 6] = -cS1 + cM2; S[ 2][ 7] = cS1;
137 S[ 2][ 8] = -cS1; S[ 2][ 9] = cS1; S[ 2][10] = cS2;
138 S[ 2][11] = -cS2;
139
140 S[ 3][ 3] = cS4 + cM4; S[ 3][ 4] = -cS1; S[ 3][ 5] = -cS2 + cM1;
141 S[ 3][ 6] = cS1; S[ 3][ 7] = -cS1 + cM2; S[ 3][ 8] = -cS2;
142 S[ 3][ 9] = -cS1; S[ 3][10] = cS1; S[ 3][11] = cS2;
143
144 S[ 4][ 4] = cS4 + cM4; S[ 4][ 5] = cS2; S[ 4][ 6] = -cS1 + cM2;
145 S[ 4][ 7] = -cS2; S[ 4][ 8] = cS2; S[ 4][ 9] = -cS2;
146 S[ 4][10] = -cS1; S[ 4][11] = cS1;
147
148 S[ 5][ 5] = cS4 + cM4; S[ 5][ 6] = -cS2; S[ 5][ 7] = -cS1 + cM2;
149 S[ 5][ 8] = cS1; S[ 5][ 9] = cS2; S[ 5][10] = -cS2;
150 S[ 5][11] = -cS1;
151
152 S[ 6][ 6] = cS4 + cM4; S[ 6][ 7] = cS2; S[ 6][ 8] = cS1;
153 S[ 6][ 9] = -cS1; S[ 6][10] = -cS2; S[ 6][11] = cS2;
154
155 S[ 7][ 7] = cS4 + cM4; S[ 7][ 8] = cS2; S[ 7][ 9] = cS1;
156 S[ 7][10] = -cS1; S[ 7][11] = -cS2;
157
158 S[ 8][ 8] = cS4 + cM4; S[ 8][ 9] = -cS1 + cM2; S[ 8][10] = -cS2 + cM1;
159 S[ 8][11] = -cS1 + cM2;
160
161 S[ 9][ 9] = cS4 + cM4; S[ 9][10] = -cS1 + cM2; S[ 9][11] = -cS2 + cM1;
162
163 S[10][10] = cS4 + cM4; S[10][11] = -cS1 + cM2;
164
165 S[11][11] = cS4 + cM4;
166
167 /* The stiffness matrix is symmetric */
168 for (i = 1; i < 12; i++)
169 for (j = 0; j < i; j++)
170 S[i][j] = S[j][i];
171
172 for (i = 0; i < 12; i++)
173 F[i] = h2_4;
174 }
175
176
177 int main (int argc, char *argv[])
178 {
179 int myid, num_procs;
180 int n, N, pi, pj, pk;
181 double h;
182 int print_solution;
183
184 double tol, theta;
185 int maxit, cycle_type;
186 int rlx_type, rlx_sweeps, rlx_weight, rlx_omega;
187 int amg_coarsen_type, amg_agg_levels, amg_rlx_type;
188 int amg_interp_type, amg_Pmax;
189 int singular_problem ;
190
191 HYPRE_Int time_index;
192
193 HYPRE_SStructGrid edge_grid;
194 HYPRE_SStructGraph A_graph;
195 HYPRE_SStructMatrix A;
196 HYPRE_SStructVector b;
197 HYPRE_SStructVector x;
198 HYPRE_SStructGrid node_grid;
199 HYPRE_SStructGraph G_graph;
200 HYPRE_SStructStencil G_stencil[3];
201 HYPRE_SStructMatrix G;
202 HYPRE_SStructVector xcoord, ycoord, zcoord;
203
204 HYPRE_Solver solver, precond;
205
206 /* Initialize MPI */
207 MPI_Init(&argc, &argv);
208 MPI_Comm_rank(MPI_COMM_WORLD, &myid);
209 MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
210
211 /* Set default parameters */
212 n = 10;
213 print_solution = 0;
214 optionAlpha = 0;
215 optionBeta = 0;
216 maxit = 100;
217 tol = 1e-6;
218 cycle_type = 13;
219 rlx_type = 2;
220 rlx_sweeps = 1;
221 rlx_weight = 1.0;
222 rlx_omega = 1.0;
223 amg_coarsen_type = 10;
224 amg_agg_levels = 1;
225 amg_rlx_type = 6;
226 theta = 0.25;
227 amg_interp_type = 6;
228 amg_Pmax = 4;
229 singular_problem = 0;
230
231 /* Parse command line */
232 {
233 int arg_index = 0;
234 int print_usage = 0;
235
236 while (arg_index < argc)
237 {
238 if ( strcmp(argv[arg_index], "-n") == 0 )
239 {
240 arg_index++;
241 n = atoi(argv[arg_index++]);
242 }
243 else if ( strcmp(argv[arg_index], "-a") == 0 )
244 {
245 arg_index++;
246 optionAlpha = atoi(argv[arg_index++]);
247 }
248 else if ( strcmp(argv[arg_index], "-b") == 0 )
249 {
250 arg_index++;
251 optionBeta = atoi(argv[arg_index++]);
252 }
253 else if ( strcmp(argv[arg_index], "-print_solution") == 0 )
254 {
255 arg_index++;
256 print_solution = 1;
257 }
258 else if ( strcmp(argv[arg_index], "-maxit") == 0 )
259 {
260 arg_index++;
261 maxit = atoi(argv[arg_index++]);
262 }
263 else if ( strcmp(argv[arg_index], "-tol") == 0 )
264 {
265 arg_index++;
266 tol = atof(argv[arg_index++]);
267 }
268 else if ( strcmp(argv[arg_index], "-type") == 0 )
269 {
270 arg_index++;
271 cycle_type = atoi(argv[arg_index++]);
272 }
273 else if ( strcmp(argv[arg_index], "-rlx") == 0 )
274 {
275 arg_index++;
276 rlx_type = atoi(argv[arg_index++]);
277 }
278 else if ( strcmp(argv[arg_index], "-rlxn") == 0 )
279 {
280 arg_index++;
281 rlx_sweeps = atoi(argv[arg_index++]);
282 }
283 else if ( strcmp(argv[arg_index], "-rlxw") == 0 )
284 {
285 arg_index++;
286 rlx_weight = atof(argv[arg_index++]);
287 }
288 else if ( strcmp(argv[arg_index], "-rlxo") == 0 )
289 {
290 arg_index++;
291 rlx_omega = atof(argv[arg_index++]);
292 }
293 else if ( strcmp(argv[arg_index], "-ctype") == 0 )
294 {
295 arg_index++;
296 amg_coarsen_type = atoi(argv[arg_index++]);
297 }
298 else if ( strcmp(argv[arg_index], "-amgrlx") == 0 )
299 {
300 arg_index++;
301 amg_rlx_type = atoi(argv[arg_index++]);
302 }
303 else if ( strcmp(argv[arg_index], "-agg") == 0 )
304 {
305 arg_index++;
306 amg_agg_levels = atoi(argv[arg_index++]);
307 }
308 else if ( strcmp(argv[arg_index], "-itype") == 0 )
309 {
310 arg_index++;
311 amg_interp_type = atoi(argv[arg_index++]);
312 }
313 else if ( strcmp(argv[arg_index], "-pmax") == 0 )
314 {
315 arg_index++;
316 amg_Pmax = atoi(argv[arg_index++]);
317 }
318 else if ( strcmp(argv[arg_index], "-sing") == 0 )
319 {
320 arg_index++;
321 singular_problem = 1;
322 }
323 else if ( strcmp(argv[arg_index], "-theta") == 0 )
324 {
325 arg_index++;
326 theta = atof(argv[arg_index++]);
327 }
328
329 else if ( strcmp(argv[arg_index], "-help") == 0 )
330 {
331 print_usage = 1;
332 break;
333 }
334 else
335 {
336 arg_index++;
337 }
338 }
339
340 if ((print_usage) && (myid == 0))
341 {
342 printf("\n");
343 printf("Usage: %s [<options>]\n", argv[0]);
344 printf("\n");
345 printf(" -n <n> : problem size per processor (default: 10)\n");
346 printf(" -a <alpha_opt> : choice for the curl-curl coefficient (default: 1.0)\n");
347 printf(" -b <beta_opt> : choice for the mass coefficient (default: 1.0)\n");
348 printf(" -print_solution : print the solution vector\n");
349 printf("\n");
350 printf("PCG-AMS solver options: \n");
351 printf(" -maxit <num> : maximum number of iterations (100) \n");
352 printf(" -tol <num> : convergence tolerance (1e-6) \n");
353 printf(" -type <num> : 3-level cycle type (0-8, 11-14) \n");
354 printf(" -theta <num> : BoomerAMG threshold (0.25) \n");
355 printf(" -ctype <num> : BoomerAMG coarsening type \n");
356 printf(" -agg <num> : Levels of BoomerAMG agg. coarsening \n");
357 printf(" -amgrlx <num> : BoomerAMG relaxation type \n");
358 printf(" -itype <num> : BoomerAMG interpolation type \n");
359 printf(" -pmax <num> : BoomerAMG interpolation truncation \n");
360 printf(" -rlx <num> : relaxation type \n");
361 printf(" -rlxn <num> : number of relaxation sweeps \n");
362 printf(" -rlxw <num> : damping parameter (usually <=1) \n");
363 printf(" -rlxo <num> : SOR parameter (usually in (0,2)) \n");
364 printf(" -sing : curl-curl only (singular) problem \n");
365 printf("\n");
366 printf("\n");
367 }
368
369 if (print_usage)
370 {
371 MPI_Finalize();
372 return (0);
373 }
374 }
375
376 /* Figure out the processor grid (N x N x N). The local problem size is n^3,
377 while pi, pj and pk indicate the position in the processor grid. */
378 N = pow(num_procs,1.0/3.0) + 0.5;
379 if (num_procs != N*N*N)
380 {
381 if (myid == 0) printf("Can't run on %d processors, try %d.\n",
382 num_procs, N*N*N);
383 MPI_Finalize();
384 exit(1);
385 }
386 h = 1.0 / (N*n);
387 pk = myid / (N*N);
388 pj = myid/N - pk*N;
389 pi = myid - pj*N - pk*N*N;
390
391 /* Start timing */
392 time_index = hypre_InitializeTiming("SStruct Setup");
393 hypre_BeginTiming(time_index);
394
395 /* 1. Set up the edge and nodal grids. Note that we do this simultaneously
396 to make sure that they have the same extends. For simplicity we use
397 only one part to represent the unit cube. */
398 {
399 HYPRE_Int ndim = 3;
400 HYPRE_Int nparts = 1;
401
402 /* Create empty 2D grid objects */
403 HYPRE_SStructGridCreate(MPI_COMM_WORLD, ndim, nparts, &node_grid);
404 HYPRE_SStructGridCreate(MPI_COMM_WORLD, ndim, nparts, &edge_grid);
405
406 /* Set the extents of the grid - each processor sets its grid boxes. */
407 {
408 HYPRE_Int part = 0;
409 HYPRE_Int ilower[3] = {1 + pi*n, 1 + pj*n, 1 + pk*n};
410 HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
411
412 HYPRE_SStructGridSetExtents(node_grid, part, ilower, iupper);
413 HYPRE_SStructGridSetExtents(edge_grid, part, ilower, iupper);
414 }
415
416 /* Set the variable type and number of variables on each grid. */
417 {
418 HYPRE_Int i;
419 HYPRE_Int nnodevars = 1;
420 HYPRE_Int nedgevars = 3;
421
422 HYPRE_SStructVariable nodevars[1] = {HYPRE_SSTRUCT_VARIABLE_NODE};
423 HYPRE_SStructVariable edgevars[3] = {HYPRE_SSTRUCT_VARIABLE_XEDGE,
424 HYPRE_SSTRUCT_VARIABLE_YEDGE,
425 HYPRE_SSTRUCT_VARIABLE_ZEDGE};
426 for (i = 0; i < nparts; i++)
427 {
428 HYPRE_SStructGridSetVariables(node_grid, i, nnodevars, nodevars);
429 HYPRE_SStructGridSetVariables(edge_grid, i, nedgevars, edgevars);
430 }
431 }
432
433 /* Since there is only one part, there is no need to call the
434 SetNeighborPart or SetSharedPart functions, which determine the spatial
435 relation between the parts. See Examples 12, 13 and 14 for
436 illustrations of these calls. */
437
438 /* Now the grids are ready to be used */
439 HYPRE_SStructGridAssemble(node_grid);
440 HYPRE_SStructGridAssemble(edge_grid);
441 }
442
443 /* 2. Create the finite element stiffness matrix A and load vector b. */
444 {
445 HYPRE_Int part = 0; /* this problem has only one part */
446
447 /* Set the ordering of the variables in the finite element problem. This
448 is done by listing the variable offset directions relative to the
449 element's center. See the Reference Manual for more details. */
450 {
451 HYPRE_Int ordering[48] = { 0, 0, -1, -1, /* x-edge [0]-[1] */
452 1, +1, 0, -1, /* y-edge [1]-[2] */
453 /* [7]------[6] */ 0, 0, +1, -1, /* x-edge [3]-[2] */
454 /* /| /| */ 1, -1, 0, -1, /* y-edge [0]-[3] */
455 /* / | / | */ 0, 0, -1, +1, /* x-edge [4]-[5] */
456 /* [4]------[5] | */ 1, +1, 0, +1, /* y-edge [5]-[6] */
457 /* | [3]----|-[2] */ 0, 0, +1, +1, /* x-edge [7]-[6] */
458 /* | / | / */ 1, -1, 0, +1, /* y-edge [4]-[7] */
459 /* |/ |/ */ 2, -1, -1, 0, /* z-edge [0]-[4] */
460 /* [0]------[1] */ 2, +1, -1, 0, /* z-edge [1]-[5] */
461 2, +1, +1, 0, /* z-edge [2]-[6] */
462 2, -1, +1, 0 }; /* z-edge [3]-[7] */
463
464 HYPRE_SStructGridSetFEMOrdering(edge_grid, part, ordering);
465 }
466
467 /* Set up the Graph - this determines the non-zero structure of the
468 matrix. */
469 {
470 HYPRE_Int part = 0;
471
472 /* Create the graph object */
473 HYPRE_SStructGraphCreate(MPI_COMM_WORLD, edge_grid, &A_graph);
474
475 /* See MatrixSetObjectType below */
476 HYPRE_SStructGraphSetObjectType(A_graph, HYPRE_PARCSR);
477
478 /* Indicate that this problem uses finite element stiffness matrices and
479 load vectors, instead of stencils. */
480 HYPRE_SStructGraphSetFEM(A_graph, part);
481
482 /* The edge finite element matrix is full, so there is no need to call the
483 HYPRE_SStructGraphSetFEMSparsity() function. */
484
485 /* Assemble the graph */
486 HYPRE_SStructGraphAssemble(A_graph);
487 }
488
489 /* Set up the SStruct Matrix and right-hand side vector */
490 {
491 /* Create the matrix object */
492 HYPRE_SStructMatrixCreate(MPI_COMM_WORLD, A_graph, &A);
493 /* Use a ParCSR storage */
494 HYPRE_SStructMatrixSetObjectType(A, HYPRE_PARCSR);
495 /* Indicate that the matrix coefficients are ready to be set */
496 HYPRE_SStructMatrixInitialize(A);
497
498 /* Create an empty vector object */
499 HYPRE_SStructVectorCreate(MPI_COMM_WORLD, edge_grid, &b);
500 /* Use a ParCSR storage */
501 HYPRE_SStructVectorSetObjectType(b, HYPRE_PARCSR);
502 /* Indicate that the vector coefficients are ready to be set */
503 HYPRE_SStructVectorInitialize(b);
504 }
505
506 /* Set the matrix and vector entries by finite element assembly */
507 {
508 /* local stiffness matrix and load vector */
509 double S[12][12], F[12];
510
511 int i, j, k;
512 HYPRE_Int index[3];
513
514 for (i = 1; i <= n; i++)
515 for (j = 1; j <= n; j++)
516 for (k = 1; k <= n; k++)
517 {
518 /* Compute the FEM matrix and r.h.s. for cell (i,j,k) with
519 coefficients evaluated at the cell center. */
520 index[0] = i + pi*n; index[1] = j + pj*n; index[2] = k + pk*n;
521 ComputeFEMND1(S,F,(pi*n+i)*h-h/2,(pj*n+j)*h-h/2,(pk*n+k)*h-h/2,h);
522
523 /* Eliminate boundary conditions on x = 0 */
524 if (index[0] == 1)
525 {
526 int ii, jj, bc_edges[4] = { 3, 11, 7, 8 };
527 for (ii = 0; ii < 4; ii++)
528 {
529 for (jj = 0; jj < 12; jj++)
530 S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
531 S[bc_edges[ii]][bc_edges[ii]] = 1.0;
532 F[bc_edges[ii]] = 0.0;
533 }
534 }
535 /* Eliminate boundary conditions on y = 0 */
536 if (index[1] == 1)
537 {
538 int ii, jj, bc_edges[4] = { 0, 9, 4, 8 };
539 for (ii = 0; ii < 4; ii++)
540 {
541 for (jj = 0; jj < 12; jj++)
542 S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
543 S[bc_edges[ii]][bc_edges[ii]] = 1.0;
544 F[bc_edges[ii]] = 0.0;
545 }
546 }
547 /* Eliminate boundary conditions on z = 0 */
548 if (index[2] == 1)
549 {
550 int ii, jj, bc_edges[4] = { 0, 1, 2, 3 };
551 for (ii = 0; ii < 4; ii++)
552 {
553 for (jj = 0; jj < 12; jj++)
554 S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
555 S[bc_edges[ii]][bc_edges[ii]] = 1.0;
556 F[bc_edges[ii]] = 0.0;
557 }
558 }
559 /* Eliminate boundary conditions on x = 1 */
560 if (index[0] == N*n)
561 {
562 int ii, jj, bc_edges[4] = { 1, 10, 5, 9 };
563 for (ii = 0; ii < 4; ii++)
564 {
565 for (jj = 0; jj < 12; jj++)
566 S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
567 S[bc_edges[ii]][bc_edges[ii]] = 1.0;
568 F[bc_edges[ii]] = 0.0;
569 }
570 }
571 /* Eliminate boundary conditions on y = 1 */
572 if (index[1] == N*n)
573 {
574 int ii, jj, bc_edges[4] = { 2, 10, 6, 11 };
575 for (ii = 0; ii < 4; ii++)
576 {
577 for (jj = 0; jj < 12; jj++)
578 S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
579 S[bc_edges[ii]][bc_edges[ii]] = 1.0;
580 F[bc_edges[ii]] = 0.0;
581 }
582 }
583 /* Eliminate boundary conditions on z = 1 */
584 if (index[2] == N*n)
585 {
586 int ii, jj, bc_edges[4] = { 4, 5, 6, 7 };
587 for (ii = 0; ii < 4; ii++)
588 {
589 for (jj = 0; jj < 12; jj++)
590 S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
591 S[bc_edges[ii]][bc_edges[ii]] = 1.0;
592 F[bc_edges[ii]] = 0.0;
593 }
594 }
595
596 /* Assemble the matrix */
597 HYPRE_SStructMatrixAddFEMValues(A, part, index, &S[0][0]);
598
599 /* Assemble the vector */
600 HYPRE_SStructVectorAddFEMValues(b, part, index, F);
601 }
602 }
603
604 /* Collective calls finalizing the matrix and vector assembly */
605 HYPRE_SStructMatrixAssemble(A);
606 HYPRE_SStructVectorAssemble(b);
607 }
608
609 /* 3. Create the discrete gradient matrix G, which is needed in AMS. */
610 {
611 HYPRE_Int part = 0;
612 HYPRE_Int stencil_size = 2;
613
614 /* Define the discretization stencil relating the edges and nodes of the
615 grid. */
616 {
617 HYPRE_Int ndim = 3;
618 HYPRE_Int entry;
619 HYPRE_Int var = 0; /* the node variable */
620
621 /* The discrete gradient stencils connect edge to node variables. */
622 HYPRE_Int Gx_offsets[2][3] = {{-1,0,0},{0,0,0}}; /* x-edge [7]-[6] */
623 HYPRE_Int Gy_offsets[2][3] = {{0,-1,0},{0,0,0}}; /* y-edge [5]-[6] */
624 HYPRE_Int Gz_offsets[2][3] = {{0,0,-1},{0,0,0}}; /* z-edge [2]-[6] */
625
626 HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[0]);
627 HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[1]);
628 HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[2]);
629
630 for (entry = 0; entry < stencil_size; entry++)
631 {
632 HYPRE_SStructStencilSetEntry(G_stencil[0], entry, Gx_offsets[entry], var);
633 HYPRE_SStructStencilSetEntry(G_stencil[1], entry, Gy_offsets[entry], var);
634 HYPRE_SStructStencilSetEntry(G_stencil[2], entry, Gz_offsets[entry], var);
635 }
636 }
637
638 /* Set up the Graph - this determines the non-zero structure of the
639 matrix. */
640 {
641 HYPRE_Int nvars = 3;
642 HYPRE_Int var; /* the edge variables */
643
644 /* Create the discrete gradient graph object */
645 HYPRE_SStructGraphCreate(MPI_COMM_WORLD, edge_grid, &G_graph);
646
647 /* See MatrixSetObjectType below */
648 HYPRE_SStructGraphSetObjectType(G_graph, HYPRE_PARCSR);
649
650 /* Since the discrete gradient relates edge and nodal variables (it is a
651 rectangular matrix), we have to specify the domain (column) grid. */
652 HYPRE_SStructGraphSetDomainGrid(G_graph, node_grid);
653
654 /* Tell the graph which stencil to use for each edge variable on each
655 part (we only have one part). */
656 for (var = 0; var < nvars; var++)
657 HYPRE_SStructGraphSetStencil(G_graph, part, var, G_stencil[var]);
658
659 /* Assemble the graph */
660 HYPRE_SStructGraphAssemble(G_graph);
661 }
662
663 /* Set up the SStruct Matrix */
664 {
665 /* Create the matrix object */
666 HYPRE_SStructMatrixCreate(MPI_COMM_WORLD, G_graph, &G);
667 /* Use a ParCSR storage */
668 HYPRE_SStructMatrixSetObjectType(G, HYPRE_PARCSR);
669 /* Indicate that the matrix coefficients are ready to be set */
670 HYPRE_SStructMatrixInitialize(G);
671 }
672
673 /* Set the discrete gradient values, assuming a "natural" orientation of
674 the edges (i.e. one in agreement with the coordinate directions). */
675 {
676 int i;
677 int nedges = n*(n+1)*(n+1);
678 double *values;
679 HYPRE_Int stencil_indices[2] = {0,1}; /* the nodes of each edge */
680
681 values = calloc(2*nedges, sizeof(double));
682
683 /* The edge orientation is fixed: from first to second node */
684 for (i = 0; i < nedges; i++)
685 {
686 values[2*i] = -1.0;
687 values[2*i+1] = 1.0;
688 }
689
690 /* Set the values in the discrete gradient x-edges */
691 {
692 HYPRE_Int var = 0;
693 HYPRE_Int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
694 HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
695 HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
696 stencil_size, stencil_indices,
697 values);
698 }
699 /* Set the values in the discrete gradient y-edges */
700 {
701 HYPRE_Int var = 1;
702 HYPRE_Int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
703 HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
704 HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
705 stencil_size, stencil_indices,
706 values);
707 }
708 /* Set the values in the discrete gradient z-edges */
709 {
710 HYPRE_Int var = 2;
711 HYPRE_Int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
712 HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
713 HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
714 stencil_size, stencil_indices,
715 values);
716 }
717
718 free(values);
719 }
720
721 /* Finalize the matrix assembly */
722 HYPRE_SStructMatrixAssemble(G);
723 }
724
725 /* 4. Create the vectors of nodal coordinates xcoord, ycoord and zcoord,
726 which are needed in AMS. */
727 {
728 int i, j, k;
729 HYPRE_Int part = 0;
730 HYPRE_Int var = 0; /* the node variable */
731 HYPRE_Int index[3];
732 double xval, yval, zval;
733
734 /* Create empty vector objects */
735 HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &xcoord);
736 HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &ycoord);
737 HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &zcoord);
738 /* Set the object type to ParCSR */
739 HYPRE_SStructVectorSetObjectType(xcoord, HYPRE_PARCSR);
740 HYPRE_SStructVectorSetObjectType(ycoord, HYPRE_PARCSR);
741 HYPRE_SStructVectorSetObjectType(zcoord, HYPRE_PARCSR);
742 /* Indicate that the vector coefficients are ready to be set */
743 HYPRE_SStructVectorInitialize(xcoord);
744 HYPRE_SStructVectorInitialize(ycoord);
745 HYPRE_SStructVectorInitialize(zcoord);
746
747 /* Compute and set the coordinates of the nodes */
748 for (i = 0; i <= n; i++)
749 for (j = 0; j <= n; j++)
750 for (k = 0; k <= n; k++)
751 {
752 index[0] = i + pi*n; index[1] = j + pj*n; index[2] = k + pk*n;
753
754 xval = index[0]*h;
755 yval = index[1]*h;
756 zval = index[2]*h;
757
758 HYPRE_SStructVectorSetValues(xcoord, part, index, var, &xval);
759 HYPRE_SStructVectorSetValues(ycoord, part, index, var, &yval);
760 HYPRE_SStructVectorSetValues(zcoord, part, index, var, &zval);
761 }
762
763 /* Finalize the vector assembly */
764 HYPRE_SStructVectorAssemble(xcoord);
765 HYPRE_SStructVectorAssemble(ycoord);
766 HYPRE_SStructVectorAssemble(zcoord);
767 }
768
769 /* 5. Set up a SStruct Vector for the solution vector x */
770 {
771 HYPRE_Int part = 0;
772 int nvalues = n*(n+1)*(n+1);
773 double *values;
774
775 values = calloc(nvalues, sizeof(double));
776
777 /* Create an empty vector object */
778 HYPRE_SStructVectorCreate(MPI_COMM_WORLD, edge_grid, &x);
779 /* Set the object type to ParCSR */
780 HYPRE_SStructVectorSetObjectType(x, HYPRE_PARCSR);
781 /* Indicate that the vector coefficients are ready to be set */
782 HYPRE_SStructVectorInitialize(x);
783
784 /* Set the values for the initial guess x-edge */
785 {
786 HYPRE_Int var = 0;
787 HYPRE_Int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
788 HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
789 HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
790 }
791 /* Set the values for the initial guess y-edge */
792 {
793 HYPRE_Int var = 1;
794 HYPRE_Int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
795 HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
796 HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
797 }
798 /* Set the values for the initial guess z-edge */
799 {
800 HYPRE_Int var = 2;
801 HYPRE_Int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
802 HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
803 HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
804 }
805
806 free(values);
807
808 /* Finalize the vector assembly */
809 HYPRE_SStructVectorAssemble(x);
810 }
811
812 /* Finalize current timing */
813 hypre_EndTiming(time_index);
814 hypre_PrintTiming("SStruct phase times", MPI_COMM_WORLD);
815 hypre_FinalizeTiming(time_index);
816 hypre_ClearTiming();
817
818 /* 6. Set up and call the PCG-AMS solver (Solver options can be found in the
819 Reference Manual.) */
820 {
821 double final_res_norm;
822 HYPRE_Int its;
823
824 HYPRE_ParCSRMatrix par_A;
825 HYPRE_ParVector par_b;
826 HYPRE_ParVector par_x;
827
828 HYPRE_ParCSRMatrix par_G;
829 HYPRE_ParVector par_xcoord;
830 HYPRE_ParVector par_ycoord;
831 HYPRE_ParVector par_zcoord;
832
833 /* Extract the ParCSR objects needed in the solver */
834 HYPRE_SStructMatrixGetObject(A, (void **) &par_A);
835 HYPRE_SStructVectorGetObject(b, (void **) &par_b);
836 HYPRE_SStructVectorGetObject(x, (void **) &par_x);
837 HYPRE_SStructMatrixGetObject(G, (void **) &par_G);
838 HYPRE_SStructVectorGetObject(xcoord, (void **) &par_xcoord);
839 HYPRE_SStructVectorGetObject(ycoord, (void **) &par_ycoord);
840 HYPRE_SStructVectorGetObject(zcoord, (void **) &par_zcoord);
841
842 if (myid == 0)
843 printf("Problem size: %lld\n\n",
844 hypre_ParCSRMatrixGlobalNumRows((hypre_ParCSRMatrix*)par_A));
845
846 /* Start timing */
847 time_index = hypre_InitializeTiming("AMS Setup");
848 hypre_BeginTiming(time_index);
849
850 /* Create solver */
851 HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver);
852
853 /* Set some parameters (See Reference Manual for more parameters) */
854 HYPRE_PCGSetMaxIter(solver, maxit); /* max iterations */
855 HYPRE_PCGSetTol(solver, tol); /* conv. tolerance */
856 HYPRE_PCGSetTwoNorm(solver, 0); /* use the two norm as the stopping criteria */
857 HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */
858 HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */
859
860 /* Create AMS preconditioner */
861 HYPRE_AMSCreate(&precond);
862
863 /* Set AMS parameters */
864 HYPRE_AMSSetMaxIter(precond, 1);
865 HYPRE_AMSSetTol(precond, 0.0);
866 HYPRE_AMSSetCycleType(precond, cycle_type);
867 HYPRE_AMSSetPrintLevel(precond, 1);
868
869 /* Set discrete gradient */
870 HYPRE_AMSSetDiscreteGradient(precond, par_G);
871
872 /* Set vertex coordinates */
873 HYPRE_AMSSetCoordinateVectors(precond,
874 par_xcoord, par_ycoord, par_zcoord);
875
876 if (singular_problem)
877 HYPRE_AMSSetBetaPoissonMatrix(precond, NULL);
878
879 /* Smoothing and AMG options */
880 HYPRE_AMSSetSmoothingOptions(precond,
881 rlx_type, rlx_sweeps,
882 rlx_weight, rlx_omega);
883 HYPRE_AMSSetAlphaAMGOptions(precond,
884 amg_coarsen_type, amg_agg_levels,
885 amg_rlx_type, theta, amg_interp_type,
886 amg_Pmax);
887 HYPRE_AMSSetBetaAMGOptions(precond,
888 amg_coarsen_type, amg_agg_levels,
889 amg_rlx_type, theta, amg_interp_type,
890 amg_Pmax);
891
892 /* Set the PCG preconditioner */
893 HYPRE_PCGSetPrecond(solver,
894 (HYPRE_PtrToSolverFcn) HYPRE_AMSSolve,
895 (HYPRE_PtrToSolverFcn) HYPRE_AMSSetup,
896 precond);
897
898 /* Call the setup */
899 HYPRE_ParCSRPCGSetup(solver, par_A, par_b, par_x);
900
901 /* Finalize current timing */
902 hypre_EndTiming(time_index);
903 hypre_PrintTiming("Setup phase times", MPI_COMM_WORLD);
904 hypre_FinalizeTiming(time_index);
905 hypre_ClearTiming();
906
907 /* Start timing again */
908 time_index = hypre_InitializeTiming("AMS Solve");
909 hypre_BeginTiming(time_index);
910
911 /* Call the solve */
912 HYPRE_ParCSRPCGSolve(solver, par_A, par_b, par_x);
913
914 /* Finalize current timing */
915 hypre_EndTiming(time_index);
916 hypre_PrintTiming("Solve phase times", MPI_COMM_WORLD);
917 hypre_FinalizeTiming(time_index);
918 hypre_ClearTiming();
919
920 /* Get some info */
921 HYPRE_PCGGetNumIterations(solver, &its);
922 HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
923
924 /* Clean up */
925 HYPRE_AMSDestroy(precond);
926 HYPRE_ParCSRPCGDestroy(solver);
927
928 /* Gather the solution vector */
929 HYPRE_SStructVectorGather(x);
930
931 /* Print the solution with replicated shared data */
932 if (print_solution)
933 {
934 FILE *file;
935 char filename[255];
936
937 HYPRE_Int part = 0;
938 int nvalues = n*(n+1)*(n+1);
939 double *xvalues, *yvalues, *zvalues;
940
941 xvalues = calloc(nvalues, sizeof(double));
942 yvalues = calloc(nvalues, sizeof(double));
943 zvalues = calloc(nvalues, sizeof(double));
944
945 /* Get local solution in the x-edges */
946 {
947 HYPRE_Int var = 0;
948 HYPRE_Int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
949 HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
950 HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
951 var, xvalues);
952 }
953 /* Get local solution in the y-edges */
954 {
955 HYPRE_Int var = 1;
956 HYPRE_Int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
957 HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
958 HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
959 var, yvalues);
960 }
961 /* Get local solution in the z-edges */
962 {
963 HYPRE_Int var = 2;
964 HYPRE_Int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
965 HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
966 HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
967 var, zvalues);
968 }
969
970 sprintf(filename, "sstruct.out.x.00.00.%05d", myid);
971 if ((file = fopen(filename, "w")) == NULL)
972 {
973 printf("Error: can't open output file %s\n", filename);
974 MPI_Finalize();
975 exit(1);
976 }
977
978 /* Save the edge values, element by element, using the same numbering
979 as the local finite element degrees of freedom. */
980 {
981 int i, j, k, s;
982
983 /* Initial x-, y- and z-edge indices in the values arrays */
984 int oi[4] = { 0, n, n*(n+1), n*(n+1)+n }; /* e_0, e_2, e_4, e_6 */
985 int oj[4] = { 0, 1, n*(n+1), n*(n+1)+1 }; /* e_3, e_1, e_7, e_5 */
986 int ok[4] = { 0, 1, n+1, n+2 }; /* e_8, e_9, e_11, e_10 */
987 /* Loop over the cells while updating the above offsets */
988 for (k = 0; k < n; k++)
989 {
990 for (j = 0; j < n; j++)
991 {
992 for (i = 0; i < n; i++)
993 {
994 fprintf(file,
995 "%.14e\n%.14e\n%.14e\n%.14e\n"
996 "%.14e\n%.14e\n%.14e\n%.14e\n"
997 "%.14e\n%.14e\n%.14e\n%.14e\n",
998 xvalues[oi[0]], yvalues[oj[1]], xvalues[oi[1]], yvalues[oj[0]],
999 xvalues[oi[2]], yvalues[oj[3]], xvalues[oi[3]], yvalues[oj[2]],
1000 zvalues[ok[0]], zvalues[ok[1]], zvalues[ok[3]], zvalues[ok[2]]);
1001
1002 for (s=0; s<4; s++) oi[s]++, oj[s]++, ok[s]++;
1003 }
1004 for (s=0; s<4; s++) oj[s]++, ok[s]++;
1005 }
1006 for (s=0; s<4; s++) oi[s]+=n, ok[s]+=n+1;
1007 }
1008 }
1009
1010 fflush(file);
1011 fclose(file);
1012
1013 free(xvalues);
1014 free(yvalues);
1015 free(zvalues);
1016 }
1017
1018 if (myid == 0)
1019 {
1020 printf("\n");
1021 printf("Iterations = %lld\n", its);
1022 printf("Final Relative Residual Norm = %g\n", final_res_norm);
1023 printf("\n");
1024 }
1025 }
1026
1027 /* Free memory */
1028 HYPRE_SStructGridDestroy(edge_grid);
1029 HYPRE_SStructGraphDestroy(A_graph);
1030 HYPRE_SStructMatrixDestroy(A);
1031 HYPRE_SStructVectorDestroy(b);
1032 HYPRE_SStructVectorDestroy(x);
1033 HYPRE_SStructGridDestroy(node_grid);
1034 HYPRE_SStructGraphDestroy(G_graph);
1035 HYPRE_SStructStencilDestroy(G_stencil[0]);
1036 HYPRE_SStructStencilDestroy(G_stencil[1]);
1037 HYPRE_SStructStencilDestroy(G_stencil[2]);
1038 HYPRE_SStructMatrixDestroy(G);
1039 HYPRE_SStructVectorDestroy(xcoord);
1040 HYPRE_SStructVectorDestroy(ycoord);
1041 HYPRE_SStructVectorDestroy(zcoord);
1042
1043 /* Finalize MPI */
1044 MPI_Finalize();
1045
1046 return 0;
1047 }
syntax highlighted by Code2HTML, v. 0.9.1