NFFT 3.5.3alpha
fastsum_benchomp_detail.c
1/*
2 * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3 *
4 * This program is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU General Public License as published by the Free Software
6 * Foundation; either version 2 of the License, or (at your option) any later
7 * version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19#include <stdio.h>
20#include <math.h>
21#include <string.h>
22#include <stdlib.h>
23#include <complex.h>
24
25#include "nfft3.h"
26#include "infft.h"
27#include "fastsum.h"
28#include "kernels.h"
29#ifdef _OPENMP
30#include <omp.h>
31#endif
32
33int bench_openmp(FILE *infile, int n, int m, int p,
34 C (*kernel)(R, int, const R *), R c, R eps_I, R eps_B)
35{
36 fastsum_plan my_fastsum_plan;
37 int d, L, M;
38 int t, j;
39 R re, im;
40 R r_max = K(0.25) - my_fastsum_plan.eps_B / K(2.0);
41 ticks t0, t1;
42 R tt_total;
43
44 fscanf(infile, "%d %d %d", &d, &L, &M);
45
46#ifdef _OPENMP
47 FFTW(import_wisdom_from_filename)("fastsum_benchomp_detail_threads.plan");
48#else
49 FFTW(import_wisdom_from_filename)("fastsum_benchomp_detail_single.plan");
50#endif
51
52 fastsum_init_guru(&my_fastsum_plan, d, L, M, kernel, &c, NEARFIELD_BOXES, n,
53 m, p, eps_I, eps_B);
54
55#ifdef _OPENMP
56 FFTW(export_wisdom_to_filename)("fastsum_benchomp_detail_threads.plan");
57#else
58 FFTW(export_wisdom_to_filename)("fastsum_benchomp_detail_single.plan");
59#endif
60
61 for (j = 0; j < L; j++)
62 {
63 for (t = 0; t < d; t++)
64 {
65 R v;
66 fscanf(infile, __FR__, &v);
67 my_fastsum_plan.x[d * j + t] = v * r_max;
68 }
69 }
70
71 for (j = 0; j < L; j++)
72 {
73 fscanf(infile, __FR__ " " __FR__, &re, &im);
74 my_fastsum_plan.alpha[j] = re + II * im;
75 }
76
77 for (j = 0; j < M; j++)
78 {
79 for (t = 0; t < d; t++)
80 {
81 R v;
82 fscanf(infile, __FR__, &v);
83 my_fastsum_plan.y[d * j + t] = v * r_max;
84 }
85 }
86
88 t0 = getticks();
89 fastsum_precompute(&my_fastsum_plan);
90
92 fastsum_trafo(&my_fastsum_plan);
93 t1 = getticks();
94 tt_total = NFFT(elapsed_seconds)(t1, t0);
95
96#ifndef MEASURE_TIME
97 my_fastsum_plan.MEASURE_TIME_t[0] = K(0.0);
98 my_fastsum_plan.MEASURE_TIME_t[1] = K(0.0);
99 my_fastsum_plan.MEASURE_TIME_t[2] = K(0.0);
100 my_fastsum_plan.MEASURE_TIME_t[3] = K(0.0);
101 my_fastsum_plan.MEASURE_TIME_t[4] = K(0.0);
102 my_fastsum_plan.MEASURE_TIME_t[5] = K(0.0);
103 my_fastsum_plan.MEASURE_TIME_t[6] = K(0.0);
104 my_fastsum_plan.MEASURE_TIME_t[7] = K(0.0);
105 my_fastsum_plan.mv1.MEASURE_TIME_t[0] = K(0.0);
106 my_fastsum_plan.mv1.MEASURE_TIME_t[2] = K(0.0);
107 my_fastsum_plan.mv2.MEASURE_TIME_t[0] = K(0.0);
108 my_fastsum_plan.mv2.MEASURE_TIME_t[2] = K(0.0);
109#endif
110#ifndef MEASURE_TIME_FFTW
111 my_fastsum_plan.mv1.MEASURE_TIME_t[1] = K(0.0);
112 my_fastsum_plan.mv2.MEASURE_TIME_t[1] = K(0.0);
113#endif
114
115 printf(
116 "%.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ "\n",
117 my_fastsum_plan.MEASURE_TIME_t[0], my_fastsum_plan.MEASURE_TIME_t[1],
118 my_fastsum_plan.MEASURE_TIME_t[2], my_fastsum_plan.MEASURE_TIME_t[3],
119 my_fastsum_plan.MEASURE_TIME_t[4], my_fastsum_plan.MEASURE_TIME_t[5],
120 my_fastsum_plan.MEASURE_TIME_t[6], my_fastsum_plan.MEASURE_TIME_t[7],
121 tt_total - my_fastsum_plan.MEASURE_TIME_t[0]
122 - my_fastsum_plan.MEASURE_TIME_t[1]
123 - my_fastsum_plan.MEASURE_TIME_t[2]
124 - my_fastsum_plan.MEASURE_TIME_t[3]
125 - my_fastsum_plan.MEASURE_TIME_t[4]
126 - my_fastsum_plan.MEASURE_TIME_t[5]
127 - my_fastsum_plan.MEASURE_TIME_t[6]
128 - my_fastsum_plan.MEASURE_TIME_t[7], tt_total,
129 my_fastsum_plan.mv1.MEASURE_TIME_t[0],
130 my_fastsum_plan.mv1.MEASURE_TIME_t[1],
131 my_fastsum_plan.mv1.MEASURE_TIME_t[2],
132 my_fastsum_plan.mv2.MEASURE_TIME_t[0],
133 my_fastsum_plan.mv2.MEASURE_TIME_t[1],
134 my_fastsum_plan.mv2.MEASURE_TIME_t[2]);
135
136 fastsum_finalize(&my_fastsum_plan);
137
138 return 0;
139}
140
141int main(int argc, char **argv)
142{
143 int n;
144 int m;
145 int p;
146 char *s;
147 C (*kernel)(R, int, const R *);
148 R c;
149 R eps_I;
150 R eps_B;
152#ifdef _OPENMP
153 int nthreads;
154
155 if (argc != 9)
156 return EXIT_FAILURE;
157
158 nthreads = atoi(argv[8]);
159 FFTW(init_threads)();
160 omp_set_num_threads(nthreads);
161#else
162 if (argc != 8)
163 return EXIT_FAILURE;
164#endif
165
166 n = atoi(argv[1]);
167 m = atoi(argv[2]);
168 p = atoi(argv[3]);
169 s = argv[4];
170 c = atof(argv[5]);
171 eps_I = (R)(atof(argv[6]));
172 eps_B = (R)(atof(argv[7]));
173 if (strcmp(s, "gaussian") == 0)
174 kernel = gaussian;
175 else if (strcmp(s, "multiquadric") == 0)
176 kernel = multiquadric;
177 else if (strcmp(s, "inverse_multiquadric") == 0)
178 kernel = inverse_multiquadric;
179 else if (strcmp(s, "logarithm") == 0)
180 kernel = logarithm;
181 else if (strcmp(s, "thinplate_spline") == 0)
182 kernel = thinplate_spline;
183 else if (strcmp(s, "one_over_square") == 0)
184 kernel = one_over_square;
185 else if (strcmp(s, "one_over_modulus") == 0)
186 kernel = one_over_modulus;
187 else if (strcmp(s, "one_over_x") == 0)
188 kernel = one_over_x;
189 else if (strcmp(s, "inverse_multiquadric3") == 0)
190 kernel = inverse_multiquadric3;
191 else if (strcmp(s, "sinc_kernel") == 0)
192 kernel = sinc_kernel;
193 else if (strcmp(s, "cosc") == 0)
194 kernel = cosc;
195 else if (strcmp(s, "cot") == 0)
196 kernel = kcot;
197 else if (strcmp(s, "one_over_cube") == 0)
198 kernel = one_over_cube;
199 else if (strcmp(s, "log_sin") == 0)
200 kernel = log_sin;
201 else
202 {
203 s = "multiquadric";
204 kernel = multiquadric;
205 }
206
207 bench_openmp(stdin, n, m, p, kernel, c, eps_I, eps_B);
208
209 return EXIT_SUCCESS;
210}
211
Header file for the fast NFFT-based summation algorithm.
void fastsum_precompute(fastsum_plan *ths)
precomputation for fastsum
Definition fastsum.c:1173
C inverse_multiquadric(R x, int der, const R *param)
K(x)=1/sqrt(x^2+c^2)
Definition kernels.c:90
C logarithm(R x, int der, const R *param)
K(x)=log |x|.
Definition kernels.c:116
C multiquadric(R x, int der, const R *param)
K(x)=sqrt(x^2+c^2)
Definition kernels.c:64
void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total, kernel k, R *param, unsigned flags, int nn, int m, int p, R eps_I, R eps_B)
initialization of fastsum plan
Definition fastsum.c:987
R * x
source knots in d-ball with radius 1/4-eps_b/2
Definition fastsum.h:94
C one_over_cube(R x, int der, const R *param)
K(x) = 1/x^3.
Definition kernels.c:374
C one_over_square(R x, int der, const R *param)
K(x) = 1/x^2.
Definition kernels.c:177
C sinc_kernel(R x, int der, const R *param)
K(x) = sin(cx)/x.
Definition kernels.c:287
C kcot(R x, int der, const R *param)
K(x) = cot(cx)
Definition kernels.c:346
R MEASURE_TIME_t[8]
Measured time for each step if MEASURE_TIME is set.
Definition fastsum.h:134
C one_over_modulus(R x, int der, const R *param)
K(x) = 1/|x|.
Definition kernels.c:205
C * alpha
source coefficients
Definition fastsum.h:91
R eps_B
outer boundary
Definition fastsum.h:114
C thinplate_spline(R x, int der, const R *param)
K(x) = x^2 log |x|.
Definition kernels.c:149
void fastsum_trafo(fastsum_plan *ths)
fast NFFT-based summation
Definition fastsum.c:1180
void fastsum_finalize(fastsum_plan *ths)
finalization of fastsum plan
Definition fastsum.c:1048
C inverse_multiquadric3(R x, int der, const R *param)
K(x) = 1/sqrt(x^2+c^2)^3.
Definition kernels.c:261
C gaussian(R x, int der, const R *param)
K(x)=exp(-x^2/c^2)
Definition kernels.c:38
C one_over_x(R x, int der, const R *param)
K(x) = 1/x.
Definition kernels.c:233
C cosc(R x, int der, const R *param)
K(x) = cos(cx)/x.
Definition kernels.c:314
R * y
target knots in d-ball with radius 1/4-eps_b/2
Definition fastsum.h:95
C log_sin(R x, int der, const R *param)
K(x) = log(|sin(cx)|)
Definition kernels.c:402
Internal header file for auxiliary definitions and functions.
Header file with predefined kernels for the fast summation algorithm.
Header file for the nfft3 library.
plan for fast summation algorithm
Definition fastsum.h:83