NFFT 3.5.3alpha
fastsum_benchomp.c
1/*
2 * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3 *
4 * This program is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU General Public License as published by the Free Software
6 * Foundation; either version 2 of the License, or (at your option) any later
7 * version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18#include <stdio.h>
19#include <stdlib.h>
20#include <string.h>
21#include <unistd.h>
22
23#include "config.h"
24
25#include "nfft3.h"
26#include "infft.h"
27
28#define NREPEAT 5
29
30#if defined(_WIN32) || defined(_WIN64)
31const char *CMD_CREATEDATASET = "fastsum_benchomp_createdataset.exe";
32const char *CMD_DETAIL_SINGLE = "fastsum_benchomp_detail_single.exe";
33const char *CMD_DETAIL_THREADS = "fastsum_benchomp_detail_threads.exe";
34#else
35const char *CMD_CREATEDATASET = "./fastsum_benchomp_createdataset";
36const char *CMD_DETAIL_SINGLE = "./fastsum_benchomp_detail_single";
37const char *CMD_DETAIL_THREADS = "./fastsum_benchomp_detail_threads";
38#endif
39
40static FILE* file_out_tex = NULL;
41
42int get_nthreads_array(int **arr)
43{
44 int max_threads = NFFT(get_num_threads)();
45 int alloc_num = 2;
46 int k;
47 int ret_number = 0;
48 int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
49
50 if (max_threads <= 5)
51 {
52 *arr = (int*) NFFT(malloc)((size_t) (max_threads) * sizeof(int));
53 for (k = 0; k < max_threads; k++)
54 *(*arr + k) = k + 1;
55 return max_threads;
56 }
57
58 for (k = 1; k <= max_threads; k *= 2, alloc_num++)
59 ;
60
61 *arr = (int*) NFFT(malloc)((size_t)(alloc_num) * sizeof(int));
62
63 for (k = 1; k <= max_threads; k *= 2)
64 {
65 if (k != max_threads && 2 * k > max_threads && max_threads_pw2)
66 {
67 *(*arr + ret_number) = max_threads / 2;
68 ret_number++;
69 }
70
71 *(*arr + ret_number) = k;
72 ret_number++;
73
74 if (k != max_threads && 2 * k > max_threads)
75 {
76 *(*arr + ret_number) = max_threads;
77 ret_number++;
78 break;
79 }
80 }
81
82 return ret_number;
83}
84
85void check_result_value(const int val, const int ok, const char *msg)
86{
87 if (val != ok)
88 {
89 fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok);
90
91 exit(EXIT_FAILURE);
92 }
93}
94
95void run_test_create(int d, int L, int M)
96{
97 char cmd[1025];
98
99 snprintf(cmd, 1024,
100 "%s %d %d %d > fastsum_benchomp_test.data",
101 CMD_CREATEDATASET, d, L, M);
102 fprintf(stderr, "%s\n", cmd);
103 check_result_value(system(cmd), 0, "createdataset");
104}
105
106void run_test_init_output()
107{
108 FILE *f = fopen("fastsum_benchomp_test.result", "w");
109 if (f != NULL)
110 fclose(f);
111}
112
113typedef struct
114{
115 int d;
116 int L;
117 int M;
118 int n;
119 int m;
120 int p;
121 char *kernel_name;
122 R c;
123 R eps_I;
124 R eps_B;
125} s_param;
126
127typedef struct
128{
129 R avg;
130 R min;
131 R max;
132} s_resval;
133
134typedef struct
135{
136 int nthreads;
137 s_resval resval[16];
138} s_result;
139
140typedef struct
141{
142 s_param param;
143 s_result *results;
144 int nresults;
145} s_testset;
146
147void run_test(s_resval *res, int nrepeat, int n, int m, int p,
148 char *kernel_name, R c, R eps_I, R eps_B, int nthreads)
149{
150 char cmd[1025];
151 int r, t;
152
153 for (t = 0; t < 16; t++)
154 {
155 res[t].avg = K(0.0);
156 res[t].min = K(1.0) / K(0.0);
157 res[t].max = K(0.0);
158 }
159
160 if (nthreads < 2)
161 snprintf(cmd, 1024,
162 "%s %d %d %d %s " __FR__ " " __FR__ " " __FR__ " < fastsum_benchomp_test.data > fastsum_benchomp_test.out",
163 CMD_DETAIL_SINGLE, n, m, p, kernel_name, c, eps_I, eps_B);
164 else
165 snprintf(cmd, 1024,
166 "%s %d %d %d %s " __FR__ " " __FR__ " " __FR__ " %d < fastsum_benchomp_test.data > fastsum_benchomp_test.out",
167 CMD_DETAIL_THREADS, n, m, p, kernel_name, c, eps_I, eps_B, nthreads);
168 fprintf(stderr, "%s\n", cmd);
169 check_result_value(system(cmd), 0, cmd);
170
171 for (r = 0; r < nrepeat; r++)
172 {
173 int retval;
174 R v[16];
175 FILE *f;
176 check_result_value(system(cmd), 0, cmd);
177 f = fopen("fastsum_benchomp_test.out", "r");
178 retval = fscanf(f,
179 "" __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ "", v,
180 v + 1, v + 2, v + 3, v + 4, v + 5, v + 6, v + 7, v + 8, v + 9, v + 10,
181 v + 11, v + 12, v + 13, v + 14, v + 15);
182 check_result_value(retval, 16, "read fastsum_benchomp_test.out");
183 fclose(f);
184
185 for (t = 0; t < 16; t++)
186 {
187 res[t].avg += v[t];
188 if (res[t].min > v[t])
189 res[t].min = v[t];
190 if (res[t].max < v[t])
191 res[t].max = v[t];
192 }
193 }
194
195 for (t = 0; t < 16; t++)
196 res[t].avg /= (R)(nrepeat);
197
198 fprintf(stderr, "%d %d: ", nthreads, nrepeat);
199 for (t = 0; t < 16; t++)
200 fprintf(stderr, "%.3" __FES__ " %.3" __FES__ " %.3" __FES__ " | ", res[t].avg, res[t].min, res[t].max);
201 fprintf(stderr, "\n");
202}
203
204const char *get_psi_string(int flags)
205{
206 if (flags & PRE_PSI)
207 return "prepsi";
208 else if (flags & PRE_ONE_PSI)
209 return "unknownPSI";
210
211 return "nopsi";
212}
213const char *get_sort_string(int flags)
214{
215 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
216 return "";
217
218 if (flags & NFFT_SORT_NODES)
219 return "sorted";
220
221 return "unsorted";
222}
223
224const char *get_adjoint_omp_string(int flags)
225{
226 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
227 return "blockwise";
228
229 return "";
230}
231
232#define MASK_FSUM_D (1U<<0)
233#define MASK_FSUM_L (1U<<1)
234#define MASK_FSUM_M (1U<<2)
235#define MASK_FSUM_MULTIBW (1U<<3)
236#define MASK_FSUM_WINM (1U<<4)
237#define MASK_FSUM_P (1U<<5)
238#define MASK_FSUM_KERNEL (1U<<6)
239#define MASK_FSUM_EPSI (1U<<7)
240#define MASK_FSUM_EPSB (1U<<8)
241
242unsigned int fastsum_determine_different_parameters(s_testset *testsets,
243 int ntestsets)
244{
245 int t;
246 unsigned int mask = 0;
247
248 if (ntestsets < 2)
249 return 0;
250
251 for (t = 1; t < ntestsets; t++)
252 {
253 if (testsets[t - 1].param.d != testsets[t].param.d)
254 mask |= MASK_FSUM_D;
255 if (testsets[t - 1].param.L != testsets[t].param.L)
256 mask |= MASK_FSUM_L;
257 if (testsets[t - 1].param.M != testsets[t].param.M)
258 mask |= MASK_FSUM_M;
259 if (testsets[t - 1].param.n != testsets[t].param.n)
260 mask |= MASK_FSUM_MULTIBW;
261 if (testsets[t - 1].param.m != testsets[t].param.m)
262 mask |= MASK_FSUM_WINM;
263 if (testsets[t - 1].param.p != testsets[t].param.p)
264 mask |= MASK_FSUM_P;
265 if (strcmp(testsets[t - 1].param.kernel_name, testsets[t].param.kernel_name)
266 != 0)
267 mask |= MASK_FSUM_KERNEL;
268 if (testsets[t - 1].param.eps_I != testsets[t].param.eps_I)
269 mask |= MASK_FSUM_EPSI;
270 if (testsets[t - 1].param.eps_B != testsets[t].param.eps_B)
271 mask |= MASK_FSUM_EPSB;
272 }
273
274 return mask;
275}
276
277void strEscapeUnderscore(char *dst, char *src, int maxlen)
278{
279 int i = 0;
280 int len;
281 int offset = 0;
282
283 while (src[i] != '\0' && len + offset < maxlen - 1)
284 {
285 if (src[i] == '_')
286 len = snprintf(dst + offset, maxlen - offset, "\\_{}");
287 else
288 len = snprintf(dst + offset, maxlen - offset, "%c", src[i]);
289 offset += len;
290 i++;
291 }
292}
293
294void fastsum_get_plot_title_minus_indep(char *outstr, int maxlen,
295 char *hostname, s_param param, unsigned int diff_mask)
296{
297 unsigned int mask = ~diff_mask;
298 int offset = 0;
299 int len;
300
301 len = snprintf(outstr, maxlen, "%s", hostname);
302 if (len < 0 || len + offset >= maxlen - 1)
303 return;
304 offset += len;
305
306 if (mask & MASK_FSUM_D)
307 {
308 len = snprintf(outstr + offset, maxlen - offset, " %dd fastsum", param.d);
309 if (len < 0 || len + offset >= maxlen - 1)
310 return;
311 offset += len;
312 }
313
314 if ((mask & (MASK_FSUM_L | MASK_FSUM_M)) && param.L == param.M)
315 {
316 len = snprintf(outstr + offset, maxlen - offset, " L=M=%d", param.L);
317 if (len < 0 || len + offset >= maxlen - 1)
318 return;
319 offset += len;
320 }
321 else
322 {
323 if (mask & MASK_FSUM_L)
324 {
325 len = snprintf(outstr + offset, maxlen - offset, " L=%d", param.L);
326 if (len < 0 || len + offset >= maxlen - 1)
327 return;
328 offset += len;
329 }
330
331 if (mask & MASK_FSUM_M)
332 {
333 len = snprintf(outstr + offset, maxlen - offset, " M=%d", param.M);
334 if (len < 0 || len + offset >= maxlen - 1)
335 return;
336 offset += len;
337 }
338 }
339
340 if (mask & MASK_FSUM_MULTIBW)
341 {
342 len = snprintf(outstr + offset, maxlen - offset, " n=%d", param.n);
343 if (len < 0 || len + offset >= maxlen - 1)
344 return;
345 offset += len;
346 }
347
348 if (mask & MASK_FSUM_WINM)
349 {
350 len = snprintf(outstr + offset, maxlen - offset, " m=%d", param.m);
351 if (len < 0 || len + offset >= maxlen - 1)
352 return;
353 offset += len;
354 }
355
356 if (mask & MASK_FSUM_P)
357 {
358 len = snprintf(outstr + offset, maxlen - offset, " p=%d", param.p);
359 if (len < 0 || len + offset >= maxlen - 1)
360 return;
361 offset += len;
362 }
363
364 if (mask & MASK_FSUM_KERNEL)
365 {
366 char tmp[maxlen];
367 strEscapeUnderscore(tmp, param.kernel_name, maxlen);
368
369 len = snprintf(outstr + offset, maxlen - offset, " %s", tmp);
370 if (len < 0 || len + offset >= maxlen - 1)
371 return;
372 offset += len;
373 }
374
375 if ((mask & (MASK_FSUM_EPSI | MASK_FSUM_EPSB)) && param.eps_I == param.eps_B)
376 {
377 len = snprintf(outstr + offset, maxlen - offset,
378 " $\\varepsilon_\\mathrm{I}$=$\\varepsilon_\\mathrm{B}$=%" __FGS__ "",
379 param.eps_I);
380 if (len < 0 || len + offset >= maxlen - 1)
381 return;
382 offset += len;
383 }
384 else
385 {
386 if (mask & MASK_FSUM_EPSI)
387 {
388 len = snprintf(outstr + offset, maxlen - offset,
389 " $\\varepsilon_\\mathrm{I}$=%" __FGS__ "", param.eps_I);
390 if (len < 0 || len + offset >= maxlen - 1)
391 return;
392 offset += len;
393 }
394
395 if (mask & MASK_FSUM_EPSB)
396 {
397 len = snprintf(outstr + offset, maxlen - offset,
398 " $\\varepsilon_\\mathrm{B}$=%" __FGS__ "", param.eps_B);
399 if (len < 0 || len + offset >= maxlen - 1)
400 return;
401 offset += len;
402 }
403 }
404}
405
406void nfft_adjoint_print_output_histo_DFBRT(FILE *out, s_testset testset)
407{
408 int i, size = testset.nresults;
409 char hostname[1025];
410
411#ifdef HAVE_GETHOSTNAME
412 if (gethostname(hostname, 1024) != 0)
413#endif
414 strncpy(hostname, "unnamed", 1024);
415
416 fprintf(out, "\\begin{tikzpicture}\n");
417 fprintf(out, "\\begin{axis}[");
418 fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
419 fprintf(out, "symbolic x coords={");
420 for (i = 0; i < size; i++)
421 if (i > 0)
422 fprintf(out, ",%d", testset.results[i].nthreads);
423 else
424 fprintf(out, "%d", testset.results[i].nthreads);
425
426 fprintf(out,
427 "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
428 fprintf(out,
429 " title={%s %dd $\\textrm{NFFT}^\\top$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}",
430 hostname, testset.param.d, testset.param.n, testset.param.M,
431 testset.param.m);
432 fprintf(out, " ]\n");
433 fprintf(out, "\\addplot coordinates {");
434 for (i = 0; i < size; i++)
435 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
436 testset.results[i].resval[10].avg);
437 fprintf(out, "};\n");
438
439 fprintf(out, "\\addplot coordinates {");
440 for (i = 0; i < size; i++)
441 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
442 testset.results[i].resval[11].avg);
443 fprintf(out, "};\n");
444
445 fprintf(out, "\\addplot coordinates {");
446 for (i = 0; i < size; i++)
447 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
448 testset.results[i].resval[12].avg);
449 fprintf(out, "};\n");
450
451 fprintf(out, "\\addplot coordinates {");
452 for (i = 0; i < size; i++)
453 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
454 testset.results[i].resval[1].avg);
455 fprintf(out, "};\n");
456
457 fprintf(out, "\\addplot coordinates {");
458 for (i = 0; i < size; i++)
459 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
460 testset.results[i].resval[4].avg + testset.results[i].resval[1].avg);
461 fprintf(out, "};\n");
462 fprintf(out,
463 "\\legend{D,$\\textrm{F}^\\top$,$\\textrm{B}^\\top$,prepsi,total}\n");
464 fprintf(out, "\\end{axis}\n");
465 fprintf(out, "\\end{tikzpicture}\n");
466 fprintf(out, "\n\n");
467
468 fflush(out);
469}
470
471void nfft_trafo_print_output_histo_DFBRT(FILE *out, s_testset testset)
472{
473 int i, size = testset.nresults;
474 char hostname[1025];
475
476#ifdef HAVE_GETHOSTNAME
477 if (gethostname(hostname, 1024) != 0)
478#endif
479 strncpy(hostname, "unnamed", 1024);
480
481 fprintf(out, "\\begin{tikzpicture}\n");
482 fprintf(out, "\\begin{axis}[");
483 fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
484 fprintf(out, "symbolic x coords={");
485 for (i = 0; i < size; i++)
486 if (i > 0)
487 fprintf(out, ",%d", testset.results[i].nthreads);
488 else
489 fprintf(out, "%d", testset.results[i].nthreads);
490
491 fprintf(out,
492 "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
493 fprintf(out,
494 " title={%s %dd $\\textrm{NFFT}$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}",
495 hostname, testset.param.d, testset.param.n, testset.param.M,
496 testset.param.m);
497 fprintf(out, " ]\n");
498 fprintf(out, "\\addplot coordinates {");
499 for (i = 0; i < size; i++)
500 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
501 testset.results[i].resval[13].avg);
502 fprintf(out, "};\n");
503
504 fprintf(out, "\\addplot coordinates {");
505 for (i = 0; i < size; i++)
506 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
507 testset.results[i].resval[14].avg);
508 fprintf(out, "};\n");
509
510 fprintf(out, "\\addplot coordinates {");
511 for (i = 0; i < size; i++)
512 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
513 testset.results[i].resval[15].avg);
514 fprintf(out, "};\n");
515
516 fprintf(out, "\\addplot coordinates {");
517 for (i = 0; i < size; i++)
518 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
519 testset.results[i].resval[2].avg);
520 fprintf(out, "};\n");
521
522 fprintf(out, "\\addplot coordinates {");
523 for (i = 0; i < size; i++)
524 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
525 testset.results[i].resval[6].avg + testset.results[i].resval[2].avg);
526 fprintf(out, "};\n");
527 fprintf(out, "\\legend{D,F,B,prepsi,total}\n");
528 fprintf(out, "\\end{axis}\n");
529 fprintf(out, "\\end{tikzpicture}\n");
530 fprintf(out, "\n\n");
531
532 fflush(out);
533}
534
535void fastsum_print_output_histo_PreRfNfT(FILE *out, s_testset testset)
536{
537 int i, size = testset.nresults;
538 char hostname[1025];
539 char plottitle[1025];
540
541#ifdef HAVE_GETHOSTNAME
542 if (gethostname(hostname, 1024) != 0)
543#endif
544 strncpy(hostname, "unnamed", 1024);
545
546 fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname, testset.param,
547 0);
548
549 fprintf(out, "\\begin{tikzpicture}\n");
550 fprintf(out, "\\begin{axis}[");
551 fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
552 fprintf(out, "symbolic x coords={");
553 for (i = 0; i < size; i++)
554 if (i > 0)
555 fprintf(out, ",%d", testset.results[i].nthreads);
556 else
557 fprintf(out, "%d", testset.results[i].nthreads);
558
559 fprintf(out,
560 "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
561 fprintf(out, " title={%s}", plottitle);
562 fprintf(out, " ]\n");
563 fprintf(out, "\\addplot coordinates {");
564 for (i = 0; i < size; i++)
565 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
566 testset.results[i].resval[1].avg + testset.results[i].resval[2].avg);
567 fprintf(out, "};\n");
568
569 fprintf(out, "\\addplot coordinates {");
570 for (i = 0; i < size; i++)
571 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
572 testset.results[i].resval[3].avg);
573 fprintf(out, "};\n");
574
575 fprintf(out, "\\addplot coordinates {");
576 for (i = 0; i < size; i++)
577 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
578 testset.results[i].resval[4].avg + testset.results[i].resval[5].avg
579 + testset.results[i].resval[6].avg);
580 fprintf(out, "};\n");
581
582 fprintf(out, "\\addplot coordinates {");
583 for (i = 0; i < size; i++)
584 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
585 testset.results[i].resval[7].avg);
586 fprintf(out, "};\n");
587
588 fprintf(out, "\\addplot coordinates {");
589 for (i = 0; i < size; i++)
590 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
591 testset.results[i].resval[9].avg - testset.results[i].resval[0].avg);
592 fprintf(out, "};\n");
593 fprintf(out,
594 "\\legend{prepsi (step 1b),init nearfield (step 1c),far field (steps 2a-c),nearfield (step 2d),total $-$ step 1a}\n");
595 fprintf(out, "\\end{axis}\n");
596 fprintf(out, "\\end{tikzpicture}\n");
597 fprintf(out, "\n\n");
598
599 fflush(out);
600}
601
602void fastsum_print_output_speedup_total_minus_indep(FILE *out,
603 s_testset *testsets, int ntestsets)
604{
605 int i, t;
606 char hostname[1025];
607 char plottitle[1025];
608 unsigned int diff_mask = fastsum_determine_different_parameters(testsets,
609 ntestsets);
610
611#ifdef HAVE_GETHOSTNAME
612 if (gethostname(hostname, 1024) != 0)
613#endif
614 strncpy(hostname, "unnamed", 1024);
615
616 fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname,
617 testsets[0].param, diff_mask | MASK_FSUM_WINM);
618
619 fprintf(out, "\\begin{tikzpicture}\n");
620 fprintf(out, "\\begin{axis}[");
621 fprintf(out,
622 "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
623 fprintf(out, " title={%s}", plottitle);
624 fprintf(out, " ]\n");
625
626 for (t = 0; t < ntestsets; t++)
627 {
628 s_testset testset = testsets[t];
629
630 R tref = K(0.0);
631 for (i = 0; i < testset.nresults; i++)
632 if (testset.results[i].nthreads == 1)
633 tref = testset.results[i].resval[9].avg
634 - testset.results[i].resval[0].avg;
635
636 fprintf(out, "\\addplot coordinates {");
637 for (i = 0; i < testset.nresults; i++)
638 fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
639 tref
640 / (testset.results[i].resval[9].avg
641 - testset.results[i].resval[0].avg));
642 fprintf(out, "};\n");
643
644 for (i = 0; i < testset.nresults; i++)
645 {
646 fprintf(stderr, "%d:%.3" __FIS__ " ", testset.results[i].nthreads,
647 tref
648 / (testset.results[i].resval[9].avg
649 - testset.results[i].resval[0].avg));
650 }
651 fprintf(stderr, "\n\n");
652 }
653
654 fprintf(out, "\\legend{{");
655 for (t = 0; t < ntestsets; t++)
656 {
657 char title[256];
658 if (t > 0)
659 fprintf(out, "},{");
660 fastsum_get_plot_title_minus_indep(title, 255, "", testsets[t].param,
661 ~(diff_mask | MASK_FSUM_WINM));
662 fprintf(out, "%s", title);
663 }
664 fprintf(out, "}}\n");
665 fprintf(out, "\\end{axis}\n");
666 fprintf(out, "\\end{tikzpicture}\n");
667 fprintf(out, "\n\n");
668
669 fflush(out);
670}
671
672void run_testset(s_testset *testset, int d, int L, int M, int n, int m, int p,
673 char *kernel_name, R c, R eps_I, R eps_B,
674 int *nthreads_array, int n_threads_array_size)
675{
676 int i;
677 testset->param.d = d;
678 testset->param.L = L;
679 testset->param.M = M;
680 testset->param.n = n;
681 testset->param.m = m;
682 testset->param.p = p;
683 testset->param.kernel_name = kernel_name;
684 testset->param.c = c;
685 testset->param.eps_I = eps_I;
686 testset->param.eps_B = eps_B;
687
688 testset->results = (s_result*) NFFT(malloc)(
689 (size_t)(n_threads_array_size) * sizeof(s_result));
690 testset->nresults = n_threads_array_size;
691
692 run_test_create(testset->param.d, testset->param.L, testset->param.M);
693 for (i = 0; i < n_threads_array_size; i++)
694 {
695 testset->results[i].nthreads = nthreads_array[i];
696 run_test(testset->results[i].resval, NREPEAT, testset->param.n,
697 testset->param.m, testset->param.p, testset->param.kernel_name,
698 testset->param.c, testset->param.eps_I, testset->param.eps_B,
699 testset->results[i].nthreads);
700 }
701
702}
703
704void test1(int *nthreads_array, int n_threads_array_size)
705{
706 s_testset testsets[1];
707
708#if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
709 run_testset(&testsets[0], 3, 100000, 100000, 128, 4, 7, "one_over_x", K(0.0), K(0.03125), K(0.03125), nthreads_array, n_threads_array_size);
710
711 fastsum_print_output_speedup_total_minus_indep(file_out_tex, testsets, 1);
712
713 fastsum_print_output_histo_PreRfNfT(file_out_tex, testsets[0]);
714
715 nfft_adjoint_print_output_histo_DFBRT(file_out_tex, testsets[0]);
716
717 nfft_trafo_print_output_histo_DFBRT(file_out_tex, testsets[0]);
718#endif
719}
720
721int main(int argc, char** argv)
722{
723 int *nthreads_array;
724 int n_threads_array_size = get_nthreads_array(&nthreads_array);
725 int k;
726
727#if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
728 fprintf(stderr, "WARNING: Detailed time measurements are not activated.\n");
729 fprintf(stderr, "Please re-run the configure script with options\n");
730 fprintf(stderr,
731 "--enable-measure-time --enable-measure-time-fftw --enable-openmp\n");
732 fprintf(stderr, "and run \"make clean all\"\n\n");
733#endif
734
735 for (k = 0; k < n_threads_array_size; k++)
736 fprintf(stderr, "%d ", nthreads_array[k]);
737 fprintf(stderr, "\n");
738
739 file_out_tex = fopen("fastsum_benchomp_results_plots.tex", "w");
740
741 test1(nthreads_array, n_threads_array_size);
742
743 fclose(file_out_tex);
744
745 return EXIT_SUCCESS;
746}
747
#define PRE_ONE_PSI
Definition nfft3.h:200
#define PRE_PSI
Definition nfft3.h:191
Internal header file for auxiliary definitions and functions.
Header file for the nfft3 library.