Skip to content

Commit

Permalink
add 1 to degree for larger-w upsampfac=1.25 ker eval, recovers prior …
Browse files Browse the repository at this point in the history
…accuracy
  • Loading branch information
ahbarnett committed Jul 22, 2024
1 parent 50c5a35 commit b32929c
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 15 deletions.
4 changes: 2 additions & 2 deletions devel/gen_all_horner_C_code.m
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
opts = struct();

ws = 2:16;
upsampfac = 2.0; % sigma (upsampling): either 2 (default) or low (eg 5/4)
upsampfac = 1.25; % sigma (upsampling): either 2 (default) or low (eg 5/4)
opts.wpad = true; % pad kernel eval to multiple of 4

if upsampfac==2, fid = fopen('../src/ker_horner_allw_loop_constexpr.c','w');
Expand All @@ -28,7 +28,7 @@
gamma=0.97; % safety factor
betaoverws = gamma*pi*(1-1/(2*upsampfac)); % from cutoff freq formula
beta = betaoverws * w;
d = ceil(0.55*w+2.2); % from ker_ppval_coeff_mat expts
d = ceil(0.6*w+2.2); % from ker_ppval_coeff_mat expts
end
str = gen_ker_horner_loop_C_code(w,d,beta,opts);
if j==1 % write switch statement
Expand Down
4 changes: 2 additions & 2 deletions devel/gen_all_horner_cpp_header.m
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
get_nc_code = 'w + 2 + (w <= 8)'; % must be C++ expression for nc=d+1, not d
elseif upsampfac==1.25
fid = fopen('../src/ker_lowupsampfac_horner_allw_loop_constexpr.h','w');
get_nc_code = 'std::ceil(0.55*w + 3.2)'; % must be C++ code for nc=d+1, not d
get_nc_code = 'std::ceil(0.6*w + 3.2)'; % must be C++ code for nc=d+1, not d
end
fwrite(fid,sprintf('// Header of static arrays of monomial coeffs of spreading kernel function in each\n'));
fwrite(fid,sprintf('// fine-grid interval. Generated by gen_all_horner_cpp_header.m in finufft/devel\n'));
Expand All @@ -39,7 +39,7 @@
gamma=0.97; % safety factor
betaoverws = gamma*pi*(1-1/(2*upsampfac)); % from cutoff freq formula
beta = betaoverws * w;
d = ceil(0.55*w+2.2); % less, since beta smaller, smoother
d = ceil(0.6*w+2.2); % less, since beta smaller, smoother
end

str = gen_ker_horner_loop_cpp_code(w,d,beta,opts); % code strings for this w
Expand Down
7 changes: 4 additions & 3 deletions devel/ker_ppval_coeff_mat.m
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
% is the local variable in 1/w units about each grid pt.
%
% Notes: the self-test of this function is useful to tweak d (degree) for
% each w.
% each w. Also once FINUFFT CPU compiled, run test/checkallaccs.sh

% Barnett 4/23/18. Test all w, and rescale to 1 max val, 7/21/24
if nargin==0, test_ker_ppval_coeff_mat; return; end
Expand All @@ -41,7 +41,7 @@

%%%%%%
function test_ker_ppval_coeff_mat
sigma = 2.0; % upsampfac
sigma = 1.25; % upsampfac
for w=2:16
if sigma==2.0 % we test the rules used in setup_spreader...
betaoverws = [2.20 2.26 2.38 2.30];
Expand All @@ -53,7 +53,8 @@
betaoverns = gamma*pi*(1-1/(2*sigma)); % actually good for any sigma
beta=betaoverns*w;
tol = exp(-pi*w*sqrt(1-1/sigma));
d = ceil(0.55*w+2.2); % hacking the degree rule so error << tol
d = ceil(0.6*w+2.2); % hacking the degree rule so error << tol
% (and needs a little more acc at large w)
end
f = @(z) exp(beta*(sqrt(1-z.^2)-1)); % must match the above, to test

Expand Down
18 changes: 12 additions & 6 deletions src/ker_lowupsampfac_horner_allw_loop_constexpr.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
FLT c3[] = {1.8780973157032109E-02, -3.8322611720715674E-02, 2.0186098450281545E-17, 3.8322611720715723E-02, -1.8780973157032105E-02, 0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00};
FLT c4[] = {-2.3306908700109395E-05, -8.3858973028988343E-03, 1.4886952481383780E-02, -8.3858973028988516E-03, -2.3306908700108859E-05, 0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00};
FLT c5[] = {-1.5212353034889752E-03, 1.7151925122365424E-03, 8.6737859434123232E-18, -1.7151925122365892E-03, 1.5212353034889810E-03, 0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00};
for (int i=0; i<8; i++) ker[i] = c0[i] + z*(c1[i] + z*(c2[i] + z*(c3[i] + z*(c4[i] + z*(c5[i])))));
FLT c6[] = {-2.3519362630789253E-04, 5.4254817714690881E-04, -7.2101024641135097E-04, 5.4254817714689439E-04, -2.3519362630789972E-04, 0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00};
for (int i=0; i<8; i++) ker[i] = c0[i] + z*(c1[i] + z*(c2[i] + z*(c3[i] + z*(c4[i] + z*(c5[i] + z*(c6[i]))))));
} else if constexpr(w==6) {
FLT c0[] = {7.3992041846532757E-03, 2.2998056434514016E-01, 8.5775196559356104E-01, 8.5775196559356104E-01, 2.2998056434514022E-01, 7.3992041847816149E-03, 0.0000000000000000E+00, 0.0000000000000000E+00};
FLT c1[] = {2.0397684222696253E-02, 2.4277466601214734E-01, 2.6509440217151270E-01, -2.6509440217151259E-01, -2.4277466601214734E-01, -2.0397684222557694E-02, 0.0000000000000000E+00, 0.0000000000000000E+00};
Expand Down Expand Up @@ -81,7 +82,8 @@
FLT c6[] = {5.7780052154064923E-06, -1.5636835808662441E-05, -1.6121807313048698E-05, 8.1230533420432682E-05, -5.5456530742837516E-05, -5.5456530742837516E-05, 8.1230533420447061E-05, -1.6121807313052293E-05, -1.5636835808664237E-05, 5.7780052154064500E-06, 0.0000000000000000E+00, 0.0000000000000000E+00};
FLT c7[] = {2.7742147829396434E-07, -3.2550081973300800E-06, 5.9212960378052008E-06, 8.5495977199931152E-07, -1.3248468528067400E-05, 1.3248468528201922E-05, -8.5495977192042543E-07, -5.9212960378031248E-06, 3.2550081973319486E-06, -2.7742147829393835E-07, 0.0000000000000000E+00, 0.0000000000000000E+00};
FLT c8[] = {-1.2089379439830882E-07, -3.4743143854854381E-08, 8.2889801007304441E-07, -1.5830293785166221E-06, 8.7461219396318958E-07, 8.7461219390199166E-07, -1.5830293786451377E-06, 8.2889801007763427E-07, -3.4743143856671194E-08, -1.2089379439833272E-07, 0.0000000000000000E+00, 0.0000000000000000E+00};
for (int i=0; i<12; i++) ker[i] = c0[i] + z*(c1[i] + z*(c2[i] + z*(c3[i] + z*(c4[i] + z*(c5[i] + z*(c6[i] + z*(c7[i] + z*(c8[i]))))))));
FLT c9[] = {-2.5033479260931243E-08, 6.3042298325822974E-08, -5.2303271559903752E-08, -7.6226091793981470E-08, 2.3316553106919865E-07, -2.3316553114670068E-07, 7.6226091852107994E-08, 5.2303271559903752E-08, -6.3042298325822974E-08, 2.5033479260954218E-08, 0.0000000000000000E+00, 0.0000000000000000E+00};
for (int i=0; i<12; i++) ker[i] = c0[i] + z*(c1[i] + z*(c2[i] + z*(c3[i] + z*(c4[i] + z*(c5[i] + z*(c6[i] + z*(c7[i] + z*(c8[i] + z*(c9[i])))))))));
} else if constexpr(w==11) {
FLT c0[] = {8.0191950887587672E-06, 1.8211144887695901E-03, 3.8565497751765716E-02, 2.5236459439543668E-01, 7.1517256669690443E-01, 1.0000000000000000E+00, 7.1517256669690399E-01, 2.5236459439543651E-01, 3.8565497751765709E-02, 1.8211144887695910E-03, 8.0191950887586656E-06, 0.0000000000000000E+00};
FLT c1[] = {3.1996260415636094E-05, 3.5282769389657653E-03, 4.5889527487056471E-02, 1.8012194355267486E-01, 2.4178022040260394E-01, -1.4849044935138820E-16, -2.4178022040260408E-01, -1.8012194355267491E-01, -4.5889527487056485E-02, -3.5282769389657670E-03, -3.1996260415635877E-05, 0.0000000000000000E+00};
Expand All @@ -105,7 +107,8 @@
FLT c7[] = {2.1206307767330490E-07, -4.5869687934425177E-07, -1.3462277877572238E-06, 4.2970047520095079E-06, -1.1214870287414941E-06, -6.9831974682611276E-06, 6.9831974682960042E-06, 1.1214870288062637E-06, -4.2970047519858427E-06, 1.3462277877584693E-06, 4.5869687934430365E-07, -2.1206307766916437E-07};
FLT c8[] = {1.5395324498811026E-08, -1.2022118042098672E-07, 1.5464523856461759E-07, 2.7605497715117822E-07, -8.4964626030792186E-07, 5.2067203455623376E-07, 5.2067203460519205E-07, -8.4964626028956253E-07, 2.7605497715882793E-07, 1.5464523855945402E-07, -1.2022118042095684E-07, 1.5395324502815186E-08};
FLT c9[] = {-2.0816585198663231E-09, -6.8192670392721950E-09, 3.6338774646281261E-08, -4.9464521005206807E-08, -1.3242031043825771E-08, 1.0671664853011416E-07, -1.0671664860484826E-07, 1.3242031029986123E-08, 4.9464520965071825E-08, -3.6338774641091391E-08, 6.8192670390235131E-09, 2.0816585232936298E-09};
for (int i=0; i<12; i++) ker[i] = c0[i] + z*(c1[i] + z*(c2[i] + z*(c3[i] + z*(c4[i] + z*(c5[i] + z*(c6[i] + z*(c7[i] + z*(c8[i] + z*(c9[i])))))))));
FLT c10[] = {-6.3791929313927889E-10, 1.2240176129392066E-09, 5.3586929655729871E-10, -6.2807356207213370E-09, 1.0600657345063913E-08, -5.5585209137321311E-09, -5.5585209024191334E-09, 1.0600657351506037E-08, -6.2807355779833450E-09, 5.3586929058654981E-10, 1.2240176130570502E-09, -6.3791928984364409E-10};
for (int i=0; i<12; i++) ker[i] = c0[i] + z*(c1[i] + z*(c2[i] + z*(c3[i] + z*(c4[i] + z*(c5[i] + z*(c6[i] + z*(c7[i] + z*(c8[i] + z*(c9[i] + z*(c10[i]))))))))));
} else if constexpr(w==13) {
FLT c0[] = {4.4408051211162671E-07, 1.8756193861873413E-04, 6.5146989208011699E-03, 6.8352802598867848E-02, 3.1564238810082496E-01, 7.5353649746793927E-01, 9.9999999999999944E-01, 7.5353649746793827E-01, 3.1564238810082473E-01, 6.8352802598867710E-02, 6.5146989208011664E-03, 1.8756193861873272E-04, 4.4408051211162740E-07, 0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00};
FLT c1[] = {1.9487148068106048E-06, 4.1285069961250690E-04, 9.2995630713278779E-03, 6.5021145064983535E-02, 1.8663042875530000E-01, 2.1451870821533811E-01, -4.2425842671825291E-17, -2.1451870821533800E-01, -1.8663042875529998E-01, -6.5021145064983424E-02, -9.2995630713278762E-03, -4.1285069961250441E-04, -1.9487148068106044E-06, 0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00};
Expand All @@ -131,7 +134,8 @@
FLT c8[] = {6.5041263396090684E-09, -9.9149367808892008E-09, -6.6845758889566122E-08, 1.6286641993591861E-07, 5.8507874937330077E-08, -4.7688540979254475E-07, 3.2559878513865341E-07, 3.2559878505297634E-07, -4.7688540973134683E-07, 5.8507875016887355E-08, 1.6286641992979879E-07, -6.6845758890092050E-08, -9.9149367809190818E-09, 6.5041263397797216E-09, 0.0000000000000000E+00, 0.0000000000000000E+00};
FLT c9[] = {5.5138523621058489E-10, -3.4792607432848048E-09, 2.1621109683219443E-09, 1.6802313214377317E-08, -3.4440501477287078E-08, 3.6408052006210212E-09, 5.4274262219974878E-08, -5.4274262276717443E-08, -3.6408052283003184E-09, 3.4440501466215358E-08, -1.6802313213339344E-08, -2.1621109680192019E-09, 3.4792607432685862E-09, -5.5138523606432426E-10, 0.0000000000000000E+00, 0.0000000000000000E+00};
FLT c10[] = {-2.3785683828648566E-11, -2.9453404128779668E-10, 1.0997757892316608E-09, -8.6020469050217690E-10, -2.2974593148638725E-09, 5.5064436862062262E-09, -3.1470906039204597E-09, -3.1470906435159520E-09, 5.5064436312124872E-09, -2.2974593224058709E-09, -8.6020468893092726E-10, 1.0997757877782549E-09, -2.9453404132462281E-10, -2.3785683688841006E-11, 0.0000000000000000E+00, 0.0000000000000000E+00};
for (int i=0; i<16; i++) ker[i] = c0[i] + z*(c1[i] + z*(c2[i] + z*(c3[i] + z*(c4[i] + z*(c5[i] + z*(c6[i] + z*(c7[i] + z*(c8[i] + z*(c9[i] + z*(c10[i]))))))))));
FLT c11[] = {-1.2240623323565210E-11, 1.4269095045857491E-11, 6.3689195678622977E-11, -2.3523039312409202E-10, 2.6546832599550726E-10, 9.4137124835858634E-11, -5.6473808447746174E-10, 5.6473802485264837E-10, -9.4137192268683287E-11, -2.6546836370465266E-10, 2.3523038893615868E-10, -6.3689194596148682E-11, -1.4269094972657386E-11, 1.2240623457626768E-11, 0.0000000000000000E+00, 0.0000000000000000E+00};
for (int i=0; i<16; i++) ker[i] = c0[i] + z*(c1[i] + z*(c2[i] + z*(c3[i] + z*(c4[i] + z*(c5[i] + z*(c6[i] + z*(c7[i] + z*(c8[i] + z*(c9[i] + z*(c10[i] + z*(c11[i])))))))))));
} else if constexpr(w==15) {
FLT c0[] = {2.3183302143948740E-08, 1.7202745817468638E-05, 9.2668857465754892E-04, 1.4607490553401940E-02, 1.0130044556641118E-01, 3.7041488405244688E-01, 7.8279781886019206E-01, 1.0000000000000011E+00, 7.8279781886019195E-01, 3.7041488405244721E-01, 1.0130044556641132E-01, 1.4607490553401953E-02, 9.2668857465754838E-04, 1.7202745817468631E-05, 2.3183302143948743E-08, 0.0000000000000000E+00};
FLT c1[] = {1.1019919454791575E-07, 4.1938159428224133E-05, 1.5154850601194975E-03, 1.6839357628952684E-02, 8.0835952724673241E-02, 1.8739074372244102E-01, 1.9255567517255726E-01, 1.6956773054418529E-31, -1.9255567517255731E-01, -1.8739074372244113E-01, -8.0835952724673366E-02, -1.6839357628952709E-02, -1.5154850601194975E-03, -4.1938159428224133E-05, -1.1019919454791579E-07, 0.0000000000000000E+00};
Expand All @@ -145,7 +149,8 @@
FLT c9[] = {3.5447644664515603E-10, -1.1390658479631380E-09, -2.4324028601744043E-09, 1.2152005525649129E-08, -7.1102518397187493E-09, -2.5878341881540945E-08, 4.0855407208672653E-08, -5.2935793078520102E-17, -4.0855407200368865E-08, 2.5878341983954342E-08, 7.1102518798537297E-09, -1.2152005534644900E-08, 2.4324028599797840E-09, 1.1390658479621243E-09, -3.5447644664522991E-10, 0.0000000000000000E+00};
FLT c10[] = {1.6106092880560131E-11, -1.9612809867391534E-10, 3.3667881343327413E-10, 5.4740705721569177E-10, -2.3219918274241966E-09, 1.8783264065861090E-09, 2.1531914801939135E-09, -4.8374639374556914E-09, 2.1531914732804149E-09, 1.8783263688761161E-09, -2.3219918425081937E-09, 5.4740705894406642E-10, 3.3667881359039910E-10, -1.9612809867391534E-10, 1.6106092880566125E-11, 0.0000000000000000E+00};
FLT c11[] = {-2.9809392328870459E-12, -8.3268200106445154E-12, 5.7687950421418410E-11, -9.1929199328063136E-11, -3.9289939147449750E-11, 3.0713723883726984E-10, -3.5332678542514975E-10, -2.7146437259190364E-19, 3.5332673644762448E-10, -3.0713734467131358E-10, 3.9289960193589238E-11, 9.1929195849949024E-11, -5.7687950527891289E-11, 8.3268200078717850E-12, 2.9809392328263928E-12, 0.0000000000000000E+00};
for (int i=0; i<16; i++) ker[i] = c0[i] + z*(c1[i] + z*(c2[i] + z*(c3[i] + z*(c4[i] + z*(c5[i] + z*(c6[i] + z*(c7[i] + z*(c8[i] + z*(c9[i] + z*(c10[i] + z*(c11[i])))))))))));
FLT c12[] = {-6.7275763610714952E-13, 1.4037883799551298E-12, 1.0122748703359079E-12, -1.0507011558415453E-11, 1.9186622029950655E-11, -7.9757821000147658E-12, -2.2999231890282221E-11, 4.0853143156922655E-11, -2.2999289058288172E-11, -7.9759209366006470E-12, 1.9186575580945818E-11, -1.0507009389093798E-11, 1.0122747666550936E-12, 1.4037883779612681E-12, -6.7275763607599545E-13, 0.0000000000000000E+00};
for (int i=0; i<16; i++) ker[i] = c0[i] + z*(c1[i] + z*(c2[i] + z*(c3[i] + z*(c4[i] + z*(c5[i] + z*(c6[i] + z*(c7[i] + z*(c8[i] + z*(c9[i] + z*(c10[i] + z*(c11[i] + z*(c12[i]))))))))))));
} else if constexpr(w==16) {
FLT c0[] = {5.2012152104083984E-09, 5.0291159580938677E-06, 3.3201112337137920E-04, 6.3015433246683353E-03, 5.2427915343763412E-02, 2.3104762006593377E-01, 5.9521037322997217E-01, 9.4441119081353875E-01, 9.4441119081353875E-01, 5.9521037322997217E-01, 2.3104762006593377E-01, 5.2427915343763412E-02, 6.3015433246683353E-03, 3.3201112337137920E-04, 5.0291159580938694E-06, 5.2012152104083868E-09};
FLT c1[] = {2.5620581163903708E-08, 1.2815874111792787E-05, 5.7471335914300670E-04, 7.8386860177525539E-03, 4.6638901641906962E-02, 1.3897554029141571E-01, 2.0773808644544137E-01, 1.0813440420918320E-01, -1.0813440420918335E-01, -2.0773808644544145E-01, -1.3897554029141571E-01, -4.6638901641906975E-02, -7.8386860177525539E-03, -5.7471335914300648E-04, -1.2815874111792787E-05, -2.5620581163903721E-08};
Expand All @@ -159,6 +164,7 @@
FLT c9[] = {1.7210848751139206E-10, -1.3819378018485677E-10, -2.4707116695746685E-09, 4.6626394244300632E-09, 6.2513494738369478E-09, -2.2225751670676472E-08, 7.2716681748129466E-09, 2.9914504847745951E-08, -2.9914504925247984E-08, -7.2716681969563846E-09, 2.2225751655452858E-08, -6.2513494779888428E-09, -4.6626394252950410E-09, 2.4707116695043892E-09, 1.3819378018409654E-10, -1.7210848751141845E-10};
FLT c10[] = {1.5548426850867747E-11, -8.2967690041035768E-11, -2.0776280275005410E-11, 6.5818716252940090E-10, -9.7473366764093964E-10, -7.2114134421445299E-10, 2.9974008586911667E-09, -1.8729407766830212E-09, -1.8729408099935145E-09, 2.9974008676571101E-09, -7.2114133321570515E-10, -9.7473366606969001E-10, 6.5818716284365085E-10, -2.0776280294646031E-11, -8.2967690039501348E-11, 1.5548426850871941E-11};
FLT c11[] = {1.7715918240672815E-14, -8.7094275514577869E-12, 2.5402078534858863E-11, 5.6643120203537577E-13, -1.1273397749808333E-10, 1.7831198930961025E-10, 2.2123190757406476E-13, -2.7985827080469500E-10, 2.7985821912985675E-10, -2.2124766556045869E-13, -1.7831199578671051E-10, 1.1273397565255340E-10, -5.6643233774610691E-13, -2.5402078583658933E-11, 8.7094275509032414E-12, -1.7715918237423519E-14};
for (int i=0; i<16; i++) ker[i] = c0[i] + z*(c1[i] + z*(c2[i] + z*(c3[i] + z*(c4[i] + z*(c5[i] + z*(c6[i] + z*(c7[i] + z*(c8[i] + z*(c9[i] + z*(c10[i] + z*(c11[i])))))))))));
FLT c12[] = {-2.1496737417083317E-13, -2.2214974042200800E-14, 2.3291735717266144E-12, -5.9732917765233235E-12, 3.0556712628179253E-12, 1.1858122635605482E-11, -2.4316415414833160E-11, 1.3235499986994189E-11, 1.3235536737855158E-11, -2.4316433790263641E-11, 1.1858112427032992E-11, 3.0556697315320517E-12, -5.9732921593447914E-12, 2.3291735677388905E-12, -2.2214973792968073E-14, -2.1496737416207108E-13};
for (int i=0; i<16; i++) ker[i] = c0[i] + z*(c1[i] + z*(c2[i] + z*(c3[i] + z*(c4[i] + z*(c5[i] + z*(c6[i] + z*(c7[i] + z*(c8[i] + z*(c9[i] + z*(c10[i] + z*(c11[i] + z*(c12[i]))))))))))));
} else
printf("width not implemented!\n");
Loading

0 comments on commit b32929c

Please sign in to comment.