-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7 from swag-kaust/Mahesh
GPU pragmas are now available
- Loading branch information
Showing
28 changed files
with
521 additions
and
97 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,16 @@ | ||
#---- general compilation | ||
|
||
cd ../src | ||
make | ||
#make sofi3D | ||
|
||
make clean | ||
make -j 8 sofi3D | ||
|
||
#make clean | ||
# make sofi3D | ||
# make sofi3D | ||
|
||
#make snapmerge | ||
cd ../par | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
==45842== Profiling result: | ||
Time(%) Time Calls Avg Min Max Name | ||
25.86% 15.4165s 76 202.85ms 198.15ms 205.01ms update_s_elastic_159_gpu | ||
12.69% 7.56531s 76 99.544ms 98.223ms 100.82ms update_v_126_gpu | ||
9.41% 5.60882s 76 73.800ms 69.868ms 81.132ms update_s_CPML_elastic_591_gpu | ||
7.49% 4.46770s 76 58.786ms 48.310ms 80.522ms update_s_CPML_elastic_479_gpu | ||
6.84% 4.07530s 76 53.622ms 53.373ms 53.846ms update_s_CPML_elastic_393_gpu | ||
6.81% 4.06096s 76 53.434ms 52.938ms 53.937ms update_s_CPML_elastic_311_gpu | ||
5.10% 3.04203s 76 40.027ms 39.876ms 40.315ms update_v_CPML_407_gpu | ||
4.92% 2.93369s 76 38.601ms 38.386ms 38.838ms update_v_CPML_314_gpu | ||
4.83% 2.87799s 76 37.868ms 37.593ms 38.236ms update_v_CPML_258_gpu | ||
3.54% 2.11239s 76 27.795ms 25.102ms 40.658ms update_v_CPML_371_gpu | ||
3.29% 1.96067s 76 25.798ms 25.554ms 26.036ms update_s_CPML_elastic_98_gpu | ||
3.26% 1.94498s 76 25.592ms 25.434ms 25.719ms update_s_CPML_elastic_202_gpu | ||
2.34% 1.39695s 76 18.381ms 18.205ms 18.545ms update_v_CPML_105_gpu | ||
2.32% 1.38053s 76 18.165ms 18.065ms 18.257ms update_v_CPML_181_gpu | ||
0.65% 390.27ms 10704 36.459us 4.4480us 67.648us [CUDA memcpy DtoH] | ||
0.64% 381.92ms 1736 220.00us 3.0720us 1.9478ms [CUDA memcpy HtoD] | ||
0.00% 833.16us 6 138.86us 135.97us 146.79us [CUDA memcpy DtoD] | ||
|
||
==43017== Profiling application: ../bin/sofi3D ./in_and_out/sofi3D.json | ||
==43017== Profiling result: | ||
Time(%) Time Calls Avg Min Max Name | ||
26.40% 15.4008s 76 202.64ms 198.09ms 205.20ms update_s_elastic_159_gpu | ||
12.95% 7.55518s 76 99.410ms 98.122ms 100.59ms update_v_126_gpu | ||
9.58% 5.58590s 76 73.499ms 70.073ms 81.162ms update_s_CPML_elastic_588_gpu | ||
7.63% 4.45298s 76 58.592ms 48.052ms 80.914ms update_s_CPML_elastic_476_gpu | ||
6.98% 4.07094s 76 53.565ms 53.331ms 53.780ms update_s_CPML_elastic_390_gpu | ||
6.95% 4.05447s 76 53.348ms 52.808ms 53.755ms update_s_CPML_elastic_308_gpu | ||
5.21% 3.03771s 76 39.970ms 39.731ms 40.178ms update_v_CPML_407_gpu | ||
5.03% 2.93152s 76 38.573ms 38.361ms 38.887ms update_v_CPML_314_gpu | ||
4.93% 2.87687s 76 37.854ms 37.597ms 38.148ms update_v_CPML_258_gpu | ||
3.63% 2.11601s 76 27.842ms 25.033ms 40.485ms update_v_CPML_371_gpu | ||
3.33% 1.94405s 76 25.580ms 25.464ms 25.723ms update_s_CPML_elastic_199_gpu | ||
2.39% 1.39591s 76 18.367ms 18.213ms 18.519ms update_v_CPML_105_gpu | ||
2.36% 1.37828s 76 18.135ms 18.036ms 18.252ms update_v_CPML_181_gpu | ||
1.35% 786.87ms 76 10.354ms 10.282ms 10.418ms update_s_CPML_elastic_98_gpu | ||
0.65% 379.43ms 10704 35.447us 4.4480us 51.232us [CUDA memcpy DtoH] | ||
0.63% 369.39ms 1736 212.78us 3.0400us 942.06us [CUDA memcpy HtoD] | ||
0.00% 828.32us 6 138.05us 136.54us 140.48us [CUDA memcpy DtoD] | ||
|
||
==126591== Profiling result: | ||
Time(%) Time Calls Avg Min Max Name | ||
13.24% 6.31063s 76 83.035ms 82.987ms 83.086ms update_s_elastic_159_gpu | ||
12.30% 5.86413s 76 77.160ms 70.381ms 82.308ms update_s_CPML_elastic_589_gpu | ||
11.36% 5.41593s 76 71.262ms 71.229ms 71.292ms update_v_126_gpu | ||
9.70% 4.62594s 76 60.868ms 48.595ms 81.910ms update_s_CPML_elastic_477_gpu | ||
8.66% 4.12933s 76 54.333ms 54.136ms 54.514ms update_s_CPML_elastic_391_gpu | ||
8.53% 4.06766s 76 53.522ms 53.099ms 53.829ms update_s_CPML_elastic_309_gpu | ||
6.40% 3.05003s 76 40.132ms 32.902ms 40.566ms update_v_CPML_407_gpu | ||
6.15% 2.93372s 76 38.602ms 38.411ms 38.855ms update_v_CPML_314_gpu | ||
6.03% 2.87349s 76 37.809ms 37.478ms 38.172ms update_v_CPML_258_gpu | ||
4.45% 2.11982s 76 27.892ms 25.263ms 40.809ms update_v_CPML_371_gpu | ||
4.09% 1.95082s 76 25.669ms 25.567ms 25.863ms update_s_CPML_elastic_200_gpu | ||
2.94% 1.40140s 76 18.440ms 18.293ms 18.567ms update_v_CPML_105_gpu | ||
2.90% 1.38065s 76 18.166ms 18.069ms 18.215ms update_v_CPML_181_gpu | ||
1.66% 791.74ms 76 10.418ms 10.363ms 10.504ms update_s_CPML_elastic_98_gpu | ||
0.81% 384.25ms 10710 35.877us 4.5120us 46.912us [CUDA memcpy DtoH] | ||
0.77% 367.67ms 1739 211.43us 3.1040us 454.66us [CUDA memcpy HtoD] | ||
0.00% 826.37us 6 137.73us 136.10us 138.53us [CUDA memcpy DtoD] | ||
|
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
|
||
|
||
/* Buffer sized by NT; presumably holds per-time-step velocity-update timings (inferred from the name - confirm). */ | ||
/* NOTE(review): dvector(1, NT) is assumed to return a double array indexed 1..NT - confirm against its definition. */ | ||
double *time_v_update; | ||
time_v_update = dvector(1, NT); | ||
|
||
/* Buffer sized by NT; presumably holds per-time-step stress-update timings (inferred from the name - confirm). */ | ||
/* NOTE(review): dvector(1, NT) is assumed to return a double array indexed 1..NT - confirm against its definition. */ | ||
double *time_s_update; | ||
time_s_update = dvector(1, NT); | ||
|
||
/* Buffer sized by NT; presumably holds per-time-step stress-exchange timings (inferred from the name - confirm). */ | ||
/* NOTE(review): dvector(1, NT) is assumed to return a double array indexed 1..NT - confirm against its definition. */ | ||
/* The declared name now matches the identifier assigned below (was misspelled "time_s_exchane"). */ | ||
double *time_s_exchange; | ||
time_s_exchange = dvector(1, NT); | ||
|
||
/* Buffer sized by NT; presumably holds per-time-step velocity-exchange timings (inferred from the name - confirm). */ | ||
/* NOTE(review): dvector(1, NT) is assumed to return a double array indexed 1..NT - confirm against its definition. */ | ||
double *time_v_exchange; | ||
time_v_exchange = dvector(1, NT); |
Binary file not shown.
Oops, something went wrong.