@@ -34,10 +34,7 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {
     nrn_ion_global_map_copyto_device();
 
 #ifdef UNIFIED_MEMORY
-
-    int i;
-
-    for (i = 0; i < nthreads; i++) {
+    for (int i = 0; i < nthreads; i++) {
         NrnThread* nt = threads + i;  // NrnThread on host
 
         if (nt->n_presyn) {
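The pattern in this hunk repeats throughout the commit: indices and temporaries declared at block top in C89 style are moved to their first use, shrinking scope and enabling `auto`. A minimal standalone illustration (hypothetical function names, not from this file):

```cpp
#include <cstdio>

void old_style(int n) {
    int i;  // C89 habit: declared at block top, outlives the loop
    for (i = 0; i < n; i++) {
        printf("%d\n", i);
    }
    // i is still in scope here, inviting accidental reuse
}

void new_style(int n) {
    for (int i = 0; i < n; i++) {  // index scoped to the loop body
        printf("%d\n", i);
    }
}
```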
@@ -64,8 +61,6 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {
     }
 
 #else
-    int i;
-
     /* -- copy NrnThread to device. this needs to be a contiguous vector because offset is used to
      * find
      * corresponding NrnThread using Point_process in NET_RECEIVE block
@@ -78,7 +73,7 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {
 
     /* pointers for data struct on device, starting with d_ */
 
-    for (i = 0; i < nthreads; i++) {
+    for (int i = 0; i < nthreads; i++) {
         NrnThread* nt = threads + i;      // NrnThread on host
         NrnThread* d_nt = d_threads + i;  // NrnThread on device
         if (!nt->compute_gpu) {
@@ -140,23 +135,20 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {
 
         /* -- copy NrnThreadMembList list ml to device -- */
 
-        NrnThreadMembList* tml;
-        NrnThreadMembList* d_tml;
         NrnThreadMembList* d_last_tml;
 
-        Memb_list* d_ml;
-        int first_tml = 1;
+        bool first_tml = true;
         size_t offset = 6 * ne;
 
-        for (tml = nt->tml; tml; tml = tml->next) {
+        for (auto tml = nt->tml; tml; tml = tml->next) {
             /* copy tml to device */
             /* QUESTION: will tml point to nullptr as on host? I assume so! */
-            d_tml = (NrnThreadMembList*)acc_copyin(tml, sizeof(NrnThreadMembList));
+            auto d_tml = (NrnThreadMembList*)acc_copyin(tml, sizeof(NrnThreadMembList));
 
             /* first tml is pointed by nt */
             if (first_tml) {
                 acc_memcpy_to_device(&(d_nt->tml), &d_tml, sizeof(NrnThreadMembList*));
-                first_tml = 0;
+                first_tml = false;
             } else {
                 /* rest of tml forms linked list */
                 acc_memcpy_to_device(&(d_last_tml->next), &d_tml, sizeof(NrnThreadMembList*));
@@ -166,7 +158,7 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {
             d_last_tml = d_tml;
 
             /* now for every tml, there is a ml. copy that and setup pointer */
-            d_ml = (Memb_list*)acc_copyin(tml->ml, sizeof(Memb_list));
+            auto d_ml = (Memb_list*)acc_copyin(tml->ml, sizeof(Memb_list));
             acc_memcpy_to_device(&(d_tml->ml), &d_ml, sizeof(Memb_list*));
 
             /* setup nt._ml_list */
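The loop above is the commit's core idiom for deep-copying a host linked list into OpenACC device memory: `acc_copyin()` registers each node and returns its device address, and because the device image of the previous node still stores a host-valued `next`, that field is patched with `acc_memcpy_to_device()`. A self-contained sketch of the same idiom, under the assumption of a simple singly linked list (the `Node` type and function are hypothetical):

```cpp
#include <openacc.h>

struct Node {
    int payload;
    Node* next;
};

// Copy a host list starting at 'head' to the device; 'd_head_slot' is the
// device address of the pointer that should reference the first device node.
void copy_list_to_device(Node* head, Node** d_head_slot) {
    Node* d_prev = nullptr;
    for (Node* n = head; n; n = n->next) {
        // register the node and obtain its device address
        auto d_n = (Node*)acc_copyin(n, sizeof(Node));
        if (!d_prev) {
            acc_memcpy_to_device(d_head_slot, &d_n, sizeof(Node*));
        } else {
            // patch the previous device node's host-valued 'next' field
            acc_memcpy_to_device(&(d_prev->next), &d_n, sizeof(Node*));
        }
        d_prev = d_n;
    }
    // the last node's 'next' was copied as nullptr, terminating the device
    // list just as the QUESTION comment above assumes for tml
}
```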
@@ -519,9 +511,7 @@ static void net_receive_buffer_order(NetReceiveBuffer_t* nrb) {
  * functional version.
  */
 void update_net_receive_buffer(NrnThread* nt) {
-    NrnThreadMembList* tml;
-
-    for (tml = nt->tml; tml; tml = tml->next) {
+    for (auto tml = nt->tml; tml; tml = tml->next) {
         // net_receive buffer to copy
         NetReceiveBuffer_t* nrb = tml->ml->_net_receive_buffer;
 
@@ -574,10 +564,7 @@ void update_nrnthreads_on_host(NrnThread* threads, int nthreads) {
 
     printf("\n --- Copying to Host! --- \n");
 
-    int i;
-    NetReceiveBuffer_t* nrb;
-
-    for (i = 0; i < nthreads; i++) {
+    for (int i = 0; i < nthreads; i++) {
         NrnThread* nt = threads + i;
 
         if (nt->compute_gpu && (nt->end > 0)) {
@@ -598,8 +585,7 @@ void update_nrnthreads_on_host(NrnThread* threads, int nthreads) {
             /* @todo: nt._ml_list[tml->index] = tml->ml; */
 
             /* -- copy NrnThreadMembList list ml to host -- */
-            NrnThreadMembList* tml;
-            for (tml = nt->tml; tml; tml = tml->next) {
+            for (auto tml = nt->tml; tml; tml = tml->next) {
                 Memb_list* ml = tml->ml;
 
                 acc_update_self(&tml->index, sizeof(int));
@@ -625,7 +611,7 @@ void update_nrnthreads_on_host(NrnThread* threads, int nthreads) {
                     acc_update_self(ml->pdata, pcnt * sizeof(int));
                 }
 
-                nrb = tml->ml->_net_receive_buffer;
+                auto nrb = tml->ml->_net_receive_buffer;
 
                 if (nrb) {
                     acc_update_self(&nrb->_cnt, sizeof(int));
@@ -674,10 +660,7 @@ void update_nrnthreads_on_device(NrnThread* threads, int nthreads) {
 
     printf("\n --- Copying to Device! --- \n");
 
-    int i;
-    NetReceiveBuffer_t* nrb;
-
-    for (i = 0; i < nthreads; i++) {
+    for (int i = 0; i < nthreads; i++) {
         NrnThread* nt = threads + i;
 
         if (nt->compute_gpu && (nt->end > 0)) {
@@ -698,21 +681,19 @@ void update_nrnthreads_on_device(NrnThread* threads, int nthreads) {
             /* @todo: nt._ml_list[tml->index] = tml->ml; */
 
             /* -- copy NrnThreadMembList list ml to device -- */
-            NrnThreadMembList* tml;
-            for (tml = nt->tml; tml; tml = tml->next) {
+            for (auto tml = nt->tml; tml; tml = tml->next) {
                 Memb_list* ml = tml->ml;
                 int type = tml->index;
                 int n = ml->nodecount;
                 int szp = corenrn.get_prop_param_size()[type];
                 int szdp = corenrn.get_prop_dparam_size()[type];
-                int is_art = corenrn.get_is_artificial()[type];
                 int layout = corenrn.get_mech_data_layout()[type];
 
                 int pcnt = nrn_soa_padded_size(n, layout) * szp;
 
                 acc_update_device(ml->data, pcnt * sizeof(double));
 
-                if (!is_art) {
+                if (!corenrn.get_is_artificial()[type]) {
                     acc_update_device(ml->nodeindices, n * sizeof(int));
                 }
 
@@ -721,7 +702,7 @@ void update_nrnthreads_on_device(NrnThread* threads, int nthreads) {
                     acc_update_device(ml->pdata, pcnt * sizeof(int));
                 }
 
-                nrb = tml->ml->_net_receive_buffer;
+                auto nrb = tml->ml->_net_receive_buffer;
 
                 if (nrb) {
                     acc_update_device(&nrb->_cnt, sizeof(int));
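Note that `update_nrnthreads_on_host` and `update_nrnthreads_on_device` mirror each other: the former pulls with `acc_update_self()`, the latter pushes with `acc_update_device()`, both taking the host address of data already present in the device data environment. A minimal sketch of the pair on a hypothetical buffer:

```cpp
#include <openacc.h>
#include <cstddef>

// 'host_buf' must already be mapped (e.g. via a prior acc_copyin);
// these calls refresh one side from the other without remapping.
void sync_buffer(double* host_buf, size_t n, bool to_device) {
    if (to_device) {
        acc_update_device(host_buf, n * sizeof(double));  // host -> device
    } else {
        acc_update_self(host_buf, n * sizeof(double));    // device -> host
    }
}
```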
@@ -824,10 +805,6 @@ void finalize_data_on_device() {
        called yet for Random123 data / streams etc. So handle this better!
     */
     return;
-
-#ifdef _OPENACC
-    acc_shutdown(acc_device_default);
-#endif
 }
 
 void nrn_newtonspace_copyto_device(NewtonSpace* ns) {
@@ -841,8 +818,6 @@ void nrn_newtonspace_copyto_device(NewtonSpace* ns) {
     NewtonSpace* d_ns = (NewtonSpace*)acc_copyin(ns, sizeof(NewtonSpace));
 
     double* pd;
-    int* pint;
-    double** ppd;
 
     pd = (double*)acc_copyin(ns->delta_x, n * sizeof(double));
     acc_memcpy_to_device(&(d_ns->delta_x), &pd, sizeof(double*));
@@ -856,10 +831,10 @@ void nrn_newtonspace_copyto_device(NewtonSpace* ns) {
     pd = (double*)acc_copyin(ns->rowmax, n * sizeof(double));
     acc_memcpy_to_device(&(d_ns->rowmax), &pd, sizeof(double*));
 
-    pint = (int*)acc_copyin(ns->perm, n * sizeof(int));
+    auto pint = (int*)acc_copyin(ns->perm, n * sizeof(int));
     acc_memcpy_to_device(&(d_ns->perm), &pint, sizeof(int*));
 
-    ppd = (double**)acc_copyin(ns->jacobian, ns->n * sizeof(double*));
+    auto ppd = (double**)acc_copyin(ns->jacobian, ns->n * sizeof(double*));
     acc_memcpy_to_device(&(d_ns->jacobian), &ppd, sizeof(double**));
 
     // the actual jacobian doubles were allocated as a single array
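The trailing comment hints at the next (unshown) step: since the jacobian's doubles live in one contiguous block, the block can be copied once and each device-side row pointer patched to point into it. A hedged sketch of that pattern, assuming `jac` is an array of `n` row pointers into a single `n * n` allocation (not verified against the actual NewtonSpace layout):

```cpp
#include <openacc.h>

// 'jac' holds n row pointers into one contiguous n*n block (assumed layout);
// 'd_jac' is the device copy of the row-pointer array made above.
void copy_jacobian_rows(double** jac, double** d_jac, int n) {
    // one acc_copyin for the whole block of doubles
    auto d_block = (double*)acc_copyin(jac[0], n * n * sizeof(double));
    for (int i = 0; i < n; ++i) {
        double* d_row = d_block + i * n;  // row i inside the device block
        // overwrite the host-valued row pointer in the device array
        acc_memcpy_to_device(&(d_jac[i]), &d_row, sizeof(double*));
    }
}
```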
@@ -886,25 +861,19 @@ void nrn_sparseobj_copyto_device(SparseObj* so) {
     // r_down, c_right, value
     // do not care about the Elm* ptr value, just the space.
 
-    Elm** ppelm;
-    Elm* pelm;
-    unsigned* pu;
-    double* pd;
-    double** ppd;
-
     Elm** d_rowst = (Elm**)acc_copyin(so->rowst, n1 * sizeof(Elm*));
     acc_memcpy_to_device(&(d_so->rowst), &d_rowst, sizeof(Elm**));
 
     Elm** d_diag = (Elm**)acc_copyin(so->diag, n1 * sizeof(Elm*));
     acc_memcpy_to_device(&(d_so->diag), &d_diag, sizeof(Elm**));
 
-    pu = (unsigned*)acc_copyin(so->ngetcall, so->_cntml_padded * sizeof(unsigned));
+    auto pu = (unsigned*)acc_copyin(so->ngetcall, so->_cntml_padded * sizeof(unsigned));
     acc_memcpy_to_device(&(d_so->ngetcall), &pu, sizeof(Elm**));
 
-    pd = (double*)acc_copyin(so->rhs, n1 * so->_cntml_padded * sizeof(double));
+    auto pd = (double*)acc_copyin(so->rhs, n1 * so->_cntml_padded * sizeof(double));
     acc_memcpy_to_device(&(d_so->rhs), &pd, sizeof(double*));
 
-    double** d_coef_list =
+    auto d_coef_list =
         (double**)acc_copyin(so->coef_list, so->coef_list_size * sizeof(double*));
     acc_memcpy_to_device(&(d_so->coef_list), &d_coef_list, sizeof(double**));
 
@@ -940,13 +909,13 @@ void nrn_sparseobj_copyto_device(SparseObj* so) {
     // visit all the Elm again and fill in pelm->r_down and pelm->c_left
     for (unsigned irow = 1; irow < n1; ++irow) {
         for (Elm* elm = so->rowst[irow]; elm; elm = elm->c_right) {
-            pelm = (Elm*)acc_deviceptr(elm);
+            auto pelm = (Elm*)acc_deviceptr(elm);
             if (elm->r_down) {
-                Elm* d_e = (Elm*)acc_deviceptr(elm->r_down);
+                auto d_e = (Elm*)acc_deviceptr(elm->r_down);
                 acc_memcpy_to_device(&(pelm->r_down), &d_e, sizeof(Elm*));
             }
             if (elm->c_right) {
-                Elm* d_e = (Elm*)acc_deviceptr(elm->c_right);
+                auto d_e = (Elm*)acc_deviceptr(elm->c_right);
                 acc_memcpy_to_device(&(pelm->c_right), &d_e, sizeof(Elm*));
             }
         }
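This second pass depends on `acc_deviceptr()`, which translates a host address into the device address previously associated with it by `acc_copyin()`, without copying anything. A small sketch of that translation step on a hypothetical pair of nodes already present on the device:

```cpp
#include <openacc.h>

struct Cell {
    double v;
    Cell* next;
};

// 'a' and 'b' must have been registered earlier with acc_copyin();
// acc_deviceptr() only looks up the mapping, it moves no data itself.
void link_on_device(Cell* a, Cell* b) {
    auto d_a = (Cell*)acc_deviceptr(a);
    auto d_b = (Cell*)acc_deviceptr(b);
    // rewrite the 'next' field of a's device image to b's device address
    acc_memcpy_to_device(&(d_a->next), &d_b, sizeof(Cell*));
}
```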
@@ -983,8 +952,7 @@ void init_gpu(int nthreads, NrnThread* threads) {
     }
 
     /** @todo: currently only checking nvidia gpu */
-    int num_gpus = acc_get_num_devices(acc_device_nvidia);
-    if (num_gpus == 0) {
+    if (acc_get_num_devices(acc_device_nvidia) == 0) {
         printf("\n WARNING: Enabled GPU execution but couldn't find NVIDIA GPU! \n");
     }
 
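For context, `acc_get_num_devices()` reports how many devices of the given type the OpenACC runtime can see, so comparing against zero is the portable "no GPU" probe; folding the call into the condition drops a single-use temporary. A standalone sketch (hypothetical wrapper; `acc_device_nvidia` is an NVIDIA/PGI device-type constant):

```cpp
#include <openacc.h>
#include <cstdio>

bool have_nvidia_gpu() {
    // acc_get_num_devices() returns 0 when no device of this type is visible
    if (acc_get_num_devices(acc_device_nvidia) == 0) {
        printf("\n WARNING: Enabled GPU execution but couldn't find NVIDIA GPU!\n");
        return false;
    }
    return true;
}
```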