allocate host and device memory, define data structure

tjz2026 · Nov 21, 2016 · 8311d52 · 8311d52
1 parent 54858b7
commit 8311d52
Show file tree

Hide file tree

Showing 12 changed files with 643 additions and 20 deletions.
diff --git a/Config.txt b/Config.txt
@@ -0,0 +1,8 @@
+Dim=3
+Nx=64, Ny=32, Nz=16
+lx=4.0, ly=4.0, lz=4.0
+XN=20
+fA=0.5
+Ns=100
+
+
diff --git a/constants.h b/constants.h
@@ -0,0 +1,10 @@
+#ifndef	_CONSTANTS_H
+#define _CONSTANTS_H
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream> 
+#include <fstream>
+#include <vector>
+using namespace std;
+#endif
diff --git a/field.cpp b/field.cpp
@@ -0,0 +1,87 @@
+#include "field.h"
+// initialize the field with certain specification.
+int andsn_dim;
+int simple_num;
+int iter_counter;
+double lambda_andsn; 
+double ***andsn_W_sp;
+double ***andsn_dW_sp;
+double *global_t_diff;
+double *yita;  // pressure field 
+
+void field_init(int field_type, GRID *grid, CHEMICAL *chemical, CHAIN *chain )
+{
+
+   int i1,j1,k1;
+   long K;
+   double temp_coff=0.8;
+// init the pressure field
+// select the initial field type
+switch (field_type) {
+// random noise 
+   case 0 : {
+     break;
+         }
+// FCC
+   case 1 : {
+
+     break;
+         }
+// BCC
+   case 2 : {
+
+     break;
+         }
+// GYR
+   case 3 : {
+
+     break;
+         }
+// HEX
+   case 4 : {
+    assert(grid->Dim==2 || grid->Dim==3);
+// the hexagonal structure is kept translation constant along the z axis, and the ratio of size on X and Y axis is sqrt(3)         
+      for (i1=0,K=0;i1<=grid->Nz;i1++) {  // Z axis
+         for (j1=0;j1<=grid->Ny;j1++)      { // Y axis
+            for (k1=0;k1<=grid->Nx;k1++,K++) { // X axis
+                chemical->R_sp[0][K]=(chain->Blk_f[0])*(1.0+ \ 
+                 temp_coff*cos(2.0*Pi*(j1+1)/grid->Ny)*cos(2.0*Pi*(k1+1)/grid->Nx));
+                                                              }
+                                                           }
+
+                                                       }    
+     break;
+         }
+// LAM
+   case 5 : {
+// the sine-like lam is  along the X axis
+      for (i1=0,K=0;i1<=grid->Nz;i1++) {  // Z axis
+         for (j1=0;j1<=grid->Ny;j1++)      { // Y axis
+            for (k1=0;k1<=grid->Nx;k1++,K++) { // X axis
+                chemical->R_sp[0][K]=(chain->Blk_f[0])*(1.0+ \ 
+                 temp_coff*sin(2.0*Pi*(k1+1)/grid->Nx));
+                                                              }
+                                                           }
+                                                       }    
+
+
+     break;
+         }
+
+   default : {}
+     break;
+ } 
+
+// currently, only AB melts is supported, field initialization of more molecular species is to be implemented.
+      for (i1=0,K=0;i1<=grid->Nz;i1++) {  // Z axis
+         for (j1=0;j1<=grid->Ny;j1++)      { // Y axis
+            for (k1=0;k1<=grid->Nx;k1++,K++) { // X axis
+                chemical->R_sp[1][K]=1.0-chemical->R_sp[0][K];
+                chemical->W_sp[0][K]=chemical->XN[0][1]*chemical->R_sp[1][K];
+                chemical->W_sp[1][K]=chemical->XN[0][1]*chemical->R_sp[0][K];
+                                                              }
+                                                           }
+                                                       }    
+
+}
+
diff --git a/field.h b/field.h
@@ -0,0 +1,20 @@
+#ifndef	FIELD_H
+#define FIELD_H
+#include"struct.h"
+// parameters for anderson mixing
+extern int andsn_dim;
+extern int simple_num;
+extern int iter_counter;
+extern double lambda_andsn; 
+extern double ***andsn_W_sp;
+extern double ***andsn_dW_sp;
+extern double *global_t_diff;
+extern double *yita;  // pressure field 
+
+void field_init(int field_type, GRID *grid, CHEMICAL *chemical, CHAIN *chain );
+//void andsn_init( int andsn_dim,int num, double lambda, grid &Grid, chemical &Chemical, chain &Chain );
+//void andsn_iterate_diblock(int andsn_dim, grid &Grid, chemical &Chemical, chain &Chain, cell &Cell  ) ;
+//int indx_update_andsn(int index_i, int n_r);
+//void field_clean(CHEMICAL *chemical );
+
+#endif
diff --git a/init.cpp b/init.cpp
@@ -4,15 +4,15 @@
 	initialize the grid and cell infomation
 */
 
-void init_grid_cell(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info,GRID *grid, CELL *cell){
+void init_AB_diblock_scft(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info,GRID *grid, CELL *cell, CHEMICAL *chemical, CHAIN *chain){
 
 	char *typeChoice = 0;
 
 	gpu_info->kernal_type=1;
 
 	gpu_info->GPU_N=1; // specify the number of GPU to be used, otherwise by default is 1.
 	char comment[200];
-
+        int k,j;
 
 	if(gpu_info->kernal_type==1){
 
@@ -42,26 +42,72 @@ void init_grid_cell(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info,GRID *grid, CELL *
 
                 cufft_info->Nx=grid->Nx;  
                 cufft_info->Ny=grid->Ny;  
-                cufft_info->Nz=grid->Nz;  
+                cufft_info->Nz=grid->Nz; 
+                // currently only AB melts and AB diblock are supported in the program
+                chemical->N_spe=2;
+                for (k=0;k<chemical->N_spe;k++) {
+                    for (j=0;j<chemical->N_spe;j++) {
+                         chemical->XN[k][j]=0.0;
+                                          } 
+                                      }  
+		fscanf(fp,"XN=%lf\n",&chemical->XN[0][1]);
+                chemical->XN[1][0]=chemical->XN[0][1];
+                chemical->Nxyz=grid->Nx*grid->Ny*grid->Nz;
 
-
+                chain->N_blk=2; 
+                chain->Blk_start=(int*)malloc(chain->N_blk*sizeof(int));
+                chain->Blk_end=(int*)malloc(chain->N_blk*sizeof(int));
+                chain->Blk_spe=(int*)malloc(chain->N_blk*sizeof(int));
+                chain->Blk_f=(double*)malloc(chain->N_blk*sizeof(double));
+
+                chain->Blk_spe[0]=0; // 0 for A, 1 for B
+                chain->Blk_spe[1]=1; 
+		fscanf(fp,"fA=%d\n",&chain->Blk_f[0]);
+                chain->Blk_f[1]=1.0-chain->Blk_f[0];
+		fscanf(fp,"Ns=%d\n",&chain->Ns);
+		fclose(fp);
+
+                int NA,NB;
+                NA=int(chain->Ns*chain->Blk_f[0]);
+                NB=chain->Ns-NA;
+                chain->Blk_start[0]=0;
+                chain->Blk_start[1]=NA;
+                chain->Blk_end[0]=NA;
+                chain->Blk_end[1]=chain->Ns;
+                chain->Nxyz=chemical->Nxyz;  
+		chain->Q=0.0;
 		//! output configuration.
 
 		printf("%d %d %d\n",cufft_info->Nx,cufft_info->Ny,cufft_info->Nz);	
 		printf("lx=%lf, ly=%lf, lz=%lf\n",cell->Lx, cell->Ly, cell->Lz);
 
-
-
 		init_cuda(gpu_info,0);
 
 		initialize_cufft(gpu_info,cufft_info);
                 test_cufft(gpu_info,cufft_info);
-
- //               finalize_cufft(gpu_info,cufft_info);
+                // allocate the device memeory for chain and chemical;
+                init_chain_chemical(gpu_info,grid,cell,chemical,chain);
+                field_init(4,grid,chemical, chain);   // init hexagonal field
+                field_cp_gpu(grid,chemical,chain); // copy field to gpu memeory  
+
+                //finalize_cufft(gpu_info,cufft_info);
 
 	          }
 
 
-
-
 }
+
+
+void AB_diblock_scft_driver(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info,GRID *grid, CELL *cell, CHEMICAL *chemical, CHAIN *chain ){
+//   build the grid info and cufft info, allocate the memeory for scft arrays on both cpu and gpu
+     init_AB_diblock_scft(cufft_info,gpu_info,grid, cell, chemical, chain);
+// fill the fields with selected type, in this case, hexagonal is tested.
+
+
+
+
+}
+
+
+
+
diff --git a/init.h b/init.h
@@ -1,6 +1,9 @@
-
 #include"struct.h"
 #include <helper_functions.h>
 #include "init_cuda.h"
+#include "field.h"
 
-void init_grid_cell(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info, GRID *grid, CELL *cell);
+//void init_grid_cell(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info, GRID *grid, CELL *cell);
+//void init_scft(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info, GRID *grid, CELL *cell, CHEMICAL *chemical, CHAIN *chain);
+void init_AB_diblock_scft(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info,GRID *grid, CELL *cell, CHEMICAL *chemical, CHAIN *chain);
+void AB_diblock_scft_driver(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info,GRID *grid, CELL *cell, CHEMICAL *chemical, CHAIN *chain );
diff --git a/init_cuda.cu b/init_cuda.cu
@@ -28,10 +28,14 @@ extern void init_cuda(GPU_INFO *gpu_info,int display){
 	for(i=0;i<(gpu_info->GPU_N);i++) {
 		gpu_info->GPU_list[i]=i;}	//!Define on these GPU to calculate 
 
-
+	int dev_indx[gpu_info->GPU_N];
+        // user defined GPU device index, check which GPU to use by type "nvidia-smi"
+        assert(gpu_info->GPU_N==1);
+        dev_indx[0]=1;
+
 	for (i=0; i < gpu_info->GPU_N; i++){
 
-        	checkCudaErrors(cudaGetDeviceProperties(&gpu_info->prop[i], i)); // get the device properties for the specified device number i
+        	checkCudaErrors(cudaGetDeviceProperties(&gpu_info->prop[i], dev_indx[i])); // get the device properties for the specified device number i
 
 		checkCudaErrors(cudaSetDevice(gpu_info->GPU_list[i])); // cuda runtime API, thread-safe, to select which GPU to execute CUDA calls on
 
@@ -165,6 +169,100 @@ extern void initialize_cufft(GPU_INFO *gpu_info,CUFFT_INFO *cufft_info){
 }
 
 
+extern void init_chain_chemical(GPU_INFO *gpu_info,GRID *grid,CELL *cell,CHEMICAL *chemical,CHAIN *chain) {
+        double *kx,*ky,*kz;  
+        double dx,dy,dz,ksq;
+        int Nx,Ny,Nz,i,j,k;
+        long NxNyNz,ijk;
+        Nx=grid->Nx;   
+        Ny=grid->Ny;   
+        Nz=grid->Nz;   
+        NxNyNz=Nx*Nx*Nz;   
+	kx=(double *)malloc(sizeof(double)*Nx);
+	ky=(double *)malloc(sizeof(double)*Ny);
+	kz=(double *)malloc(sizeof(double)*Nz);
+	dx=cell->dx;
+	dy=cell->dy;
+	dz=cell->dz;
+
+	chain->exp_ksq=(double *)malloc(sizeof(double)*NxNyNz);	
+	chemical->exp_w=(double *)malloc(sizeof(double)*NxNyNz*chemical->N_spe);	
+	for(i=0;i<=Nx/2-1;i++) kx[i]=2*Pi*i*1.0/Nx/dx;
+	for(i=Nx/2;i<Nx;i++)   kx[i]=2*Pi*(i-Nx)*1.0/dx/Nx;
+	for(i=0;i<Nx;i++)      kx[i]*=kx[i];
+
+	for(i=0;i<=Ny/2-1;i++) ky[i]=2*Pi*i*1.0/Ny/dy;
+	for(i=Ny/2;i<Ny;i++)   ky[i]=2*Pi*(i-Ny)*1.0/dy/Ny;
+	for(i=0;i<Ny;i++)      ky[i]*=ky[i];
+
+	for(i=0;i<=Nz/2-1;i++) kz[i]=2*Pi*i*1.0/Nz/dz;
+	for(i=Nz/2;i<Nz;i++)   kz[i]=2*Pi*(i-Nz)*1.0/dz/Nz;
+	for(i=0;i<Nz;i++)      kz[i]*=kz[i];
+	double ds;
+        ds=1.0/chain->Ns;
+	for(k=0;k<Nz;k++) {
+	   for(j=0;j<Ny;j++){
+	      for(i=0;i<Nx;i++){
+		ijk=(long)((k*Ny+j)*Nx+i);// x is the fastest dimension!!
+		ksq=kx[i]+ky[j]+kz[k];
+		chain->exp_ksq[ijk]=exp(-ds*ksq);
+	                       }
+                             }
+                           }
+
+	checkCudaErrors(cudaMallocManaged(&chain->exp_ksq_cu, sizeof(double)* NxNyNz));
+	checkCudaErrors(cudaMallocManaged(&chemical->exp_w_cu, sizeof(double)* NxNyNz*chemical->N_spe));
+
+	checkCudaErrors(cudaMemcpy(chain->exp_ksq_cu, chain->exp_ksq,sizeof(double)*NxNyNz,cudaMemcpyHostToDevice));
+
+    checkCudaErrors(cudaDeviceSynchronize());
+
+    cudaEvent_t start, stop;
+    float time;
+    cudaEventCreate(&start);
+    cudaEventCreate(&stop);
+
+
+    // allocate chemical fields and density profile on CPU
+    chemical->W_sp=dmatrix(0,chemical->N_spe-1,0,chemical->Nxyz-1);
+    chemical->R_sp=dmatrix(0,chemical->N_spe-1,0,chemical->Nxyz-1);
+    // propagators are allocated on GPU gobal memeory
+    cudaEventRecord(start, 0);
+    checkCudaErrors(cudaMallocManaged(&chain->qf, sizeof(double)* chain->Nxyz*chain->Ns));
+    checkCudaErrors(cudaMallocManaged(&chain->qb, sizeof(double)* chain->Nxyz*chain->Ns));
+
+    checkCudaErrors(cudaDeviceSynchronize());
+    cudaEventRecord(stop, 0);
+    cudaEventSynchronize(stop);
+    cudaEventElapsedTime(&time, start, stop);
+    printf ("Time for the kernel: %f ms\n", time);
+
+}
+
+extern void field_cp_gpu(GRID *grid,CHEMICAL *chemical,CHAIN *chain) {
+     int Nx,Ny,Nz,i,j,k,spe;
+     long ijk;
+     Nx=grid->Nx;   
+     Ny=grid->Ny;   
+     Nz=grid->Nz;   
+     double ds;
+     ds=1.0/chain->Ns;
+      for (spe=0;spe<chemical->N_spe;spe++) {
+	for(k=0;k<Nz;k++) {
+	   for(j=0;j<Ny;j++){
+	      for(i=0;i<Nx;i++){
+		ijk=(long)((k*Ny+j)*Nx+i+ spe*chemical->Nxyz );// x is the fastest dimension!!
+		chemical->exp_w[ijk]=exp(-0.5*ds*chemical->W_sp[spe][ijk]);
+	                       }
+                             }
+                           }
+	                 }
+
+    checkCudaErrors(cudaMemcpy(chemical->exp_w_cu, chemical->exp_w,sizeof(double)*chemical->Nxyz*chemical->N_spe,cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaDeviceSynchronize());
+
+}
+
 extern void test_cufft(GPU_INFO *gpu_info,CUFFT_INFO *cufft_info){
        double *h_in;
        double complex *h_out;
@@ -230,8 +328,6 @@ extern void finalize_cufft(GPU_INFO *gpu_info,CUFFT_INFO *cufft_info){
 
 	}
 
-
-
 	//! free memery on CPU
 
 	free(gpu_info->stream);

diff --git a/init_cuda.h b/init_cuda.h
@@ -15,5 +15,7 @@ extern void initialize_cufft(GPU_INFO *gpu_info,CUFFT_INFO *cufft_info);
 extern void finalize_cufft(GPU_INFO *gpu_info,CUFFT_INFO *cufft_info);
 extern void test_cufft(GPU_INFO *gpu_info,CUFFT_INFO *cufft_info);
 
+extern void init_chain_chemical(GPU_INFO *gpu_info,GRID *grid,CELL *cell,CHEMICAL *chemical,CHAIN *chain);
 
+extern void field_cp_gpu(GRID *grid,CHEMICAL *chemical,CHAIN *chain);
 #endif
diff --git a/main.cpp b/main.cpp
@@ -25,18 +25,21 @@
 #include "init.h"
 
 #include <ctime>
+
 // z dimension must be larger than Nz/GPU_N>=8
 // CUDA runtime
 
 int main(void){
 
-
+	// declare user defined structures 
 	GPU_INFO gpu_info;
         GRID grid;   
         CELL cell;   
 	CUFFT_INFO cufft_info;
-
-	init_grid_cell(&cufft_info,&gpu_info,&grid,&cell);
+        CHEMICAL AB_melts;
+        CHAIN diblock;
+	//init_grid_cell(&cufft_info,&gpu_info,&grid,&cell);
+        AB_diblock_scft_driver(&cufft_info,&gpu_info,&grid, &cell, &AB_melts, &diblock);
 
 //	fft_test(&gpu_info,&cufft_info);