Skip to content

Commit

Permalink
allocate host and device memory, define data structure
Browse files Browse the repository at this point in the history
  • Loading branch information
Jiuzhou Tang committed Nov 21, 2016
1 parent 54858b7 commit 8311d52
Show file tree
Hide file tree
Showing 12 changed files with 643 additions and 20 deletions.
8 changes: 8 additions & 0 deletions Config.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Dim=3
Nx=64, Ny=32, Nz=16
lx=4.0, ly=4.0, lz=4.0
XN=20
fA=0.5
Ns=100


10 changes: 10 additions & 0 deletions constants.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#ifndef _CONSTANTS_H
#define _CONSTANTS_H
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <vector>
using namespace std;
#endif
87 changes: 87 additions & 0 deletions field.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#include "field.h"
// initialize the field with certain specification.
int andsn_dim;
int simple_num;
int iter_counter;
double lambda_andsn;
double ***andsn_W_sp;
double ***andsn_dW_sp;
double *global_t_diff;
double *yita; // pressure field

void field_init(int field_type, GRID *grid, CHEMICAL *chemical, CHAIN *chain )
{

int i1,j1,k1;
long K;
double temp_coff=0.8;
// init the pressure field
// select the initial field type
switch (field_type) {
// random noise
case 0 : {
break;
}
// FCC
case 1 : {

break;
}
// BCC
case 2 : {

break;
}
// GYR
case 3 : {

break;
}
// HEX
case 4 : {
assert(grid->Dim==2 || grid->Dim==3);
// the hexagonal structure is kept translation constant along the z axis, and the ratio of size on X and Y axis is sqrt(3)
for (i1=0,K=0;i1<=grid->Nz;i1++) { // Z axis
for (j1=0;j1<=grid->Ny;j1++) { // Y axis
for (k1=0;k1<=grid->Nx;k1++,K++) { // X axis
chemical->R_sp[0][K]=(chain->Blk_f[0])*(1.0+ \
temp_coff*cos(2.0*Pi*(j1+1)/grid->Ny)*cos(2.0*Pi*(k1+1)/grid->Nx));
}
}

}
break;
}
// LAM
case 5 : {
// the sine-like lam is along the X axis
for (i1=0,K=0;i1<=grid->Nz;i1++) { // Z axis
for (j1=0;j1<=grid->Ny;j1++) { // Y axis
for (k1=0;k1<=grid->Nx;k1++,K++) { // X axis
chemical->R_sp[0][K]=(chain->Blk_f[0])*(1.0+ \
temp_coff*sin(2.0*Pi*(k1+1)/grid->Nx));
}
}
}


break;
}

default : {}
break;
}

// currently, only AB melts is supported, field initialization of more molecular species is to be implemented.
for (i1=0,K=0;i1<=grid->Nz;i1++) { // Z axis
for (j1=0;j1<=grid->Ny;j1++) { // Y axis
for (k1=0;k1<=grid->Nx;k1++,K++) { // X axis
chemical->R_sp[1][K]=1.0-chemical->R_sp[0][K];
chemical->W_sp[0][K]=chemical->XN[0][1]*chemical->R_sp[1][K];
chemical->W_sp[1][K]=chemical->XN[0][1]*chemical->R_sp[0][K];
}
}
}

}

20 changes: 20 additions & 0 deletions field.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#ifndef FIELD_H
#define FIELD_H
#include"struct.h"
// parameters for anderson mixing
extern int andsn_dim;
extern int simple_num;
extern int iter_counter;
extern double lambda_andsn;
extern double ***andsn_W_sp;
extern double ***andsn_dW_sp;
extern double *global_t_diff;
extern double *yita; // pressure field

void field_init(int field_type, GRID *grid, CHEMICAL *chemical, CHAIN *chain );
//void andsn_init( int andsn_dim,int num, double lambda, grid &Grid, chemical &Chemical, chain &Chain );
//void andsn_iterate_diblock(int andsn_dim, grid &Grid, chemical &Chemical, chain &Chain, cell &Cell ) ;
//int indx_update_andsn(int index_i, int n_r);
//void field_clean(CHEMICAL *chemical );

#endif
66 changes: 56 additions & 10 deletions init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
initialize the grid and cell infomation
*/

void init_grid_cell(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info,GRID *grid, CELL *cell){
void init_AB_diblock_scft(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info,GRID *grid, CELL *cell, CHEMICAL *chemical, CHAIN *chain){

char *typeChoice = 0;

gpu_info->kernal_type=1;

gpu_info->GPU_N=1; // specify the number of GPU to be used, otherwise by default is 1.
char comment[200];

int k,j;

if(gpu_info->kernal_type==1){

Expand Down Expand Up @@ -42,26 +42,72 @@ void init_grid_cell(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info,GRID *grid, CELL *

cufft_info->Nx=grid->Nx;
cufft_info->Ny=grid->Ny;
cufft_info->Nz=grid->Nz;
cufft_info->Nz=grid->Nz;
// currently only AB melts and AB diblock are supported in the program
chemical->N_spe=2;
for (k=0;k<chemical->N_spe;k++) {
for (j=0;j<chemical->N_spe;j++) {
chemical->XN[k][j]=0.0;
}
}
fscanf(fp,"XN=%lf\n",&chemical->XN[0][1]);
chemical->XN[1][0]=chemical->XN[0][1];
chemical->Nxyz=grid->Nx*grid->Ny*grid->Nz;


chain->N_blk=2;
chain->Blk_start=(int*)malloc(chain->N_blk*sizeof(int));
chain->Blk_end=(int*)malloc(chain->N_blk*sizeof(int));
chain->Blk_spe=(int*)malloc(chain->N_blk*sizeof(int));
chain->Blk_f=(double*)malloc(chain->N_blk*sizeof(double));

chain->Blk_spe[0]=0; // 0 for A, 1 for B
chain->Blk_spe[1]=1;
fscanf(fp,"fA=%d\n",&chain->Blk_f[0]);
chain->Blk_f[1]=1.0-chain->Blk_f[0];
fscanf(fp,"Ns=%d\n",&chain->Ns);
fclose(fp);

int NA,NB;
NA=int(chain->Ns*chain->Blk_f[0]);
NB=chain->Ns-NA;
chain->Blk_start[0]=0;
chain->Blk_start[1]=NA;
chain->Blk_end[0]=NA;
chain->Blk_end[1]=chain->Ns;
chain->Nxyz=chemical->Nxyz;
chain->Q=0.0;
//! output configuration.

printf("%d %d %d\n",cufft_info->Nx,cufft_info->Ny,cufft_info->Nz);
printf("lx=%lf, ly=%lf, lz=%lf\n",cell->Lx, cell->Ly, cell->Lz);



init_cuda(gpu_info,0);

initialize_cufft(gpu_info,cufft_info);
test_cufft(gpu_info,cufft_info);

// finalize_cufft(gpu_info,cufft_info);
// allocate the device memeory for chain and chemical;
init_chain_chemical(gpu_info,grid,cell,chemical,chain);
field_init(4,grid,chemical, chain); // init hexagonal field
field_cp_gpu(grid,chemical,chain); // copy field to gpu memeory

//finalize_cufft(gpu_info,cufft_info);

}




}


void AB_diblock_scft_driver(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info,GRID *grid, CELL *cell, CHEMICAL *chemical, CHAIN *chain ){
// build the grid info and cufft info, allocate the memeory for scft arrays on both cpu and gpu
init_AB_diblock_scft(cufft_info,gpu_info,grid, cell, chemical, chain);
// fill the fields with selected type, in this case, hexagonal is tested.




}




7 changes: 5 additions & 2 deletions init.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@

#include"struct.h"
#include <helper_functions.h>
#include "init_cuda.h"
#include "field.h"

void init_grid_cell(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info, GRID *grid, CELL *cell);
//void init_grid_cell(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info, GRID *grid, CELL *cell);
//void init_scft(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info, GRID *grid, CELL *cell, CHEMICAL *chemical, CHAIN *chain);
void init_AB_diblock_scft(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info,GRID *grid, CELL *cell, CHEMICAL *chemical, CHAIN *chain);
void AB_diblock_scft_driver(CUFFT_INFO *cufft_info,GPU_INFO *gpu_info,GRID *grid, CELL *cell, CHEMICAL *chemical, CHAIN *chain );
104 changes: 100 additions & 4 deletions init_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,14 @@ extern void init_cuda(GPU_INFO *gpu_info,int display){
for(i=0;i<(gpu_info->GPU_N);i++) {
gpu_info->GPU_list[i]=i;} //!Define on these GPU to calculate


int dev_indx[gpu_info->GPU_N];
// user defined GPU device index, check which GPU to use by type "nvidia-smi"
assert(gpu_info->GPU_N==1);
dev_indx[0]=1;

for (i=0; i < gpu_info->GPU_N; i++){

checkCudaErrors(cudaGetDeviceProperties(&gpu_info->prop[i], i)); // get the device properties for the specified device number i
checkCudaErrors(cudaGetDeviceProperties(&gpu_info->prop[i], dev_indx[i])); // get the device properties for the specified device number i

checkCudaErrors(cudaSetDevice(gpu_info->GPU_list[i])); // cuda runtime API, thread-safe, to select which GPU to execute CUDA calls on

Expand Down Expand Up @@ -165,6 +169,100 @@ extern void initialize_cufft(GPU_INFO *gpu_info,CUFFT_INFO *cufft_info){
}


extern void init_chain_chemical(GPU_INFO *gpu_info,GRID *grid,CELL *cell,CHEMICAL *chemical,CHAIN *chain) {
double *kx,*ky,*kz;
double dx,dy,dz,ksq;
int Nx,Ny,Nz,i,j,k;
long NxNyNz,ijk;
Nx=grid->Nx;
Ny=grid->Ny;
Nz=grid->Nz;
NxNyNz=Nx*Nx*Nz;
kx=(double *)malloc(sizeof(double)*Nx);
ky=(double *)malloc(sizeof(double)*Ny);
kz=(double *)malloc(sizeof(double)*Nz);
dx=cell->dx;
dy=cell->dy;
dz=cell->dz;

chain->exp_ksq=(double *)malloc(sizeof(double)*NxNyNz);
chemical->exp_w=(double *)malloc(sizeof(double)*NxNyNz*chemical->N_spe);
for(i=0;i<=Nx/2-1;i++) kx[i]=2*Pi*i*1.0/Nx/dx;
for(i=Nx/2;i<Nx;i++) kx[i]=2*Pi*(i-Nx)*1.0/dx/Nx;
for(i=0;i<Nx;i++) kx[i]*=kx[i];

for(i=0;i<=Ny/2-1;i++) ky[i]=2*Pi*i*1.0/Ny/dy;
for(i=Ny/2;i<Ny;i++) ky[i]=2*Pi*(i-Ny)*1.0/dy/Ny;
for(i=0;i<Ny;i++) ky[i]*=ky[i];

for(i=0;i<=Nz/2-1;i++) kz[i]=2*Pi*i*1.0/Nz/dz;
for(i=Nz/2;i<Nz;i++) kz[i]=2*Pi*(i-Nz)*1.0/dz/Nz;
for(i=0;i<Nz;i++) kz[i]*=kz[i];
double ds;
ds=1.0/chain->Ns;
for(k=0;k<Nz;k++) {
for(j=0;j<Ny;j++){
for(i=0;i<Nx;i++){
ijk=(long)((k*Ny+j)*Nx+i);// x is the fastest dimension!!
ksq=kx[i]+ky[j]+kz[k];
chain->exp_ksq[ijk]=exp(-ds*ksq);
}
}
}

checkCudaErrors(cudaMallocManaged(&chain->exp_ksq_cu, sizeof(double)* NxNyNz));
checkCudaErrors(cudaMallocManaged(&chemical->exp_w_cu, sizeof(double)* NxNyNz*chemical->N_spe));

checkCudaErrors(cudaMemcpy(chain->exp_ksq_cu, chain->exp_ksq,sizeof(double)*NxNyNz,cudaMemcpyHostToDevice));

checkCudaErrors(cudaDeviceSynchronize());

cudaEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);


// allocate chemical fields and density profile on CPU
chemical->W_sp=dmatrix(0,chemical->N_spe-1,0,chemical->Nxyz-1);
chemical->R_sp=dmatrix(0,chemical->N_spe-1,0,chemical->Nxyz-1);
// propagators are allocated on GPU gobal memeory
cudaEventRecord(start, 0);
checkCudaErrors(cudaMallocManaged(&chain->qf, sizeof(double)* chain->Nxyz*chain->Ns));
checkCudaErrors(cudaMallocManaged(&chain->qb, sizeof(double)* chain->Nxyz*chain->Ns));

checkCudaErrors(cudaDeviceSynchronize());
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
printf ("Time for the kernel: %f ms\n", time);

}

extern void field_cp_gpu(GRID *grid,CHEMICAL *chemical,CHAIN *chain) {
int Nx,Ny,Nz,i,j,k,spe;
long ijk;
Nx=grid->Nx;
Ny=grid->Ny;
Nz=grid->Nz;
double ds;
ds=1.0/chain->Ns;
for (spe=0;spe<chemical->N_spe;spe++) {
for(k=0;k<Nz;k++) {
for(j=0;j<Ny;j++){
for(i=0;i<Nx;i++){
ijk=(long)((k*Ny+j)*Nx+i+ spe*chemical->Nxyz );// x is the fastest dimension!!
chemical->exp_w[ijk]=exp(-0.5*ds*chemical->W_sp[spe][ijk]);
}
}
}
}

checkCudaErrors(cudaMemcpy(chemical->exp_w_cu, chemical->exp_w,sizeof(double)*chemical->Nxyz*chemical->N_spe,cudaMemcpyHostToDevice));
checkCudaErrors(cudaDeviceSynchronize());

}

extern void test_cufft(GPU_INFO *gpu_info,CUFFT_INFO *cufft_info){
double *h_in;
double complex *h_out;
Expand Down Expand Up @@ -230,8 +328,6 @@ extern void finalize_cufft(GPU_INFO *gpu_info,CUFFT_INFO *cufft_info){

}



//! free memery on CPU

free(gpu_info->stream);
Expand Down
2 changes: 2 additions & 0 deletions init_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,7 @@ extern void initialize_cufft(GPU_INFO *gpu_info,CUFFT_INFO *cufft_info);
extern void finalize_cufft(GPU_INFO *gpu_info,CUFFT_INFO *cufft_info);
extern void test_cufft(GPU_INFO *gpu_info,CUFFT_INFO *cufft_info);

extern void init_chain_chemical(GPU_INFO *gpu_info,GRID *grid,CELL *cell,CHEMICAL *chemical,CHAIN *chain);

extern void field_cp_gpu(GRID *grid,CHEMICAL *chemical,CHAIN *chain);
#endif
9 changes: 6 additions & 3 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,21 @@
#include "init.h"

#include <ctime>

// z dimension must be larger than Nz/GPU_N>=8
// CUDA runtime

int main(void){


// declare user defined structures
GPU_INFO gpu_info;
GRID grid;
CELL cell;
CUFFT_INFO cufft_info;

init_grid_cell(&cufft_info,&gpu_info,&grid,&cell);
CHEMICAL AB_melts;
CHAIN diblock;
//init_grid_cell(&cufft_info,&gpu_info,&grid,&cell);
AB_diblock_scft_driver(&cufft_info,&gpu_info,&grid, &cell, &AB_melts, &diblock);

// fft_test(&gpu_info,&cufft_info);

Expand Down
Loading

0 comments on commit 8311d52

Please sign in to comment.