From 792df8c2a3a202b7a9b939ffdb061ef87ed92e23 Mon Sep 17 00:00:00 2001 From: KAO <ohearnk@seawolf.cis.gvsu.edu> Date: Thu, 24 Nov 2016 10:58:31 -0500 Subject: [PATCH] PuReMD-GPU: code clean-up, and begin updating build system. --- PuReMD-GPU/Makefile.am | 8 +- PuReMD-GPU/configure.ac | 6 +- PuReMD-GPU/src/GMRES.cu | 3 +- PuReMD-GPU/src/allocate.cu | 16 +- PuReMD-GPU/src/bond_orders.cu | 2 +- PuReMD-GPU/src/bond_orders.h | 3 + PuReMD-GPU/src/cuda_copy.cu | 5 +- PuReMD-GPU/src/cuda_copy.h | 2 +- PuReMD-GPU/src/cuda_helpers.h | 40 +++-- PuReMD-GPU/src/cuda_init.cu | 16 +- PuReMD-GPU/src/cuda_utils.cu | 5 +- PuReMD-GPU/src/cuda_utils.h | 10 +- PuReMD-GPU/src/four_body_interactions.cu | 36 ++--- PuReMD-GPU/src/init_md.cu | 8 +- PuReMD-GPU/src/list.h | 4 +- PuReMD-GPU/src/mytypes.h | 182 +++++++++++----------- PuReMD-GPU/src/reduction.cu | 2 +- PuReMD-GPU/src/single_body_interactions.h | 3 +- PuReMD-GPU/src/system_props.cu | 4 +- PuReMD-GPU/src/system_props.h | 4 +- PuReMD-GPU/src/two_body_interactions.h | 3 +- 21 files changed, 183 insertions(+), 179 deletions(-) diff --git a/PuReMD-GPU/Makefile.am b/PuReMD-GPU/Makefile.am index d4332370..1a914816 100644 --- a/PuReMD-GPU/Makefile.am +++ b/PuReMD-GPU/Makefile.am @@ -18,12 +18,12 @@ NVCCFLAGS += --compiler-options "$(DEFS) -D__SM_35__ -O3 -funroll-loops -fstrict #NVCCFLAGS += --ptxas-options -v bin_PROGRAMS = bin/puremd-gpu -bin_puremd_gpu_SOURCES = src/analyze.c src/print_utils.c src/reset_utils.c src/param.c src/pdb_tools.c \ +bin_puremd_gpu_SOURCES = src/analyze.c src/print_utils.c src/restart.c src/param.c src/pdb_tools.c \ src/GMRES.cu src/QEq.cu src/allocate.cu src/bond_orders.cu \ src/box.cu src/forces.cu src/four_body_interactions.cu \ src/grid.cu src/init_md.cu src/integrate.cu src/list.cu \ src/lookup.cu src/neighbors.cu \ - src/restart.cu src/single_body_interactions.cu \ + src/reset_utils.cu src/single_body_interactions.cu \ src/system_props.cu src/three_body_interactions.cu \ src/traj.cu src/two_body_interactions.cu src/vector.cu \ src/testmd.cu \ @@ -31,12 +31,12 @@ bin_puremd_gpu_SOURCES = src/analyze.c src/print_utils.c src/reset_utils.c src/p src/center_mass.cu src/helpers.cu src/validation.cu src/matvec.cu include_HEADERS = src/mytypes.h src/analyze.h src/print_utils.h \ - src/reset_utils.h src/param.h src/pdb_tools.h \ + src/restart.h src/param.h src/pdb_tools.h \ src/GMRES.h src/QEq.h src/allocate.h src/bond_orders.h \ src/box.h src/forces.h src/four_body_interactions.h \ src/grid.h src/init_md.h src/integrate.h src/list.h \ src/lookup.h src/neighbors.h \ - src/restart.h src/single_body_interactions.h \ + src/reset_utils.h src/single_body_interactions.h \ src/system_props.h src/three_body_interactions.h \ src/traj.h src/two_body_interactions.h src/vector.h \ src/cuda_utils.h src/cuda_copy.h src/cuda_init.h src/reduction.h \ diff --git a/PuReMD-GPU/configure.ac b/PuReMD-GPU/configure.ac index c947ed02..9736714d 100644 --- a/PuReMD-GPU/configure.ac +++ b/PuReMD-GPU/configure.ac @@ -50,7 +50,7 @@ AC_SEARCH_LIBS([cublasDdot], [cublas]) AC_SEARCH_LIBS([cudaThreadSynchronize], [cuda]) AC_SEARCH_LIBS([cudaCheckError], [cuda]) # FIXME: Replace `main' with a function in `-lcudart': -#AC_CHECK_LIB([cudart], [main]) +AC_CHECK_LIB([cudart], [main]) AC_SEARCH_LIBS([cusparseCheckError], [cusparse]) AC_SEARCH_LIBS([cusparseCreateMatDescr], [cusparse]) AC_SEARCH_LIBS([cusparseSetMatType], [cusparse]) @@ -89,10 +89,6 @@ AC_CHECK_TYPES([cusparseHandle_t], [], [AC_MSG_FAILURE([cusparseHandle_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>]) AC_CHECK_TYPES([cusparseMatDescr_t], [], [AC_MSG_FAILURE([cusparseMatDescr_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>]) -#AC_CHECK_TYPES([CUSPARSE_MATRIX_TYPE_GENERAL], [], -# [AC_MSG_FAILURE([CUSPARSE_MATRIX_TYPE_GENERAL type not found in cusparse.h], [1])], [#include<cusparse_v2.h>]) -#AC_CHECK_TYPES([CUSPARSE_INDEX_BASE_ZERO], [], -# [AC_MSG_FAILURE([CUSPARSE_INDEX_BASE_ZERO type not found in cusparse.h], [1])], [#include<cusparse_v2.h>]) AC_CONFIG_FILES([Makefile]) diff --git a/PuReMD-GPU/src/GMRES.cu b/PuReMD-GPU/src/GMRES.cu index 2c1b0cf6..1c5c28c8 100644 --- a/PuReMD-GPU/src/GMRES.cu +++ b/PuReMD-GPU/src/GMRES.cu @@ -480,11 +480,10 @@ int Cublas_GMRES(reax_system *system, static_storage *workspace, real *b, real t real cc, tmp1, tmp2, temp, bnorm; real v_add_tmp; sparse_matrix *H = &workspace->H; - real t_start, t_elapsed; - real *spad = (real *)scratch; real *g = (real *) calloc ((RESTART+1), REAL_SIZE); + cublasHandle_t cublasHandle; N = H->n; diff --git a/PuReMD-GPU/src/allocate.cu b/PuReMD-GPU/src/allocate.cu index 37b80693..bae0dde8 100644 --- a/PuReMD-GPU/src/allocate.cu +++ b/PuReMD-GPU/src/allocate.cu @@ -26,8 +26,8 @@ void Reallocate_Neighbor_List( list *far_nbrs, int n, int num_intrs ) { - Delete_List( far_nbrs ); - if(!Make_List( n, num_intrs, TYP_FAR_NEIGHBOR, far_nbrs )){ + Delete_List( far_nbrs, TYP_HOST ); + if(!Make_List( n, num_intrs, TYP_FAR_NEIGHBOR, far_nbrs, TYP_HOST )){ fprintf(stderr, "Problem in initializing far nbrs list. Terminating!\n"); exit( INIT_ERR ); } @@ -153,7 +153,7 @@ int Allocate_HBond_List( int n, int num_h, int *h_index, int *hb_top, hb_top[i] += hb_top[i-1]; num_hbonds = hb_top[n-1]; - if( !Make_List(num_h, num_hbonds, TYP_HBOND, hbonds ) ) { + if( !Make_List(num_h, num_hbonds, TYP_HBOND, hbonds, TYP_HOST ) ) { fprintf( stderr, "not enough space for hbonds list. terminating!\n" ); exit( INIT_ERR ); } @@ -238,7 +238,7 @@ int Reallocate_HBonds_List( int n, int num_h, int *h_index, list *hbonds ) if( h_index[i] >= 0 ) hb_top[i] = MAX(Num_Entries(h_index[i],hbonds)*SAFE_HBONDS, MIN_HBONDS); - Delete_List( hbonds ); + Delete_List( hbonds, TYP_HOST ); Allocate_HBond_List( n, num_h, h_index, hb_top, hbonds ); @@ -336,7 +336,7 @@ int Allocate_Bond_List( int n, int *bond_top, list *bonds ) bond_top[i] += bond_top[i-1]; num_bonds = bond_top[n-1]; - if( !Make_List(n, num_bonds, TYP_BOND, bonds ) ) { + if( !Make_List(n, num_bonds, TYP_BOND, bonds, TYP_HOST ) ) { fprintf( stderr, "not enough space for bonds list. terminating!\n" ); exit( INIT_ERR ); } @@ -372,7 +372,7 @@ int Reallocate_Bonds_List( int n, list *bonds, int *num_bonds, int *est_3body ) bond_top[i] = MAX( Num_Entries( i, bonds ) * 2, MIN_BONDS ); } - Delete_List( bonds ); + Delete_List( bonds, TYP_HOST ); Allocate_Bond_List( n, bond_top, bonds ); *num_bonds = bond_top[n-1]; @@ -585,14 +585,14 @@ void Reallocate( reax_system *system, static_storage *workspace, list **lists, if( realloc->num_3body > 0 ) { fprintf (stderr, " Reallocating 3Body \n"); - Delete_List( (*lists)+THREE_BODIES ); + Delete_List( (*lists)+THREE_BODIES, TYP_HOST ); if( num_bonds == -1 ) num_bonds = ((*lists)+BONDS)->num_intrs; realloc->num_3body *= SAFE_ZONE; if( !Make_List( num_bonds, realloc->num_3body, - TYP_THREE_BODY, (*lists)+THREE_BODIES ) ) { + TYP_THREE_BODY, (*lists)+THREE_BODIES, TYP_HOST ) ) { fprintf( stderr, "Problem in initializing angles list. Terminating!\n" ); exit( INIT_ERR ); } diff --git a/PuReMD-GPU/src/bond_orders.cu b/PuReMD-GPU/src/bond_orders.cu index 57f5baac..d39f1b41 100644 --- a/PuReMD-GPU/src/bond_orders.cu +++ b/PuReMD-GPU/src/bond_orders.cu @@ -19,12 +19,12 @@ ----------------------------------------------------------------------*/ #include "bond_orders.h" + #include "list.h" #include "lookup.h" #include "print_utils.h" #include "vector.h" - #include "index_utils.h" #include "cuda_utils.h" #include "cuda_helpers.h" diff --git a/PuReMD-GPU/src/bond_orders.h b/PuReMD-GPU/src/bond_orders.h index 19fea911..3987a5c6 100644 --- a/PuReMD-GPU/src/bond_orders.h +++ b/PuReMD-GPU/src/bond_orders.h @@ -21,8 +21,10 @@ #ifndef __BOND_ORDERS_H_ #define __BOND_ORDERS_H_ + #include "mytypes.h" + typedef struct { real C1dbo, C2dbo, C3dbo; @@ -31,6 +33,7 @@ typedef struct real C1dDelta, C2dDelta, C3dDelta; } dbond_coefficients; + #ifdef TEST_FORCES void Get_dBO( reax_system*, list**, int, int, real, rvec* ); void Get_dBOpinpi2( reax_system*, list**, int, int, real, real, rvec*, rvec* ); diff --git a/PuReMD-GPU/src/cuda_copy.cu b/PuReMD-GPU/src/cuda_copy.cu index 2db79e37..a29d38d3 100644 --- a/PuReMD-GPU/src/cuda_copy.cu +++ b/PuReMD-GPU/src/cuda_copy.cu @@ -18,12 +18,11 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ - - - #include "cuda_copy.h" + #include "vector.h" + void Sync_Host_Device (grid *host, grid *dev, enum cudaMemcpyKind dir) { copy_host_device (host->top, dev->top, diff --git a/PuReMD-GPU/src/cuda_copy.h b/PuReMD-GPU/src/cuda_copy.h index 561a49fb..6f2509e8 100644 --- a/PuReMD-GPU/src/cuda_copy.h +++ b/PuReMD-GPU/src/cuda_copy.h @@ -24,7 +24,7 @@ #define __CUDA_COPY_H_ #include "cuda_utils.h" -#include "cuda.h" + #include "mytypes.h" #include "list.h" diff --git a/PuReMD-GPU/src/cuda_helpers.h b/PuReMD-GPU/src/cuda_helpers.h index e021cf84..adbf73ce 100644 --- a/PuReMD-GPU/src/cuda_helpers.h +++ b/PuReMD-GPU/src/cuda_helpers.h @@ -21,9 +21,11 @@ #ifndef __CUDA_HELPERS__ #define __CUDA_HELPERS__ + #include "mytypes.h" -DEVICE inline int cuda_strcmp (char *a, char *b, int len) + +DEVICE static inline int cuda_strcmp(char *a, char *b, int len) { char *src, *dst; @@ -32,20 +34,26 @@ DEVICE inline int cuda_strcmp (char *a, char *b, int len) for (int i = 0; i < len; i++) { - if (*dst == '\0') + { return 0; + } - if (*src != *dst) return 1; + if (*src != *dst) + { + return 1; + } - src ++; - dst ++; + src++; + dst++; } return 0; } -DEVICE inline real atomicAdd(real* address, real val) + +#if __CUDA_ARCH__ < 600 +DEVICE static inline real atomicAdd(double* address, double val) { unsigned long long int* address_as_ull = (unsigned long long int*)address; @@ -59,19 +67,23 @@ DEVICE inline real atomicAdd(real* address, real val) while (assumed != old); return __longlong_as_double(old); } +#endif + -DEVICE inline void atomic_rvecAdd( rvec ret, rvec v ) +DEVICE static inline void atomic_rvecAdd( rvec ret, rvec v ) { - atomicAdd ( &ret[0], v[0] ); - atomicAdd ( &ret[1], v[1] ); - atomicAdd ( &ret[2], v[2] ); + atomicAdd( (double*)&ret[0], (double)v[0] ); + atomicAdd( (double*)&ret[1], (double)v[1] ); + atomicAdd( (double*)&ret[2], (double)v[2] ); } -DEVICE inline void atomic_rvecScaledAdd( rvec ret, real c, rvec v ) + +DEVICE static inline void atomic_rvecScaledAdd( rvec ret, real c, rvec v ) { - atomicAdd ( &ret[0], c * v[0] ); - atomicAdd ( &ret[1], c * v[1] ); - atomicAdd ( &ret[2], c * v[2] ); + atomicAdd( (double*)&ret[0], (double)(c * v[0]) ); + atomicAdd( (double*)&ret[1], (double)(c * v[1]) ); + atomicAdd( (double*)&ret[2], (double)(c * v[2]) ); } + #endif diff --git a/PuReMD-GPU/src/cuda_init.cu b/PuReMD-GPU/src/cuda_init.cu index 09515038..6bad5e22 100644 --- a/PuReMD-GPU/src/cuda_init.cu +++ b/PuReMD-GPU/src/cuda_init.cu @@ -18,15 +18,15 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ - - - #include "cuda_init.h" + #include "cuda_utils.h" #include "cuda_copy.h" + #include "vector.h" #include "reset_utils.h" + void Cuda_Init_System ( reax_system *system) { cuda_malloc ( (void **) &system->d_atoms, system->N * REAX_ATOM_SIZE, 1, RES_SYSTEM_ATOMS ); @@ -55,17 +55,20 @@ void Cuda_Init_System ( reax_system *system) system->reaxprm.d_gp.vdw_type = 0; } + void Cuda_Init_Control (control_params *control) { cuda_malloc ((void **)&control->d_control, CONTROL_PARAMS_SIZE, 1, RES_CONTROL_PARAMS ); copy_host_device (control, control->d_control, CONTROL_PARAMS_SIZE, cudaMemcpyHostToDevice, RES_CONTROL_PARAMS ); } + void Cuda_Init_Simulation_Data (simulation_data *data) { cuda_malloc ((void **) &(data->d_simulation_data), SIMULATION_DATA_SIZE, 1, RES_SIMULATION_DATA ); } + GLOBAL void Initialize_Grid (ivec *nbrs, rvec *nbrs_cp, int N) { int index = blockIdx.x * blockDim.x + threadIdx.x; @@ -80,6 +83,7 @@ GLOBAL void Initialize_Grid (ivec *nbrs, rvec *nbrs_cp, int N) nbrs_cp[index][2] = -1; } + void Cuda_Init_Grid (grid *host, grid *dev) { int total = host->ncell[0] * host->ncell[1] * host->ncell[2]; @@ -112,6 +116,7 @@ void Cuda_Init_Grid (grid *host, grid *dev) cudaCheckError (); } + GLOBAL void Init_Workspace_Arrays (single_body_parameters *sbp, reax_atom *atoms, static_storage workspace, int N) { @@ -127,6 +132,7 @@ GLOBAL void Init_Workspace_Arrays (single_body_parameters *sbp, reax_atom *atoms workspace.b[i+N] = -1.0; } + GLOBAL void Init_Map_Serials (int *input, int N) { int i = blockIdx.x * blockDim.x + threadIdx.x; @@ -135,6 +141,7 @@ GLOBAL void Init_Map_Serials (int *input, int N) input[i] = -1; } + void Cuda_Init_Workspace_System (reax_system *system, static_storage *workspace ) { int blocks, block_size = BLOCK_SIZE; @@ -262,6 +269,7 @@ void Cuda_Init_Workspace( reax_system *system, control_params *control, Cuda_Reset_Workspace( system, workspace ); } + void Cuda_Init_Workspace_Device ( static_storage *workspace ) { workspace->realloc.estimate_nbrs = -1; @@ -273,6 +281,7 @@ void Cuda_Init_Workspace_Device ( static_storage *workspace ) workspace->realloc.gcell_atoms = -1; } + void Cuda_Init_Sparse_Matrix (sparse_matrix *matrix, int entries, int N) { cuda_malloc ((void **) &matrix->start, INT_SIZE * (N + 1), 1, RES_SPARSE_MATRIX_INDEX ); @@ -284,6 +293,7 @@ void Cuda_Init_Sparse_Matrix (sparse_matrix *matrix, int entries, int N) } + void Cuda_Init_Scratch () { cuda_malloc ((void **) &scratch, SCRATCH_SIZE, 0, RES_SCRATCH ); diff --git a/PuReMD-GPU/src/cuda_utils.cu b/PuReMD-GPU/src/cuda_utils.cu index 2c632c05..fc8b132a 100644 --- a/PuReMD-GPU/src/cuda_utils.cu +++ b/PuReMD-GPU/src/cuda_utils.cu @@ -18,11 +18,8 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ - - - #include "cuda_utils.h" -#include "mytypes.h" + void cuda_malloc (void **ptr, int size, int memset, int err_code) { diff --git a/PuReMD-GPU/src/cuda_utils.h b/PuReMD-GPU/src/cuda_utils.h index ba793e40..363b7c14 100644 --- a/PuReMD-GPU/src/cuda_utils.h +++ b/PuReMD-GPU/src/cuda_utils.h @@ -21,14 +21,14 @@ #ifndef __CUDA_UTILS_H_ #define __CUDA_UTILS_H_ -#include "cuda.h" -#include "cublas_v2.h" -#include "cusparse_v2.h" -#include "stdlib.h" -#include "stdio.h" +#include "mytypes.h" + +#include <stdlib.h> +#include <stdio.h> #define IDX2C(i,j,ld) (((j)*(ld))+(i)) + static __inline__ void modify (cublasHandle_t handle, float *m, int ldm, int n, int p, int q, float alpha, float beta) { cublasSscal (handle, n - p, &alpha, &m[IDX2C(p, q, ldm)], ldm); diff --git a/PuReMD-GPU/src/four_body_interactions.cu b/PuReMD-GPU/src/four_body_interactions.cu index d7bf757e..af134bab 100644 --- a/PuReMD-GPU/src/four_body_interactions.cu +++ b/PuReMD-GPU/src/four_body_interactions.cu @@ -993,33 +993,23 @@ GLOBAL void Four_Body_Interactions ( reax_atom *atoms, if( control->ensemble == NVE || control->ensemble == NVT ||control->ensemble == bNVT) { /* dcos_theta_ijk */ //PERFORMANCE IMPACT - atomic_rvecScaledAdd (pbond_ij->i_f, - CEtors7 + CEconj4, p_ijk->dcos_dk ); - rvec_ScaledAdd( atoms[j].f, - CEtors7 + CEconj4, p_ijk->dcos_dj ); - atomic_rvecScaledAdd( pbond_jk->k_f, - CEtors7 + CEconj4, p_ijk->dcos_di ); + atomic_rvecScaledAdd( pbond_ij->i_f, CEtors7 + CEconj4, p_ijk->dcos_dk ); + rvec_ScaledAdd( atoms[j].f, CEtors7 + CEconj4, p_ijk->dcos_dj ); + atomic_rvecScaledAdd( pbond_jk->k_f, CEtors7 + CEconj4, p_ijk->dcos_di ); /* dcos_theta_jkl */ //PERFORMANCE IMPACT - rvec_ScaledAdd( atoms[j].f, - CEtors8 + CEconj5, p_jkl->dcos_di ); - atomic_rvecScaledAdd( pbond_jk->i_f, - CEtors8 + CEconj5, p_jkl->dcos_dj ); - atomic_rvecScaledAdd( pbond_kl->k_f, - CEtors8 + CEconj5, p_jkl->dcos_dk ); + rvec_ScaledAdd( atoms[j].f, CEtors8 + CEconj5, p_jkl->dcos_di ); + atomic_rvecScaledAdd( pbond_jk->i_f, CEtors8 + CEconj5, p_jkl->dcos_dj ); + atomic_rvecScaledAdd( pbond_kl->k_f, CEtors8 + CEconj5, p_jkl->dcos_dk ); /* dcos_omega */ //PERFORMANCE IMPACT - atomic_rvecScaledAdd( pbond_ij->i_f, - CEtors9 + CEconj6, dcos_omega_di ); - rvec_ScaledAdd( atoms[j].f, - CEtors9 + CEconj6, dcos_omega_dj ); - atomic_rvecScaledAdd( pbond_jk->i_f, - CEtors9 + CEconj6, dcos_omega_dk ); - atomic_rvecScaledAdd( pbond_kl->k_f, - CEtors9 + CEconj6, dcos_omega_dl ); + atomic_rvecScaledAdd( pbond_ij->i_f, CEtors9 + CEconj6, dcos_omega_di ); + rvec_ScaledAdd( atoms[j].f, CEtors9 + CEconj6, dcos_omega_dj ); + atomic_rvecScaledAdd( pbond_jk->i_f, CEtors9 + CEconj6, dcos_omega_dk ); + atomic_rvecScaledAdd( pbond_kl->k_f, CEtors9 + CEconj6, dcos_omega_dl ); } else { ivec_Sum(rel_box_jl, pbond_jk->rel_box, pbond_kl->rel_box); @@ -1033,8 +1023,7 @@ GLOBAL void Four_Body_Interactions ( reax_atom *atoms, //rvec_Add (sh_press [threadIdx.x], ext_press); //PERFORMANCE IMPACT - rvec_ScaledAdd( atoms[j].f, - CEtors7 + CEconj4, p_ijk->dcos_dj ); + rvec_ScaledAdd( atoms[j].f, CEtors7 + CEconj4, p_ijk->dcos_dj ); rvec_Scale( force, CEtors7 + CEconj4, p_ijk->dcos_di ); //PERFORMANCE IMPACT @@ -1047,8 +1036,7 @@ GLOBAL void Four_Body_Interactions ( reax_atom *atoms, /* dcos_theta_jkl */ //PERFORMANCE IMPACT - rvec_ScaledAdd( atoms[j].f, - CEtors8 + CEconj5, p_jkl->dcos_di ); + rvec_ScaledAdd( atoms[j].f, CEtors8 + CEconj5, p_jkl->dcos_di ); rvec_Scale( force, CEtors8 + CEconj5, p_jkl->dcos_dj ); //PERFORMANCE IMPACT diff --git a/PuReMD-GPU/src/init_md.cu b/PuReMD-GPU/src/init_md.cu index e1912d3c..87b84a76 100644 --- a/PuReMD-GPU/src/init_md.cu +++ b/PuReMD-GPU/src/init_md.cu @@ -967,7 +967,7 @@ void compare_far_neighbors (int *test, int *start, int *end, far_neighbor_data * fprintf (stderr, "Serial NumNeighbors ---> %d \n", num_nbrs); #endif - if( !Make_List(system->N, num_nbrs, TYP_FAR_NEIGHBOR, (*lists)+FAR_NBRS) ) { + if( !Make_List(system->N, num_nbrs, TYP_FAR_NEIGHBOR, (*lists)+FAR_NBRS), TYP_HOST ) { fprintf(stderr, "Problem in initializing far nbrs list. Terminating!\n"); exit( INIT_ERR ); } @@ -1036,7 +1036,7 @@ void compare_far_neighbors (int *test, int *start, int *end, far_neighbor_data * #endif /* 3bodies list */ - if(!Make_List(num_bonds, num_3body, TYP_THREE_BODY, (*lists)+THREE_BODIES)) { + if(!Make_List(num_bonds, num_3body, TYP_THREE_BODY, (*lists)+THREE_BODIES), TYP_HOST) { fprintf( stderr, "Problem in initializing angles list. Terminating!\n" ); exit( INIT_ERR ); } @@ -1046,12 +1046,12 @@ void compare_far_neighbors (int *test, int *start, int *end, far_neighbor_data * num_3body * sizeof(three_body_interaction_data) / (1024*1024) ); #endif #ifdef TEST_FORCES - if(!Make_List( system->N, num_bonds * 8, TYP_DDELTA, (*lists) + DDELTA )) { + if(!Make_List( system->N, num_bonds * 8, TYP_DDELTA, (*lists) + DDELTA, TYP_HOST )) { fprintf( stderr, "Problem in initializing dDelta list. Terminating!\n" ); exit( INIT_ERR ); } - if( !Make_List( num_bonds, num_bonds*MAX_BONDS*3, TYP_DBO, (*lists)+DBO ) ) { + if( !Make_List( num_bonds, num_bonds*MAX_BONDS*3, TYP_DBO, (*lists)+DBO, TYP_HOST ) ) { fprintf( stderr, "Problem in initializing dBO list. Terminating!\n" ); exit( INIT_ERR ); } diff --git a/PuReMD-GPU/src/list.h b/PuReMD-GPU/src/list.h index f341c2e2..66456acb 100644 --- a/PuReMD-GPU/src/list.h +++ b/PuReMD-GPU/src/list.h @@ -23,8 +23,8 @@ #include "mytypes.h" -HOST char Make_List( int, int, int, list* , int proc = TYP_HOST); -HOST void Delete_List( list* , int proc = TYP_HOST); +HOST char Make_List( int, int, int, list* , int ); +HOST void Delete_List( list* , int ); inline HOST_DEVICE int Num_Entries(int i, list* l) diff --git a/PuReMD-GPU/src/mytypes.h b/PuReMD-GPU/src/mytypes.h index c7d42ee9..91b9438d 100644 --- a/PuReMD-GPU/src/mytypes.h +++ b/PuReMD-GPU/src/mytypes.h @@ -18,8 +18,34 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#ifndef __MYTYPES_H_ -#define __MYTYPES_H_ +#if !(defined(__MYTYPES_H_) || defined(__CUDA_MYTYPES_H_)) + +#ifdef __CUDACC__ + #ifndef __CUDA_MYTYPES_H_ + #define __CUDA_MYTYPES_H_ + #define HOST __host__ + #define DEVICE __device__ + #define GLOBAL __global__ + #define HOST_DEVICE __host__ __device__ + + #include <cuda.h> + #include <cublas_v2.h> + #include <cusparse_v2.h> + #endif +#else + #ifndef __MYTYPES_H_ + #define __MYTYPES_H_ + #define HOST + #define DEVICE + #define GLOBAL + #define HOST_DEVICE + #endif +#endif + +#if (defined(HAVE_CONFIG_H) && !defined(__CONFIG_H_)) + #define __CONFIG_H_ + #include "config.h" +#endif #include "math.h" //#include "random.h" @@ -30,29 +56,12 @@ #include "time.h" #include "zlib.h" - //#define DEBUG_FOCUS //#define TEST_FORCES //#define TEST_ENERGY //#define REORDER_ATOMS // turns on nbrgen opt by re-ordering atoms //#define LGJ -#ifdef __USE_GPU__ - -#include "cublas_v2.h" -#include "cusparse_v2.h" - -#define HOST __host__ -#define DEVICE __device__ -#define GLOBAL __global__ -#define HOST_DEVICE __host__ __device__ -#else -#define HOST -#define DEVICE -#define GLOBAL -#define HOST_DEVICE -#endif - #define EXP exp #define SQRT sqrt #define POW pow @@ -140,10 +149,10 @@ #define RES_GRID_MARK 0x03 #define RES_GRID_START 0x04 #define RES_GRID_END 0x05 -#define RES_GRID_NBRS 0x06 -#define RES_GRID_NBRS_CP 0x07 +#define RES_GRID_NBRS 0x06 +#define RES_GRID_NBRS_CP 0x07 -#define RES_SYSTEM_ATOMS 0x10 +#define RES_SYSTEM_ATOMS 0x10 #define RES_SYSTEM_SIMULATION_BOX 0x11 #define RES_REAX_INT_SBP 0x20 @@ -154,58 +163,58 @@ #define RES_SIMULATION_DATA 0x30 -#define RES_STORAGE 0x401 -#define RES_STORAGE_HBOND_INDEX 0x402 -#define RES_STORAGE_TOTAL_BOND_ORDER 0x403 -#define RES_STORAGE_DELTAP 0x404 -#define RES_STORAGE_DELTAP_BOC 0x404 -#define RES_STORAGE_DDELTAP_SELF 0x405 -#define RES_STORAGE_DELTA 0x406 -#define RES_STORAGE_DELTA_LP 0x407 -#define RES_STORAGE_DELTA_LP_TEMP 0x408 -#define RES_STORAGE_DDELTA_LP 0x409 -#define RES_STORAGE_DDELTA_LP_TEMP 0x40A -#define RES_STORAGE_DELTA_E 0x40B -#define RES_STORAGE_DELTA_BOC 0x40C -#define RES_STORAGE_NL 0x40D -#define RES_STORAGE_NLP_TEMP 0x40E -#define RES_STORAGE_CLP 0x40F -#define RES_STORAGE_CDDELTA 0x410 -#define RES_STORAGE_VLPEX 0x411 -#define RES_STORAGE_DROPTOL 0x412 -#define RES_STORAGE_W 0x413 -#define RES_STORAGE_HDIA_INV 0x414 -#define RES_STORAGE_B 0x415 -#define RES_STORAGE_B_S 0x416 -#define RES_STORAGE_B_T 0x417 -#define RES_STORAGE_B_PRC 0x418 -#define RES_STORAGE_B_PRM 0x419 -#define RES_STORAGE_S_T 0x41A -#define RES_STORAGE_S 0x41B -#define RES_STORAGE_T 0x41C -#define RES_STORAGE_Y 0x41D -#define RES_STORAGE_Z 0x41E -#define RES_STORAGE_G 0x41F -#define RES_STORAGE_HS 0x420 -#define RES_STORAGE_HC 0x421 -#define RES_STORAGE_RN 0x422 -#define RES_STORAGE_V 0x423 -#define RES_STORAGE_H 0x424 -#define RES_STORAGE_R 0x425 -#define RES_STORAGE_D 0x426 -#define RES_STORAGE_Q 0x427 -#define RES_STORAGE_P 0x428 -#define RES_STORAGE_A 0x429 -#define RES_STORAGE_F_OLD 0x42A -#define RES_STORAGE_V_CONST 0x42B -#define RES_STORAGE_MARK 0x42C -#define RES_STORAGE_OLD_MARK 0x42D -#define RES_STORAGE_X_OLD 0x42E -#define RES_STORAGE_NLP 0x42F -#define RES_STORAGE_MAP_SERIALS 0x430 -#define RES_STORAGE_RESTRICTED 0x431 -#define RES_STORAGE_RESTRICTED_LIST 0x432 -#define RES_STORAGE_ORIG_ID 0x433 +#define RES_STORAGE 0x401 +#define RES_STORAGE_HBOND_INDEX 0x402 +#define RES_STORAGE_TOTAL_BOND_ORDER 0x403 +#define RES_STORAGE_DELTAP 0x404 +#define RES_STORAGE_DELTAP_BOC 0x404 +#define RES_STORAGE_DDELTAP_SELF 0x405 +#define RES_STORAGE_DELTA 0x406 +#define RES_STORAGE_DELTA_LP 0x407 +#define RES_STORAGE_DELTA_LP_TEMP 0x408 +#define RES_STORAGE_DDELTA_LP 0x409 +#define RES_STORAGE_DDELTA_LP_TEMP 0x40A +#define RES_STORAGE_DELTA_E 0x40B +#define RES_STORAGE_DELTA_BOC 0x40C +#define RES_STORAGE_NL 0x40D +#define RES_STORAGE_NLP_TEMP 0x40E +#define RES_STORAGE_CLP 0x40F +#define RES_STORAGE_CDDELTA 0x410 +#define RES_STORAGE_VLPEX 0x411 +#define RES_STORAGE_DROPTOL 0x412 +#define RES_STORAGE_W 0x413 +#define RES_STORAGE_HDIA_INV 0x414 +#define RES_STORAGE_B 0x415 +#define RES_STORAGE_B_S 0x416 +#define RES_STORAGE_B_T 0x417 +#define RES_STORAGE_B_PRC 0x418 +#define RES_STORAGE_B_PRM 0x419 +#define RES_STORAGE_S_T 0x41A +#define RES_STORAGE_S 0x41B +#define RES_STORAGE_T 0x41C +#define RES_STORAGE_Y 0x41D +#define RES_STORAGE_Z 0x41E +#define RES_STORAGE_G 0x41F +#define RES_STORAGE_HS 0x420 +#define RES_STORAGE_HC 0x421 +#define RES_STORAGE_RN 0x422 +#define RES_STORAGE_V 0x423 +#define RES_STORAGE_H 0x424 +#define RES_STORAGE_R 0x425 +#define RES_STORAGE_D 0x426 +#define RES_STORAGE_Q 0x427 +#define RES_STORAGE_P 0x428 +#define RES_STORAGE_A 0x429 +#define RES_STORAGE_F_OLD 0x42A +#define RES_STORAGE_V_CONST 0x42B +#define RES_STORAGE_MARK 0x42C +#define RES_STORAGE_OLD_MARK 0x42D +#define RES_STORAGE_X_OLD 0x42E +#define RES_STORAGE_NLP 0x42F +#define RES_STORAGE_MAP_SERIALS 0x430 +#define RES_STORAGE_RESTRICTED 0x431 +#define RES_STORAGE_RESTRICTED_LIST 0x432 +#define RES_STORAGE_ORIG_ID 0x433 #define RES_CONTROL_PARAMS 0x50 @@ -224,7 +233,6 @@ #define RES_SCRATCH 0x90 - #define LIST_INDEX 0x00 #define LIST_END_INDEX 0x01 #define LIST_FAR_NEIGHBOR_DATA 0x10 @@ -288,7 +296,6 @@ #define MATVEC_THREADS_PER_ROW 32 - enum {TYP_HOST, TYP_DEVICE}; typedef double real; @@ -309,7 +316,6 @@ enum {WRITE_ASCII, WRITE_BINARY, RF_N}; enum {XYZ, PDB, BGF, ASCII_RESTART, BINARY_RESTART, GF_N}; - /* Global params mapping */ /* l[0] = p_boc1 @@ -352,7 +358,6 @@ l[36] = N/A l[37] = version number l[38] = p_coa3 */ - typedef struct { int n_global; @@ -361,7 +366,6 @@ typedef struct } global_parameters; - typedef struct { /* Line one in field file */ @@ -405,7 +409,6 @@ typedef struct } single_body_parameters; - /* Two Body Parameters */ typedef struct { @@ -435,7 +438,6 @@ typedef struct } two_body_parameters; - /* 3-body parameters */ typedef struct { @@ -458,7 +460,6 @@ typedef struct } three_body_header; - /* hydrogen-bond parameters */ typedef struct { @@ -466,7 +467,6 @@ typedef struct } hbond_parameters; - /* 4-body parameters */ typedef struct { @@ -560,7 +560,6 @@ typedef struct int *end; ivec *nbrs; rvec *nbrs_cp; - } grid; @@ -768,8 +767,6 @@ typedef struct reax_timing timing; //CUDA reax_timing d_timing; - - void *d_simulation_data; } simulation_data; @@ -837,6 +834,7 @@ typedef struct rvec dBO, dBOpi, dBOpi2; } dbond_data; + typedef struct { real BO, BO_s, BO_pi, BO_pi2; @@ -847,6 +845,7 @@ typedef struct rvec dBOp, dln_BOp_s, dln_BOp_pi, dln_BOp_pi2; } bond_order_data; + typedef struct { int nbr; @@ -886,6 +885,7 @@ typedef struct real val; } sparse_matrix_entry; + typedef struct { int n, m; @@ -914,6 +914,7 @@ typedef struct int gcell_atoms; } reallocate_data; + typedef struct { /* bond order related storage */ @@ -999,7 +1000,6 @@ typedef struct } list; - typedef struct { FILE *trj; @@ -1070,12 +1070,12 @@ typedef struct } LR_data; - typedef struct { real a, b, c, d; } cubic_spline_coef; + typedef struct { real xmin, xmax; @@ -1126,8 +1126,7 @@ typedef void (*get_far_neighbors_function)(rvec, rvec, simulation_box*, int*); -// CUDA structures -// +/* CUDA structures */ extern list *dev_lists; extern static_storage *dev_workspace; extern LR_lookup_table *d_LR; @@ -1138,7 +1137,7 @@ extern void *scratch; extern int BLOCKS, BLOCKS_POW_2, BLOCK_SIZE; extern int MATVEC_BLOCKS; -#ifdef __USE_GPU__ +#ifdef __CUDACC__ extern cublasStatus_t cublasStatus; extern cublasHandle_t cublasHandle; @@ -1148,5 +1147,4 @@ extern cusparseMatDescr_t matdescriptor; #endif - #endif diff --git a/PuReMD-GPU/src/reduction.cu b/PuReMD-GPU/src/reduction.cu index 48fb5efc..99eead52 100644 --- a/PuReMD-GPU/src/reduction.cu +++ b/PuReMD-GPU/src/reduction.cu @@ -19,8 +19,8 @@ ----------------------------------------------------------------------*/ #include "reduction.h" + #include "vector.h" -#include "mytypes.h" GLOBAL void Cuda_reduction(const real *input, real *per_block_results, const size_t n) diff --git a/PuReMD-GPU/src/single_body_interactions.h b/PuReMD-GPU/src/single_body_interactions.h index dd266797..b31d9431 100644 --- a/PuReMD-GPU/src/single_body_interactions.h +++ b/PuReMD-GPU/src/single_body_interactions.h @@ -21,7 +21,8 @@ #ifndef __SINGLE_BODY_INTERACTIONS_H_ #define __SINGLE_BODY_INTERACTIONS_H_ -#include <mytypes.h> +#include "mytypes.h" + void LonePair_OverUnder_Coordination_Energy( reax_system*, control_params*, simulation_data*, static_storage*, diff --git a/PuReMD-GPU/src/system_props.cu b/PuReMD-GPU/src/system_props.cu index 3ec39134..e0bb2626 100644 --- a/PuReMD-GPU/src/system_props.cu +++ b/PuReMD-GPU/src/system_props.cu @@ -100,7 +100,7 @@ void Cuda_Compute_Total_Mass( reax_system *system, simulation_data *data ) //cuda_malloc ((void **)&partial_sums, sizeof (real) * (blocks + 1), 1, 0); cuda_memset (partial_sums, 0, REAL_SIZE * (BLOCKS_POW_2 + 1), RES_SCRATCH ); - Compute_Total_Mass <<<BLOCKS_POW_2, BLOCK_SIZE, REAL_SIZE * BLOCK_SIZE >>> + k_Compute_Total_Mass <<<BLOCKS_POW_2, BLOCK_SIZE, REAL_SIZE * BLOCK_SIZE >>> (system->reaxprm.d_sbp, system->d_atoms, partial_sums, system->N); cudaThreadSynchronize (); cudaCheckError (); @@ -133,7 +133,7 @@ void Cuda_Compute_Total_Mass( reax_system *system, simulation_data *data ) } -GLOBAL void Compute_Total_Mass (single_body_parameters *sbp, reax_atom *atoms, real *per_block_results, size_t n) +GLOBAL void k_Compute_Total_Mass (single_body_parameters *sbp, reax_atom *atoms, real *per_block_results, size_t n) { extern __shared__ real sdata[]; unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/PuReMD-GPU/src/system_props.h b/PuReMD-GPU/src/system_props.h index d287992f..d152c254 100644 --- a/PuReMD-GPU/src/system_props.h +++ b/PuReMD-GPU/src/system_props.h @@ -21,7 +21,7 @@ #ifndef __SYSTEM_PROP_H_ #define __SYSTEM_PROP_H_ -#include <mytypes.h> +#include "mytypes.h" real Get_Time( ); @@ -43,7 +43,7 @@ void Compute_Pressure( reax_system*, simulation_data*, static_storage* ); void Compute_Pressure_Isotropic( reax_system*, control_params*, simulation_data*, output_controls* ); void prep_dev_system (reax_system *system); -GLOBAL void Compute_Total_Mass (single_body_parameters *, reax_atom *, real *, size_t ); +GLOBAL void k_Compute_Total_Mass (single_body_parameters *, reax_atom *, real *, size_t ); //GLOBAL void Compute_Kinetic_Energy (single_body_parameters *, reax_atom *, unsigned int , simulation_data *, real *); diff --git a/PuReMD-GPU/src/two_body_interactions.h b/PuReMD-GPU/src/two_body_interactions.h index 41483222..efc19927 100644 --- a/PuReMD-GPU/src/two_body_interactions.h +++ b/PuReMD-GPU/src/two_body_interactions.h @@ -21,9 +21,10 @@ #ifndef __TWO_BODY_INTERACTIONS_H_ #define __TWO_BODY_INTERACTIONS_H_ -#include <mytypes.h> +#include "mytypes.h" #include "index_utils.h" + void Bond_Energy( reax_system*, control_params*, simulation_data*, static_storage*, list**, output_controls* ); void vdW_Coulomb_Energy( reax_system*, control_params*, simulation_data*, -- GitLab