From 3de10e335f390b8af4c48c5a1516f2969bc59fd1 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnku@msu.edu> Date: Tue, 14 Jun 2016 12:55:11 -0400 Subject: [PATCH] Autotools: add support for compiling shared sources between NVCC and CC trajectories. --- puremd_rc_1003/PG-PuReMD/Makefile.am | 45 +++--- puremd_rc_1003/PG-PuReMD/configure.ac | 46 +++---- puremd_rc_1003/PG-PuReMD/src/allocate.c | 8 ++ puremd_rc_1003/PG-PuReMD/src/center_mass.h | 2 +- .../PG-PuReMD/src/cuda_bond_orders.h | 2 +- puremd_rc_1003/PG-PuReMD/src/cuda_bonds.cu | 2 +- puremd_rc_1003/PG-PuReMD/src/cuda_forces.cu | 2 +- .../PG-PuReMD/src/cuda_hydrogen_bonds.h | 2 +- .../PG-PuReMD/src/cuda_integrate.cu | 2 +- .../PG-PuReMD/src/cuda_linear_solvers.cu | 2 +- puremd_rc_1003/PG-PuReMD/src/cuda_lookup.cu | 2 +- .../PG-PuReMD/src/cuda_multi_body.cu | 2 +- .../PG-PuReMD/src/cuda_neighbors.cu | 2 +- .../PG-PuReMD/src/cuda_nonbonded.cu | 2 +- .../PG-PuReMD/src/cuda_post_evolve.cu | 2 +- puremd_rc_1003/PG-PuReMD/src/cuda_qEq.cu | 2 +- .../PG-PuReMD/src/cuda_reax_constants.h | 97 ------------- puremd_rc_1003/PG-PuReMD/src/cuda_shuffle.h | 2 +- .../PG-PuReMD/src/cuda_torsion_angles.h | 2 +- puremd_rc_1003/PG-PuReMD/src/cuda_utils.h | 2 +- .../PG-PuReMD/src/cuda_valence_angles.h | 2 +- puremd_rc_1003/PG-PuReMD/src/dev_list.h | 2 +- puremd_rc_1003/PG-PuReMD/src/dual_matvec.h | 2 +- puremd_rc_1003/PG-PuReMD/src/forces.c | 12 ++ puremd_rc_1003/PG-PuReMD/src/grid.c | 4 + puremd_rc_1003/PG-PuReMD/src/index_utils.h | 1 - puremd_rc_1003/PG-PuReMD/src/init_md.c | 20 ++- puremd_rc_1003/PG-PuReMD/src/integrate.c | 5 +- puremd_rc_1003/PG-PuReMD/src/linear_solvers.c | 10 ++ puremd_rc_1003/PG-PuReMD/src/lookup.c | 7 +- puremd_rc_1003/PG-PuReMD/src/matvec.h | 2 +- puremd_rc_1003/PG-PuReMD/src/parallelreax.c | 74 ++++++++-- puremd_rc_1003/PG-PuReMD/src/qEq.c | 8 ++ puremd_rc_1003/PG-PuReMD/src/reax_types.h | 130 +++++++++++++++--- puremd_rc_1003/PG-PuReMD/src/reduction.h | 2 +- puremd_rc_1003/PG-PuReMD/src/reset_tools.c | 8 +- puremd_rc_1003/PG-PuReMD/src/system_props.c | 28 ++-- puremd_rc_1003/PG-PuReMD/src/traj.c | 9 +- puremd_rc_1003/PG-PuReMD/src/validation.cu | 2 +- puremd_rc_1003/PG-PuReMD/src/vector.h | 2 - 40 files changed, 337 insertions(+), 221 deletions(-) delete mode 100644 puremd_rc_1003/PG-PuReMD/src/cuda_reax_constants.h diff --git a/puremd_rc_1003/PG-PuReMD/Makefile.am b/puremd_rc_1003/PG-PuReMD/Makefile.am index f39c038a..9c51de81 100644 --- a/puremd_rc_1003/PG-PuReMD/Makefile.am +++ b/puremd_rc_1003/PG-PuReMD/Makefile.am @@ -1,42 +1,36 @@ -if BUILD_CUDA +if USE_CUDA SUFFIXES = .cu include cuda.am endif -AM_CFLAGS = -Wall -O3 -funroll-loops -fstrict-aliasing -AM_CPPFLAGS = -I$(srcdir) -AM_LDFLAGS = - -if BUILD_CUDA -AM_CFLAGS += $(CUDA_CFLAGS) -AM_CPPFLAGS += $(CUDA_CFLAGS) +AM_CFLAGS = -Wall -O3 -funroll-loops -fstrict-aliasing $(MPI_CFLAGS) +AM_CPPFLAGS = +AM_LDFLAGS = $(MPI_LDFLAGS) +if USE_CUDA # default CUDA nvcc flags # Note: cc 13 for Tesla # Note: cc 20 for Fermi # Note: cc 30 for Kepler K10 # Note: cc 35 for Kepler K20 -NVCCFLAGS += -D__USE_GPU__ -use_fast_math -NVCCFLAGS += -gencode arch=compute_35,code=sm_35 --compiler-options "-O3 -funroll-loops -fstrict-aliasing" -O3 -NVCCFLAGS += --ptxas-options -v -NVCCFLAGS += --compiler-options -I$(srcdir) -NVCCFLAGS += --compiler-options -fno-strict-aliasing -NVCCFLAGS += --compiler-options -Wno-unused-function -NVCCFLAGS += --compiler-options -Wno-unused-parameter -NVCCFLAGS += --compiler-options "$(MPI_CFLAGS)" +NVCCFLAGS += -use_fast_math +NVCCFLAGS += -gencode arch=compute_35,code=sm_35 +NVCCFLAGS += --compiler-options "$(DEFS) -O3 -funroll-loops -fstrict-aliasing $(MPI_CFLAGS)" +#NVCCFLAGS += --ptxas-options -v endif bin_PROGRAMS = src/pg-puremd src_pg_puremd_SOURCES = src/allocate.c src/basic_comm.c src/ffield.c src/grid.c src/list.c \ src/lookup.c src/io_tools.c src/reset_tools.c src/restart.c src/random.c \ - src/tool_box.c src/traj.c src/vector.c src/analyze.c src/box.c src/system_props.c \ + src/tool_box.c src/traj.c src/analyze.c src/box.c src/system_props.c \ src/control.c src/comm_tools.c src/geo_tools.c src/linear_solvers.c src/neighbors.c \ src/qEq.c src/bond_orders.c src/multi_body.c src/bonds.c src/valence_angles.c \ src/hydrogen_bonds.c src/torsion_angles.c src/nonbonded.c src/forces.c \ src/integrate.c src/init_md.c src/parallelreax.c +src_pg_puremd_LDADD = src/vector.o -if BUILD_CUDA +if USE_CUDA src_pg_puremd_SOURCES += src/cuda_utils.cu src/dev_alloc.cu src/cuda_environment.cu \ src/dev_system_props.cu src/reduction.cu src/center_mass.cu \ src/cuda_copy.cu src/cuda_reset_tools.cu src/dev_list.cu \ @@ -49,12 +43,21 @@ src_pg_puremd_SOURCES += src/cuda_utils.cu src/dev_alloc.cu src/cuda_environment # dummy source to cause C linking nodist_EXTRA_src_pg_puremd_SOURCES = src/dummy.c + +src_vector.o: + $(AM_V_NVCC)$(NVCC) $(NVCCFLAGS) -maxrregcount=$(MAX_REG_COUNT) -o src/vector.o -c src/vector.c +else +src_vector.o: + $(AM_V_CC)$(CC) $(DEFS) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CCFLAGS) \ + -maxrregcount=$(MAX_REG_COUNT) -o src/vector.o -c src/vector.c endif -src_pg_puremd_CFLAGS = $(AM_CFLAGS) $(MPI_CFLAGS) -src_pg_puremd_LDFLAGS = $(AM_LDFLAGS) $(MPI_LDFLAGS) +src_pg_puremd_CFLAGS = $(AM_CFLAGS) $(CFLAGS) +src_pg_puremd_CPPFLAGS = $(AM_CPPFLAGS) $(CPPFLAGS) +src_pg_puremd_LDFLAGS = $(AM_LDFLAGS) $(LDFLAGS) -if BUILD_CUDA +if USE_CUDA +src_pg_puremd_CFLAGS += $(CUDA_CFLAGS) src_pg_puremd_LDFLAGS += $(CUDA_LIBS) endif diff --git a/puremd_rc_1003/PG-PuReMD/configure.ac b/puremd_rc_1003/PG-PuReMD/configure.ac index 5a121aac..6f07b964 100644 --- a/puremd_rc_1003/PG-PuReMD/configure.ac +++ b/puremd_rc_1003/PG-PuReMD/configure.ac @@ -47,29 +47,6 @@ AC_FUNC_REALLOC AC_FUNC_STRTOD AC_CHECK_FUNCS([gettimeofday memset]) -# Check for CUDA support. -if test "x$BUILD_GPU" = "xyes"; then - CONFIGURE_HEADLINE([ CUDA support ]) - AX_CUDA - NVCCFLAGS= - if test "BUILD_DEBUG" = "true" - then - NVCCFLAGS+=" -g -G" - fi - - AC_DEFINE([HAVE_CUDA], [1], [Define to 1 if you have CUDA support enabled.]) -fi -AM_CONDITIONAL([BUILD_CUDA], [test "x${BUILD_GPU}" = "xyes"]) - -if test "BUILD_PROF" = "true" -then - if test "x$BUILD_GPU" = "xyes"; then - NVCCFLAGS+=" --compiler-options ${gprof_flags}" - else - CFLAGS+=" --compiler-options ${gprof_flags}" - fi -fi - # Check for MPI support. CONFIGURE_HEADLINE([ MPI compiler ]) ACX_MPI([], [AC_MSG_ERROR([could not find mpi library])]) @@ -132,6 +109,29 @@ fi AC_SUBST(MPI_CFLAGS) AC_SUBST(MPI_LDFLAGS) +# Check for CUDA support. +if test "x$BUILD_GPU" = "xyes"; then + CONFIGURE_HEADLINE([ CUDA support ]) + AX_CUDA + NVCCFLAGS= + if test "BUILD_DEBUG" = "true" + then + NVCCFLAGS+=" -g -G" + fi + AC_DEFINE([HAVE_CUDA], [1], [Define to 1 if you have CUDA support enabled.]) +else + AM_CONDITIONAL(USE_CUDA, test "x" = "xyes") +fi + +if test "BUILD_PROF" = "true" +then + if test "x$BUILD_GPU" = "xyes"; then + NVCCFLAGS+=" --compiler-options ${gprof_flags}" + else + CFLAGS+=" ${gprof_flags}" + fi +fi + AC_CONFIG_FILES([Makefile]) AC_OUTPUT diff --git a/puremd_rc_1003/PG-PuReMD/src/allocate.c b/puremd_rc_1003/PG-PuReMD/src/allocate.c index a87a259c..5b07ecd8 100644 --- a/puremd_rc_1003/PG-PuReMD/src/allocate.c +++ b/puremd_rc_1003/PG-PuReMD/src/allocate.c @@ -390,6 +390,7 @@ void Reallocate_Neighbor_List( reax_list *far_nbrs, int n, int num_intrs ) } } +#ifdef HAVE_CUDA void Cuda_Reallocate_Neighbor_List( reax_list *far_nbrs, int n, int num_intrs ) { Dev_Delete_List( far_nbrs); @@ -398,6 +399,7 @@ void Cuda_Reallocate_Neighbor_List( reax_list *far_nbrs, int n, int num_intrs ) MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY ); } } +#endif /* @@ -480,6 +482,7 @@ int Reallocate_HBonds_List( reax_system *system, reax_list *hbonds ) return total_hbonds; } +#ifdef HAVE_CUDA int Cuda_Reallocate_HBonds_List( int n, int num_intrs, reax_list *hbonds ) { Dev_Delete_List( hbonds); @@ -489,6 +492,7 @@ int Cuda_Reallocate_HBonds_List( int n, int num_intrs, reax_list *hbonds ) } return SUCCESS; } +#endif int Reallocate_Bonds_List( reax_system *system, reax_list *bonds, int *total_bonds, int *est_3body ) @@ -513,6 +517,7 @@ int Reallocate_Bonds_List( reax_system *system, reax_list *bonds, return SUCCESS; } +#ifdef HAVE_CUDA int Cuda_Reallocate_Bonds_List( int n, int num_intrs, reax_list *bonds) { Dev_Delete_List( bonds); @@ -523,6 +528,7 @@ int Cuda_Reallocate_Bonds_List( int n, int num_intrs, reax_list *bonds) return SUCCESS; } +#endif /************* grid *************/ @@ -1053,6 +1059,7 @@ void ReAllocate( reax_system *system, control_params *control, #endif } +#ifdef HAVE_CUDA void Cuda_ReAllocate( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, mpi_datatypes *mpi_data ) @@ -1370,3 +1377,4 @@ void Cuda_ReAllocate( reax_system *system, control_params *control, MPI_Barrier( MPI_COMM_WORLD ); #endif } +#endif diff --git a/puremd_rc_1003/PG-PuReMD/src/center_mass.h b/puremd_rc_1003/PG-PuReMD/src/center_mass.h index 8a40d55c..cf0ef160 100644 --- a/puremd_rc_1003/PG-PuReMD/src/center_mass.h +++ b/puremd_rc_1003/PG-PuReMD/src/center_mass.h @@ -3,7 +3,7 @@ #define __CENTER_MASS_H__ #include "reax_types.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" CUDA_GLOBAL void center_of_mass_blocks (single_body_parameters *, reax_atom *, rvec *res_xcm, diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_bond_orders.h b/puremd_rc_1003/PG-PuReMD/src/cuda_bond_orders.h index cabca8c9..fbda3b76 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_bond_orders.h +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_bond_orders.h @@ -3,7 +3,7 @@ #define __CUDA_BOND_ORDERS_H__ #include "reax_types.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #include "vector.h" diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_bonds.cu b/puremd_rc_1003/PG-PuReMD/src/cuda_bonds.cu index 20b9d7ea..e3c7bdec 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_bonds.cu +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_bonds.cu @@ -21,7 +21,7 @@ #include "reax_types.h" #include "index_utils.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #include "dev_list.h" diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_forces.cu b/puremd_rc_1003/PG-PuReMD/src/cuda_forces.cu index 7c47cb6f..4244a3b5 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_forces.cu +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_forces.cu @@ -1,7 +1,7 @@ #include "cuda_forces.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #include "reax_types.h" #include "dev_list.h" #include "list.h" diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_hydrogen_bonds.h b/puremd_rc_1003/PG-PuReMD/src/cuda_hydrogen_bonds.h index c8fcdfb4..8ec471be 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_hydrogen_bonds.h +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_hydrogen_bonds.h @@ -23,7 +23,7 @@ #define __HBONDS_H_ #include "reax_types.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" CUDA_GLOBAL void Cuda_Hydrogen_Bonds_HNbrs ( reax_atom *, storage , diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_integrate.cu b/puremd_rc_1003/PG-PuReMD/src/cuda_integrate.cu index 92bffd71..6bcdee88 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_integrate.cu +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_integrate.cu @@ -1,6 +1,6 @@ #include "cuda_integrate.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #include "vector.h" #include "cuda_utils.h" diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_linear_solvers.cu b/puremd_rc_1003/PG-PuReMD/src/cuda_linear_solvers.cu index c0803f91..0a2a1535 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_linear_solvers.cu +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_linear_solvers.cu @@ -21,7 +21,7 @@ #include "cuda_linear_solvers.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #include "cuda_utils.h" #include "reduction.h" #include "dual_matvec.h" diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_lookup.cu b/puremd_rc_1003/PG-PuReMD/src/cuda_lookup.cu index 887ec249..e3a6d1e6 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_lookup.cu +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_lookup.cu @@ -2,7 +2,7 @@ #include "cuda_lookup.h" #include "index_utils.h" #include "cuda_utils.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" void copy_LR_table_to_device (reax_system *system, control_params *control, int *aggregated) diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_multi_body.cu b/puremd_rc_1003/PG-PuReMD/src/cuda_multi_body.cu index 0e6c48d3..174130aa 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_multi_body.cu +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_multi_body.cu @@ -19,7 +19,7 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#include "cuda_reax_constants.h" +#include "reax_types.h" #include "cuda_multi_body.h" #include "index_utils.h" #include "cuda_helpers.h" diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_neighbors.cu b/puremd_rc_1003/PG-PuReMD/src/cuda_neighbors.cu index b0d2a967..4bf1d9fb 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_neighbors.cu +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_neighbors.cu @@ -24,7 +24,7 @@ #include "vector.h" #include "index_utils.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #include "cuda_utils.h" #include "tool_box.h" diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_nonbonded.cu b/puremd_rc_1003/PG-PuReMD/src/cuda_nonbonded.cu index 9d0cda8f..c9948dfb 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_nonbonded.cu +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_nonbonded.cu @@ -20,7 +20,7 @@ ----------------------------------------------------------------------*/ #include "cuda_nonbonded.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #include "index_utils.h" #include "dev_list.h" #include "vector.h" diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_post_evolve.cu b/puremd_rc_1003/PG-PuReMD/src/cuda_post_evolve.cu index ea7d8b7d..79a26fd8 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_post_evolve.cu +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_post_evolve.cu @@ -1,6 +1,6 @@ #include "cuda_post_evolve.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #include "vector.h" #include "cuda_utils.h" diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_qEq.cu b/puremd_rc_1003/PG-PuReMD/src/cuda_qEq.cu index d5a939bf..d4fed53c 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_qEq.cu +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_qEq.cu @@ -21,7 +21,7 @@ #include "cuda_qEq.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #include "reduction.h" #include "cuda_utils.h" diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_reax_constants.h b/puremd_rc_1003/PG-PuReMD/src/cuda_reax_constants.h deleted file mode 100644 index 2532170f..00000000 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_reax_constants.h +++ /dev/null @@ -1,97 +0,0 @@ - -#ifndef __CUDA_REAX_CONSTANTS_H__ -#define __CUDA_REAX_CONSTANTS_H__ - -#include "cuda.h" - -#ifdef __USE_GPU__ -#define CUDA_HOST __host__ -#define CUDA_DEVICE __device__ -#define CUDA_GLOBAL __global__ -#define CUDA_HOST_DEVICE __host__ __device__ -#else -#define CUDA_HOST -#define CUDA_DEVICE -#define CUDA_GLOBAL -#define CUDA_HOST_DEVICE -#endif - -//BLOCK SIZES for kernels -// -#define HB_SYM_BLOCK_SIZE 64 -#define HB_KER_SYM_THREADS_PER_ATOM 16 -#define HB_POST_PROC_BLOCK_SIZE 256 -#define HB_POST_PROC_KER_THREADS_PER_ATOM 32 - -#if defined( __INIT_BLOCK_SIZE__) -#define DEF_BLOCK_SIZE __INIT_BLOCK_SIZE__ /* all utility functions and all */ -#define CUDA_BLOCK_SIZE __INIT_BLOCK_SIZE__ /* init forces */ -#define ST_BLOCK_SIZE __INIT_BLOCK_SIZE__ -#else -#define DEF_BLOCK_SIZE 256 /* all utility functions and all */ -#define CUDA_BLOCK_SIZE 256 /* init forces */ -#define ST_BLOCK_SIZE 256 -#endif - - -#if defined( __NBRS_THREADS_PER_ATOM__ ) -#define NB_KER_THREADS_PER_ATOM __NBRS_THREADS_PER_ATOM__ -#else -#define NB_KER_THREADS_PER_ATOM 16 -#endif - -#if defined( __NBRS_BLOCK_SIZE__) -#define NBRS_BLOCK_SIZE __NBRS_BLOCK_SIZE__ -#else -#define NBRS_BLOCK_SIZE 256 -#endif - - -#if defined( __HB_THREADS_PER_ATOM__) -#define HB_KER_THREADS_PER_ATOM __HB_THREADS_PER_ATOM__ -#else -#define HB_KER_THREADS_PER_ATOM 32 -#endif - -#if defined(__HB_BLOCK_SIZE__) -#define HB_BLOCK_SIZE __HB_BLOCK_SIZE__ -#else -#define HB_BLOCK_SIZE 256 -#endif - - -#if defined( __VDW_THREADS_PER_ATOM__ ) -#define VDW_KER_THREADS_PER_ATOM __VDW_THREADS_PER_ATOM__ -#else -#define VDW_KER_THREADS_PER_ATOM 32 -#endif - -#if defined( __VDW_BLOCK_SIZE__) -#define VDW_BLOCK_SIZE __VDW_BLOCK_SIZE__ -#else -#define VDW_BLOCK_SIZE 256 -#endif - - -#if defined( __MATVEC_THREADS_PER_ROW__ ) -#define MATVEC_KER_THREADS_PER_ROW __MATVEC_THREADS_PER_ROW__ -#else -#define MATVEC_KER_THREADS_PER_ROW 32 -#endif - - -#if defined( __MATVEC_BLOCK_SIZE__) -#define MATVEC_BLOCK_SIZE __MATVEC_BLOCK_SIZE__ -#else -#define MATVEC_BLOCK_SIZE 512 -#endif - - - - - - -//Validation -#define GPU_TOLERANCE 1e-5 - -#endif diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_shuffle.h b/puremd_rc_1003/PG-PuReMD/src/cuda_shuffle.h index d000864f..e0302f13 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_shuffle.h +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_shuffle.h @@ -23,7 +23,7 @@ #define __CUDA_SHUFFLE_H_ #include "reax_types.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #ifdef __cplusplus extern "C" { diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_torsion_angles.h b/puremd_rc_1003/PG-PuReMD/src/cuda_torsion_angles.h index aa074756..bb1542f8 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_torsion_angles.h +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_torsion_angles.h @@ -23,7 +23,7 @@ #define __TORSION_ANGLES_H_ #include "reax_types.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" CUDA_GLOBAL void Cuda_Torsion_Angles( reax_atom *, global_parameters , diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_utils.h b/puremd_rc_1003/PG-PuReMD/src/cuda_utils.h index 86975a3e..c2536b39 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_utils.h +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_utils.h @@ -6,7 +6,7 @@ #include "stdlib.h" #include "stdio.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #ifdef __cplusplus extern "C" { diff --git a/puremd_rc_1003/PG-PuReMD/src/cuda_valence_angles.h b/puremd_rc_1003/PG-PuReMD/src/cuda_valence_angles.h index 56a93103..fadfc398 100644 --- a/puremd_rc_1003/PG-PuReMD/src/cuda_valence_angles.h +++ b/puremd_rc_1003/PG-PuReMD/src/cuda_valence_angles.h @@ -23,7 +23,7 @@ #define __VALENCE_ANGLES_H_ #include "reax_types.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #include "vector.h" CUDA_GLOBAL void Cuda_Valence_Angles( reax_atom *, diff --git a/puremd_rc_1003/PG-PuReMD/src/dev_list.h b/puremd_rc_1003/PG-PuReMD/src/dev_list.h index 58523367..2d5c75c5 100644 --- a/puremd_rc_1003/PG-PuReMD/src/dev_list.h +++ b/puremd_rc_1003/PG-PuReMD/src/dev_list.h @@ -23,7 +23,7 @@ #define __LIST_H_ #include "reax_types.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #ifdef __cplusplus extern "C" { diff --git a/puremd_rc_1003/PG-PuReMD/src/dual_matvec.h b/puremd_rc_1003/PG-PuReMD/src/dual_matvec.h index 8298a972..c5b93948 100644 --- a/puremd_rc_1003/PG-PuReMD/src/dual_matvec.h +++ b/puremd_rc_1003/PG-PuReMD/src/dual_matvec.h @@ -4,7 +4,7 @@ #define __DUAL_MATVEC__H_ #include "reax_types.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" CUDA_GLOBAL void k_dual_matvec (sparse_matrix , rvec2 *, rvec2 *, int ); CUDA_GLOBAL void k_dual_matvec_csr(sparse_matrix , rvec2 *, rvec2 *, int ); diff --git a/puremd_rc_1003/PG-PuReMD/src/forces.c b/puremd_rc_1003/PG-PuReMD/src/forces.c index 887a1470..d0f720a5 100644 --- a/puremd_rc_1003/PG-PuReMD/src/forces.c +++ b/puremd_rc_1003/PG-PuReMD/src/forces.c @@ -20,13 +20,16 @@ ----------------------------------------------------------------------*/ #include "reax_types.h" + #include "index_utils.h" +#ifdef HAVE_CUDA #include "cuda_forces.h" #include "cuda_linear_solvers.h" #include "cuda_neighbors.h" //#include "cuda_bond_orders.h" #include "validation.h" +#endif #if defined(PURE_REAX) #include "forces.h" @@ -63,8 +66,10 @@ interaction_function Interaction_Functions[NUM_INTRS]; +#ifdef HAVE_CUDA void Cuda_Total_Forces (reax_system *, control_params *, simulation_data *, storage *); void Cuda_Total_Forces_PURE (reax_system *, storage *); +#endif void Dummy_Interaction( reax_system *system, control_params *control, simulation_data *data, storage *workspace, @@ -198,6 +203,8 @@ void Compute_Total_Force( reax_system *system, control_params *control, #endif } + +#ifdef HAVE_CUDA void Cuda_Compute_Total_Force( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, mpi_datatypes *mpi_data ) @@ -225,6 +232,7 @@ void Cuda_Compute_Total_Force( reax_system *system, control_params *control, #endif } +#endif void Validate_Lists( reax_system *system, storage *workspace, reax_list **lists, @@ -1057,6 +1065,8 @@ void Compute_Forces( reax_system *system, control_params *control, #endif } + +#ifdef HAVE_CUDA void Cuda_Compute_Forces( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, output_controls *out_control, @@ -1158,6 +1168,8 @@ void Cuda_Compute_Forces( reax_system *system, control_params *control, #endif } +#endif + int validate_device (reax_system *system, simulation_data *data, storage *workspace, reax_list **lists ) { diff --git a/puremd_rc_1003/PG-PuReMD/src/grid.c b/puremd_rc_1003/PG-PuReMD/src/grid.c index a25f9aec..1c756dcf 100644 --- a/puremd_rc_1003/PG-PuReMD/src/grid.c +++ b/puremd_rc_1003/PG-PuReMD/src/grid.c @@ -520,11 +520,15 @@ void Bin_My_Atoms( reax_system *system, reallocate_data *realloc ) //fprintf (stderr, "*********** grid max_atoms: %d \n", g->max_atoms ); if( max_atoms >= g->max_atoms * DANGER_ZONE ) { realloc->gcell_atoms = MAX( max_atoms*SAFE_ZONE, MIN_GCELL_POPL ); +#ifdef HAVE_CUDA dev_workspace->realloc.gcell_atoms = MAX( max_atoms*SAFE_ZONE, MIN_GCELL_POPL ); +#endif } else { realloc->gcell_atoms = -1; +#ifdef HAVE_CUDA dev_workspace->realloc.gcell_atoms = -1; +#endif } } diff --git a/puremd_rc_1003/PG-PuReMD/src/index_utils.h b/puremd_rc_1003/PG-PuReMD/src/index_utils.h index 532473b2..b4d0d23b 100644 --- a/puremd_rc_1003/PG-PuReMD/src/index_utils.h +++ b/puremd_rc_1003/PG-PuReMD/src/index_utils.h @@ -1,7 +1,6 @@ #ifndef __INDEX_UTILS_H_ #define __INDEX_UTILS_H_ -#include "cuda_reax_constants.h" #include "reax_types.h" diff --git a/puremd_rc_1003/PG-PuReMD/src/init_md.c b/puremd_rc_1003/PG-PuReMD/src/init_md.c index edd7bc6e..5324b4fa 100644 --- a/puremd_rc_1003/PG-PuReMD/src/init_md.c +++ b/puremd_rc_1003/PG-PuReMD/src/init_md.c @@ -21,10 +21,12 @@ #include "reax_types.h" +#ifdef HAVE_CUDA #include "dev_alloc.h" #include "dev_list.h" #include "cuda_copy.h" #include "validation.h" +#endif #if defined(PURE_REAX) #include "init_md.h" @@ -187,6 +189,7 @@ int Init_System( reax_system *system, control_params *control, } +#ifdef HAVE_CUDA int Cuda_Init_System( reax_system *system, control_params *control, simulation_data *data, storage *workspace, mpi_datatypes *mpi_data, char *msg ) @@ -265,10 +268,7 @@ int Cuda_Init_System( reax_system *system, control_params *control, return SUCCESS; } - - - - +#endif /************************ initialize simulation data ************************/ @@ -363,6 +363,8 @@ int Init_Simulation_Data( reax_system *system, control_params *control, return SUCCESS; } + +#ifdef HAVE_CUDA int Cuda_Init_Simulation_Data( reax_system *system, control_params *control, simulation_data *data, char *msg ) { @@ -444,6 +446,7 @@ int Cuda_Init_Simulation_Data( reax_system *system, control_params *control, #endif return SUCCESS; } +#endif #elif defined(LAMMPS_REAX) @@ -543,6 +546,8 @@ int Init_Workspace( reax_system *system, control_params *control, return SUCCESS; } + +#ifdef HAVE_CUDA int Cuda_Init_Workspace( reax_system *system, control_params *control, storage *workspace, char *msg ) { @@ -561,6 +566,7 @@ int Cuda_Init_Workspace( reax_system *system, control_params *control, return SUCCESS; } +#endif /************** setup communication data structures **************/ @@ -805,6 +811,8 @@ int Init_Lists( reax_system *system, control_params *control, return SUCCESS; } + +#ifdef HAVE_CUDA int Cuda_Init_Lists( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, mpi_datatypes *mpi_data, char *msg ) @@ -1003,6 +1011,7 @@ int Cuda_Init_Lists( reax_system *system, control_params *control, return SUCCESS; } +#endif #if defined(PURE_REAX) @@ -1144,7 +1153,7 @@ void Pure_Initialize( reax_system *system, control_params *control, } - +#ifdef HAVE_CUDA void Cuda_Initialize( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, output_controls *out_control, @@ -1246,6 +1255,7 @@ void Cuda_Initialize( reax_system *system, control_params *control, fprintf( stderr, "p%d: Device Initialization Done \n", system->my_rank ); #endif } +#endif #elif defined(LAMMPS_REAX) diff --git a/puremd_rc_1003/PG-PuReMD/src/integrate.c b/puremd_rc_1003/PG-PuReMD/src/integrate.c index 084439d4..21b5d555 100644 --- a/puremd_rc_1003/PG-PuReMD/src/integrate.c +++ b/puremd_rc_1003/PG-PuReMD/src/integrate.c @@ -32,10 +32,12 @@ #include "tool_box.h" #include "vector.h" +#ifdef HAVE_CUDA #include "cuda_integrate.h" #include "cuda_copy.h" #include "cuda_neighbors.h" +#endif void Velocity_Verlet_NVE( reax_system* system, control_params* control, @@ -272,6 +274,7 @@ void Velocity_Verlet_Berendsen_NVT( reax_system* system, } +#ifdef HAVE_CUDA void Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* control, simulation_data *data, @@ -449,7 +452,7 @@ void Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, MPI_Barrier( MPI_COMM_WORLD ); #endif } - +#endif /* uses Berendsen-type coupling for both T and P. diff --git a/puremd_rc_1003/PG-PuReMD/src/linear_solvers.c b/puremd_rc_1003/PG-PuReMD/src/linear_solvers.c index e3fa2209..65c3398e 100644 --- a/puremd_rc_1003/PG-PuReMD/src/linear_solvers.c +++ b/puremd_rc_1003/PG-PuReMD/src/linear_solvers.c @@ -25,7 +25,9 @@ #include "tool_box.h" #include "vector.h" +#ifdef HAVE_CUDA #include "validation.h" +#endif #if defined(CG_PERFORMANCE) real t_start, t_elapsed, matvec_time, dot_time; @@ -88,9 +90,13 @@ int dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, } #endif +#ifdef HAVE_CUDA check_zeros_host (x, system->N, "x"); +#endif Dist( system, mpi_data, x, mpi_data->mpi_rvec2, scale, rvec2_packer ); +#ifdef HAVE_CUDA check_zeros_host (x, system->N, "x"); +#endif dual_Sparse_MatVec( H, x, workspace->q2, N ); @@ -235,6 +241,7 @@ int dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, +#ifdef HAVE_CUDA int Cuda_dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, rvec2 *b, real tol, rvec2 *x, mpi_datatypes* mpi_data, FILE *fout, simulation_data *data ) { @@ -522,6 +529,7 @@ int Cuda_dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, return (i+1) + matvecs; } +#endif void Sparse_MatVec( sparse_matrix *A, real *x, real *b, int N ) @@ -614,6 +622,7 @@ int CG( reax_system *system, storage *workspace, sparse_matrix *H, } +#ifdef HAVE_CUDA int Cuda_CG( reax_system *system, storage *workspace, sparse_matrix *H, real *b, real tol, real *x, mpi_datatypes* mpi_data, FILE *fout ) { @@ -720,6 +729,7 @@ int Cuda_CG( reax_system *system, storage *workspace, sparse_matrix *H, return i; } +#endif int CG_test( reax_system *system, storage *workspace, sparse_matrix *H, diff --git a/puremd_rc_1003/PG-PuReMD/src/lookup.c b/puremd_rc_1003/PG-PuReMD/src/lookup.c index 029f0a20..40393bf3 100644 --- a/puremd_rc_1003/PG-PuReMD/src/lookup.c +++ b/puremd_rc_1003/PG-PuReMD/src/lookup.c @@ -20,9 +20,12 @@ ----------------------------------------------------------------------*/ #include "reax_types.h" + #include "index_utils.h" +#ifdef HAVE_CUDA #include "cuda_lookup.h" +#endif #if defined(PURE_REAX) #include "lookup.h" @@ -294,14 +297,14 @@ int Init_Lookup_Tables( reax_system *system, control_params *control, free(fele); free(fCEclmb); - ////////////////////////////////////////////// +#ifdef HAVE_CUDA //copy the LR_Table to the device here. - t_start = Get_Time (); copy_LR_table_to_device (system, control, aggregated); t_end = Get_Timing_Info ( t_start ); fprintf (stderr, "Device copy of LR Lookup table: %f \n", t_end ); +#endif return 1; } diff --git a/puremd_rc_1003/PG-PuReMD/src/matvec.h b/puremd_rc_1003/PG-PuReMD/src/matvec.h index b904b6cc..680abea7 100644 --- a/puremd_rc_1003/PG-PuReMD/src/matvec.h +++ b/puremd_rc_1003/PG-PuReMD/src/matvec.h @@ -4,7 +4,7 @@ #define __MATVEC__H_ #include "reax_types.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" CUDA_GLOBAL void k_matvec (sparse_matrix , real *, real *, int ); CUDA_GLOBAL void k_matvec_csr(sparse_matrix , real *, real *, int ); diff --git a/puremd_rc_1003/PG-PuReMD/src/parallelreax.c b/puremd_rc_1003/PG-PuReMD/src/parallelreax.c index 4c73c9e6..e7c700b2 100644 --- a/puremd_rc_1003/PG-PuReMD/src/parallelreax.c +++ b/puremd_rc_1003/PG-PuReMD/src/parallelreax.c @@ -37,10 +37,12 @@ #include "traj.h" #include "vector.h" +#ifdef HAVE_CUDA #include "cuda_environment.h" #include "cuda_post_evolve.h" #include "validation.h" +#endif evolve_function Evolve; evolve_function Cuda_Evolve; @@ -121,6 +123,8 @@ void Post_Evolve( reax_system* system, control_params* control, Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D ); } + +#ifdef HAVE_CUDA void Cuda_Post_Evolve( reax_system* system, control_params* control, simulation_data* data, storage* workspace, reax_list** lists, output_controls *out_control, @@ -138,7 +142,10 @@ void Cuda_Post_Evolve( reax_system* system, control_params* control, /* compute kinetic energy of the system */ Cuda_Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D ); } +#endif + +#ifdef HAVE_CUDA void init_blocks (reax_system *system) { compute_blocks (&BLOCKS, &BLOCK_SIZE, system->n); @@ -154,6 +161,7 @@ void init_blocks (reax_system *system) //#endif } +#endif int main( int argc, char* argv[] ) @@ -170,9 +178,10 @@ int main( int argc, char* argv[] ) real t_begin, t_end; /* Remove this debug information later */ - #if defined(__CUDA_DEBUG_LOG__) +#ifdef HAVE_CUDA +#if defined(__CUDA_DEBUG_LOG__) fprintf (stderr, " Size of LR Lookup table %d \n", sizeof (LR_lookup_table) ); - #endif +#endif #if defined( __SM_35__) fprintf (stderr, " nbrs block size: %d \n", NBRS_BLOCK_SIZE); @@ -189,8 +198,7 @@ int main( int argc, char* argv[] ) fprintf (stderr, " General block size: %d \n", DEF_BLOCK_SIZE); #endif - - +#endif /* allocated main datastructures */ system = (reax_system *) smalloc( sizeof(reax_system), "system" ); @@ -213,6 +221,7 @@ int main( int argc, char* argv[] ) smalloc( sizeof(output_controls), "out_control" ); mpi_data = (mpi_datatypes *) smalloc( sizeof(mpi_datatypes), "mpi_data" ); +#ifdef HAVE_CUDA /* allocate the cuda auxiliary data structures */ dev_workspace = (storage *) smalloc( sizeof(storage), "dev_workspace" ); @@ -221,6 +230,7 @@ int main( int argc, char* argv[] ) dev_lists[i] = (reax_list *) smalloc( sizeof(reax_list), "lists[i]" ); dev_lists[i]->allocated = 0; } +#endif /* Initialize member variables */ system->init_thblist = FALSE; @@ -233,6 +243,7 @@ int main( int argc, char* argv[] ) system->wsize = control->nprocs; system->global_offset = (int *)scalloc(system->wsize+1,sizeof(int),"global_offset"); +#ifdef HAVE_CUDA /* setup the CUDA Device for this process can be on the same machine * or on a different machine, for now use the rank to compute the device * This will only work on a single machine with 2 GPUs*/ @@ -241,6 +252,7 @@ int main( int argc, char* argv[] ) //Cleanup_Cuda_Environment (); print_device_mem_usage (); //fprintf( stderr, "p%d: Total number of GPUs on this node -- %d\n", system->my_rank, my_device_id); +#endif /* read system description files */ Read_System( argv[1], argv[2], argv[3], system, control, @@ -250,51 +262,64 @@ int main( int argc, char* argv[] ) MPI_Barrier( MPI_COMM_WORLD ); #endif +#ifdef HAVE_CUDA /* init the blocks sizes for cuda kernels */ init_blocks (system); +#endif /* measure total simulation time after input is read */ if( system->my_rank == MASTER_NODE ) t_start = Get_Time( ); - /* initialize datastructures */ - //Initialize( system, control, data, workspace, lists, out_control, mpi_data ); + /* initialize datastructures */ +#ifdef HAVE_CUDA Cuda_Initialize( system, control, data, workspace, lists, out_control, mpi_data ); #if defined(__CUDA_DEBUG__) Pure_Initialize( system, control, data, workspace, lists, out_control, mpi_data ); #endif +#else + Initialize( system, control, data, workspace, lists, out_control, mpi_data ); +#endif +#ifdef HAVE_CUDA print_device_mem_usage (); /* init the blocks sizes for cuda kernels */ init_blocks (system); +#endif #if defined(DEBUG) fprintf( stderr, "p%d: initializated data structures\n", system->my_rank ); MPI_Barrier( MPI_COMM_WORLD ); #endif //END OF FIRST STEP - // // compute f_0 Comm_Atoms( system, control, data, workspace, lists, mpi_data, 1 ); +#ifdef HAVE_CUDA Sync_Atoms ( system ); Sync_Grid (&system->my_grid, &system->d_my_grid); init_blocks (system); - #if defined(__CUDA_DENUG_LOG__) +#if defined(__CUDA_DENUG_LOG__) fprintf( stderr, "p%d: Comm_Atoms synchronized \n", system->my_rank ); - #endif +#endif +#endif //Second step +#ifdef HAVE_CUDA Cuda_Reset ( system, control, data, workspace, lists ); #if defined(__CUDA_DEBUG__) Reset( system, control, data, workspace, lists ); #endif //fprintf( stderr, "p%d: Cuda_Reset done...\n", system->my_rank ); +#else + Reset( system, control, data, workspace, lists ); +#endif //Third Step +#ifdef HAVE_CUDA Cuda_Generate_Neighbor_Lists( system, data, workspace, lists ); #if defined(__CUDA_DEBUG__) Generate_Neighbor_Lists( system, data, workspace, lists ); @@ -302,9 +327,13 @@ print_device_mem_usage (); #if defined(__CUDA_DENUG_LOG__) fprintf (stderr, "p%d: Cuda_Generate_Neighbor_Lists done...\n", system->my_rank ); #endif +#else + Generate_Neighbor_Lists( system, data, workspace, lists ); +#endif //Fourth Step +#ifdef HAVE_CUDA #if defined(__CUDA_DEBUG__) fprintf (stderr, " Host Compute Forces begin.... \n"); Compute_Forces( system, control, data, workspace, @@ -315,8 +344,13 @@ print_device_mem_usage (); #if defined(__CUDA_DENUG_LOG__) fprintf (stderr, "p%d: Cuda_Compute_Forces done...\n", system->my_rank ); #endif +#else + Compute_Forces( system, control, data, workspace, + lists, out_control, mpi_data ); +#endif +#ifdef HAVE_CUDA #if defined (__CUDA_DEBUG__) Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D ); #endif @@ -324,7 +358,11 @@ print_device_mem_usage (); #if defined(__CUDA_DENUG_LOG__) fprintf (stderr, "p%d: Cuda_Compute_Kinetic_Energy done ... \n", system->my_rank); #endif +#else + Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D ); +#endif +#ifdef HAVE_CUDA #if defined(__CUDA_DEBUG__) validate_device (system, data, workspace, lists); #endif @@ -333,10 +371,16 @@ print_device_mem_usage (); Output_Results( system, control, data, lists, out_control, mpi_data ); //fprintf (stderr, "p%d: Output_Results done ... \n", system->my_rank); #endif +#else + Output_Results( system, control, data, lists, out_control, mpi_data ); + //fprintf (stderr, "p%d: Output_Results done ... \n", system->my_rank); +#endif +#ifdef HAVE_CUDA #if defined(DEBUG) fprintf( stderr, "p%d: computed forces at t0\n", system->my_rank ); MPI_Barrier( MPI_COMM_WORLD ); +#endif #endif // start the simulation @@ -346,6 +390,7 @@ print_device_mem_usage (); //t_begin = Get_Time (); +#ifdef HAVE_CUDA #if defined(__CUDA_DEBUG__) Evolve( system, control, data, workspace, lists, out_control, mpi_data ); #endif @@ -362,11 +407,18 @@ print_device_mem_usage (); //t_end = Get_Timing_Info (t_begin); //fprintf (stderr, " Post Evolve time: %f \n", t_end); +#else + Evolve( system, control, data, workspace, lists, out_control, mpi_data ); + Post_Evolve(system, control, data, workspace, lists, out_control, mpi_data); +#endif - +#ifdef HAVE_CUDA #if !defined(__CUDA_DEBUG__) Output_Results( system, control, data, lists, out_control, mpi_data ); #endif +#else + Output_Results( system, control, data, lists, out_control, mpi_data ); +#endif //Analysis(system, control, data, workspace, lists, out_control, mpi_data); @@ -385,9 +437,11 @@ print_device_mem_usage (); } +#ifdef HAVE_CUDA //vaildate the results in debug mode #if defined(__CUDA_DEBUG__) validate_device (system, data, workspace, lists); +#endif #endif /* end of the simulation, write total simulation time */ diff --git a/puremd_rc_1003/PG-PuReMD/src/qEq.c b/puremd_rc_1003/PG-PuReMD/src/qEq.c index d7324c9b..47d06c5a 100644 --- a/puremd_rc_1003/PG-PuReMD/src/qEq.c +++ b/puremd_rc_1003/PG-PuReMD/src/qEq.c @@ -26,10 +26,12 @@ #include "linear_solvers.h" #include "tool_box.h" +#ifdef HAVE_CUDA #include "cuda_qEq.h" #include "cuda_linear_solvers.h" #include "validation.h" +#endif int compare_matrix_entry(const void *v1, const void *v2) { @@ -347,6 +349,8 @@ void Calculate_Charges( reax_system *system, storage *workspace, free(q); } + +#ifdef HAVE_CUDA void Cuda_Calculate_Charges( reax_system *system, storage *workspace, mpi_datatypes *mpi_data ) { @@ -375,6 +379,7 @@ void Cuda_Calculate_Charges( reax_system *system, storage *workspace, cuda_charges_updateq ( system, q ); } +#endif void QEq( reax_system *system, control_params *control, simulation_data *data, @@ -432,6 +437,8 @@ void QEq( reax_system *system, control_params *control, simulation_data *data, #endif } + +#ifdef HAVE_CUDA void Cuda_QEq( reax_system *system, control_params *control, simulation_data *data, storage *workspace, output_controls *out_control, mpi_datatypes *mpi_data ) @@ -479,3 +486,4 @@ void Cuda_QEq( reax_system *system, control_params *control, simulation_data *da } #endif } +#endif diff --git a/puremd_rc_1003/PG-PuReMD/src/reax_types.h b/puremd_rc_1003/PG-PuReMD/src/reax_types.h index 55e789bd..d6a99c22 100644 --- a/puremd_rc_1003/PG-PuReMD/src/reax_types.h +++ b/puremd_rc_1003/PG-PuReMD/src/reax_types.h @@ -19,18 +19,48 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ +#if !(defined(__REAX_TYPES_H_) || defined(__CUDA_REAX_TYPES_H_)) + +#ifdef __CUDACC__ + +#ifndef __CUDA_REAX_TYPES_H_ +#define __CUDA_REAX_TYPES_H_ +#define CUDA_HOST __host__ +#define CUDA_DEVICE __device__ +#define CUDA_GLOBAL __global__ +#define CUDA_HOST_DEVICE __host__ __device__ +#endif + +#else + #ifndef __REAX_TYPES_H_ #define __REAX_TYPES_H_ +#define CUDA_HOST +#define CUDA_DEVICE +#define CUDA_GLOBAL +#define CUDA_HOST_DEVICE +#endif + +#endif -#include "ctype.h" -#include "math.h" -#include "mpi.h" -#include "stdio.h" -#include "stdlib.h" -#include "string.h" -#include "sys/time.h" -#include "time.h" -#include "zlib.h" +#if (defined(HAVE_CONFIG_H) && !defined(__CONFIG_H_)) +#define __CONFIG_H_ +#include "config.h" +#endif + +#include <ctype.h> +#include <math.h> +#include <mpi.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <time.h> +#include <zlib.h> + +#ifdef HAVE_CUDA +#include <cuda.h> +#endif #if defined(__IBMC__) #define inline __inline__ @@ -131,11 +161,81 @@ #define RESTART 30 /**************** RESOURCE CONSTANTS **********************/ +#ifdef HAVE_CUDA //#define CUDA_BLOCK_SIZE 256 #define SCRATCH_SIZE (1024 * 1024 * 20) #define HOST_SCRATCH_SIZE (1024 * 1024 * 20) #define RES_SCRATCH 0x90 +/* BLOCK SIZES for kernels */ +#define HB_SYM_BLOCK_SIZE 64 +#define HB_KER_SYM_THREADS_PER_ATOM 16 +#define HB_POST_PROC_BLOCK_SIZE 256 +#define HB_POST_PROC_KER_THREADS_PER_ATOM 32 + +#if defined( __INIT_BLOCK_SIZE__) +#define DEF_BLOCK_SIZE __INIT_BLOCK_SIZE__ /* all utility functions and all */ +#define CUDA_BLOCK_SIZE __INIT_BLOCK_SIZE__ /* init forces */ +#define ST_BLOCK_SIZE __INIT_BLOCK_SIZE__ +#else +#define DEF_BLOCK_SIZE 256 /* all utility functions and all */ +#define CUDA_BLOCK_SIZE 256 /* init forces */ +#define ST_BLOCK_SIZE 256 +#endif + +#if defined( __NBRS_THREADS_PER_ATOM__ ) +#define NB_KER_THREADS_PER_ATOM __NBRS_THREADS_PER_ATOM__ +#else +#define NB_KER_THREADS_PER_ATOM 16 +#endif + +#if defined( __NBRS_BLOCK_SIZE__) +#define NBRS_BLOCK_SIZE __NBRS_BLOCK_SIZE__ +#else +#define NBRS_BLOCK_SIZE 256 +#endif + +#if defined( __HB_THREADS_PER_ATOM__) +#define HB_KER_THREADS_PER_ATOM __HB_THREADS_PER_ATOM__ +#else +#define HB_KER_THREADS_PER_ATOM 32 +#endif + +#if defined(__HB_BLOCK_SIZE__) +#define HB_BLOCK_SIZE __HB_BLOCK_SIZE__ +#else +#define HB_BLOCK_SIZE 256 +#endif + +#if defined( __VDW_THREADS_PER_ATOM__ ) +#define VDW_KER_THREADS_PER_ATOM __VDW_THREADS_PER_ATOM__ +#else +#define VDW_KER_THREADS_PER_ATOM 32 +#endif + +#if defined( __VDW_BLOCK_SIZE__) +#define VDW_BLOCK_SIZE __VDW_BLOCK_SIZE__ +#else +#define VDW_BLOCK_SIZE 256 +#endif + +#if defined( __MATVEC_THREADS_PER_ROW__ ) +#define MATVEC_KER_THREADS_PER_ROW __MATVEC_THREADS_PER_ROW__ +#else +#define MATVEC_KER_THREADS_PER_ROW 32 +#endif + +#if defined( __MATVEC_BLOCK_SIZE__) +#define MATVEC_BLOCK_SIZE __MATVEC_BLOCK_SIZE__ +#else +#define MATVEC_BLOCK_SIZE 512 +#endif + +//Validation +#define GPU_TOLERANCE 1e-5 + +#endif + /******************* ENUMERATIONS *************************/ @@ -622,14 +722,13 @@ typedef struct reax_atom *my_atoms; reax_atom *d_my_atoms; - //CUDA +/*CUDA-specific*/ int max_sparse_entries; int init_thblist; int num_thbodies; int max_bonds; int max_hbonds; - } reax_system; @@ -846,7 +945,7 @@ typedef struct { int scl; far_neighbor_data *ptr; - //CUDA +/*CUDA-specific*/ int sym_index; rvec hb_f; } hbond_data; @@ -882,7 +981,7 @@ typedef struct { rvec dvec; bond_order_data bo_data; - //CUDA +/*CUDA-specific*/ real ae_CdDelta; real va_CdDelta; @@ -895,7 +994,6 @@ typedef struct { rvec hb_f; rvec tf_f; - } bond_data; @@ -1162,9 +1260,7 @@ typedef void (*unpacker) ( reax_system*, int, void*, int, neighbor_proc*, int ); typedef void (*dist_packer) (void*, mpi_out_data*); typedef void (*coll_unpacker) (void*, void*, mpi_out_data*); -//////////////////////////////// -//CUDA SPECIFIC DECLARATIONS -//////////////////////////////// +/*CUDA-specific*/ extern reax_list **dev_lists; extern storage *dev_workspace; extern storage *dev_storage; diff --git a/puremd_rc_1003/PG-PuReMD/src/reduction.h b/puremd_rc_1003/PG-PuReMD/src/reduction.h index 490bb725..5291703b 100644 --- a/puremd_rc_1003/PG-PuReMD/src/reduction.h +++ b/puremd_rc_1003/PG-PuReMD/src/reduction.h @@ -2,7 +2,7 @@ #ifndef __REDUCTION_H__ #define __REDUCTION_H__ -#include "cuda_reax_constants.h" +#include "reax_types.h" #include "reax_types.h" #define INITIAL 0 diff --git a/puremd_rc_1003/PG-PuReMD/src/reset_tools.c b/puremd_rc_1003/PG-PuReMD/src/reset_tools.c index 9803c6f1..b31776e9 100644 --- a/puremd_rc_1003/PG-PuReMD/src/reset_tools.c +++ b/puremd_rc_1003/PG-PuReMD/src/reset_tools.c @@ -20,8 +20,13 @@ ----------------------------------------------------------------------*/ #include "reax_types.h" + #include "index_utils.h" + +#ifdef HAVE_CUDA #include "cuda_reset_tools.h" +#endif + #if defined(PURE_REAX) #include "reset_tools.h" #include "list.h" @@ -261,7 +266,7 @@ void Reset( reax_system *system, control_params *control, } - +#ifdef HAVE_CUDA void Cuda_Reset( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists ) { @@ -281,3 +286,4 @@ void Cuda_Reset( reax_system *system, control_params *control, MPI_Barrier( MPI_COMM_WORLD ); #endif } +#endif diff --git a/puremd_rc_1003/PG-PuReMD/src/system_props.c b/puremd_rc_1003/PG-PuReMD/src/system_props.c index f8e717f3..def8a879 100644 --- a/puremd_rc_1003/PG-PuReMD/src/system_props.c +++ b/puremd_rc_1003/PG-PuReMD/src/system_props.c @@ -20,7 +20,9 @@ ----------------------------------------------------------------------*/ #include "reax_types.h" +#ifdef HAVE_CUDA #include "dev_system_props.h" +#endif #if defined(PURE_REAX) #include "system_props.h" @@ -80,6 +82,8 @@ void Compute_Kinetic_Energy( reax_system* system, simulation_data* data, data->therm.T = ALMOST_ZERO; } + +#ifdef HAVE_CUDA void Cuda_Compute_Kinetic_Energy( reax_system* system, simulation_data* data, MPI_Comm comm ) { @@ -100,7 +104,7 @@ void Cuda_Compute_Kinetic_Energy( reax_system* system, simulation_data* data, if( fabs(data->therm.T) < ALMOST_ZERO ) data->therm.T = ALMOST_ZERO; } - +#endif void Compute_System_Energy( reax_system *system, simulation_data *data, @@ -111,8 +115,10 @@ void Compute_System_Energy( reax_system *system, simulation_data *data, //TODO remove this is an UGLY fix my_en [13] = data->my_en.e_kin; +#ifdef HAVE_CUDA //Cuda Wrapper here dev_sync_simulation_data ( data ); +#endif my_en[0] = data->my_en.e_bond; my_en[1] = data->my_en.e_ov; @@ -182,6 +188,8 @@ void Compute_Total_Mass( reax_system *system, simulation_data *data, data->inv_M = 1. / data->M; } + +#ifdef HAVE_CUDA void Cuda_Compute_Total_Mass( reax_system *system, simulation_data *data, MPI_Comm comm ) { @@ -195,6 +203,7 @@ void Cuda_Compute_Total_Mass( reax_system *system, simulation_data *data, data->inv_M = 1. / data->M; } +#endif void Compute_Center_of_Mass( reax_system *system, simulation_data *data, @@ -312,6 +321,7 @@ void Compute_Center_of_Mass( reax_system *system, simulation_data *data, } +#ifdef HAVE_CUDA void Cuda_Compute_Center_of_Mass( reax_system *system, simulation_data *data, mpi_datatypes *mpi_data, MPI_Comm comm ) { @@ -406,21 +416,7 @@ void Cuda_Compute_Center_of_Mass( reax_system *system, simulation_data *data, data->avcm[0], data->avcm[1], data->avcm[2] ); #endif } - - - - - - - - - - - - - - - +#endif /* IMPORTANT: This function assumes that current kinetic energy diff --git a/puremd_rc_1003/PG-PuReMD/src/traj.c b/puremd_rc_1003/PG-PuReMD/src/traj.c index d31ac1f4..fd982773 100644 --- a/puremd_rc_1003/PG-PuReMD/src/traj.c +++ b/puremd_rc_1003/PG-PuReMD/src/traj.c @@ -933,24 +933,27 @@ int Append_Frame( reax_system *system, control_params *control, #endif Write_Frame_Header( system, control, data, out_control, mpi_data ); - //CUDA Wrappers here if( out_control->write_atoms ) { //Sync atoms here +#ifdef HAVE_CUDA Output_Sync_Atoms ( system ); +#endif Write_Atoms( system, control, out_control, mpi_data ); } - //CUDA Wrappers here if( out_control->write_bonds ) { //sync bonds here +#ifdef HAVE_CUDA Output_Sync_Lists ((*lists + BONDS), (*dev_lists + BONDS), TYP_BOND); +#endif Write_Bonds( system, control, (*lists + BONDS), out_control, mpi_data ); } - //CUDA Wrappers here if( out_control->write_angles ) { //sync three body interactions here +#ifdef HAVE_CUDA Output_Sync_Lists ((*lists + THREE_BODIES), (*dev_lists + THREE_BODIES), TYP_THREE_BODY); +#endif Write_Angles( system, control, (*lists + BONDS), (*lists + THREE_BODIES), out_control, mpi_data ); } diff --git a/puremd_rc_1003/PG-PuReMD/src/validation.cu b/puremd_rc_1003/PG-PuReMD/src/validation.cu index 58bf11ab..694c5db3 100644 --- a/puremd_rc_1003/PG-PuReMD/src/validation.cu +++ b/puremd_rc_1003/PG-PuReMD/src/validation.cu @@ -2,7 +2,7 @@ #include "validation.h" #include "cuda_utils.h" #include "list.h" -#include "cuda_reax_constants.h" +#include "reax_types.h" #include "index_utils.h" #include "vector.h" diff --git a/puremd_rc_1003/PG-PuReMD/src/vector.h b/puremd_rc_1003/PG-PuReMD/src/vector.h index 12878979..bf08deba 100644 --- a/puremd_rc_1003/PG-PuReMD/src/vector.h +++ b/puremd_rc_1003/PG-PuReMD/src/vector.h @@ -23,8 +23,6 @@ #define __VECTOR_H_ #include "reax_types.h" - -#include "cuda_reax_constants.h" #include "random.h" #ifdef __cplusplus -- GitLab