From 52199731a59f309e4fbf2e8117c39f7483b1c1cf Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnku@msu.edu> Date: Fri, 1 Jul 2016 10:01:00 -0400 Subject: [PATCH] Update CUDA block calculations. --- PG-PuReMD/src/cuda_utils.cu | 58 ++++++++++++++++++++++++++----------- PG-PuReMD/src/reax_types.h | 40 +++++++++++++------------ 2 files changed, 63 insertions(+), 35 deletions(-) diff --git a/PG-PuReMD/src/cuda_utils.cu b/PG-PuReMD/src/cuda_utils.cu index dcd8d61f..1ab8705b 100644 --- a/PG-PuReMD/src/cuda_utils.cu +++ b/PG-PuReMD/src/cuda_utils.cu @@ -1,18 +1,23 @@ #include "cuda_utils.h" -extern "C" void cuda_malloc (void **ptr, int size, int memset, char *msg) { + +extern "C" void cuda_malloc (void **ptr, int size, int mem_set, char *msg) { cudaError_t retVal = cudaSuccess; retVal = cudaMalloc (ptr, size); - if (retVal != cudaSuccess) { + + if (retVal != cudaSuccess) + { fprintf (stderr, "Failed to allocate memory on device for the res: %s... exiting with code: %d size: %d \n", msg, retVal, size); exit (-1); } - if (memset) { + if (mem_set) + { retVal = cudaMemset (*ptr, 0, size); + if (retVal != cudaSuccess) { fprintf (stderr, "Failed to memset memory on device for resource %s\n", msg); @@ -21,50 +26,66 @@ extern "C" void cuda_malloc (void **ptr, int size, int memset, char *msg) { } } + extern "C" void cuda_free (void *ptr, char *msg) { cudaError_t retVal = cudaSuccess; - if (!ptr) return; + + if ( !ptr ) + { + return; + } retVal = cudaFree (ptr); - if (retVal != cudaSuccess) { + if (retVal != cudaSuccess) + { fprintf (stderr, "Failed to release memory on device for res %s... exiting with code %d -- Address %ld\n", msg, retVal, (long int) ptr); return; } } + extern "C" void cuda_memset (void *ptr, int data, size_t count, char *msg){ cudaError_t retVal = cudaSuccess; retVal = cudaMemset (ptr, data, count); - if (retVal != cudaSuccess) { + + if (retVal != cudaSuccess) + { fprintf (stderr, "Failed to memset memory on device for %s, cuda code %d\n", msg, retVal); exit (-1); } } + extern "C" void copy_host_device (void *host, void *dev, int size, enum cudaMemcpyKind dir, char *msg) { - cudaError_t retVal = cudaErrorNotReady; + cudaError_t retVal = cudaErrorNotReady; if (dir == cudaMemcpyHostToDevice) + { retVal = cudaMemcpy (dev, host, size, cudaMemcpyHostToDevice); + } else + { retVal = cudaMemcpy (host, dev, size, cudaMemcpyDeviceToHost); + } - if (retVal != cudaSuccess) { + if (retVal != cudaSuccess) + { fprintf (stderr, "could not copy resource %s from host to device: reason %d \n", msg, retVal); exit (-1); } } + extern "C" void copy_device (void *dest, void *src, int size, char *msg) { - cudaError_t retVal = cudaErrorNotReady; + cudaError_t retVal = cudaErrorNotReady; retVal = cudaMemcpy (dest, src, size, cudaMemcpyDeviceToDevice); if (retVal != cudaSuccess) { @@ -74,31 +95,33 @@ extern "C" void copy_device (void *dest, void *src, int size, char *msg) } } + extern "C" void compute_blocks ( int *blocks, int *block_size, int count ) { *block_size = CUDA_BLOCK_SIZE; - *blocks = (count / CUDA_BLOCK_SIZE ) + (count % CUDA_BLOCK_SIZE == 0 ? 0 : 1); + *blocks = (int) CEIL((double) count / CUDA_BLOCK_SIZE); } + extern "C" void compute_matvec_blocks ( int *blocks, int count ) { - *blocks = ((count * MATVEC_KER_THREADS_PER_ROW) / MATVEC_BLOCK_SIZE) + - (((count * MATVEC_KER_THREADS_PER_ROW) % MATVEC_BLOCK_SIZE) == 0 ? 0 : 1); + *blocks = (int) CEIL((double) count * MATVEC_KER_THREADS_PER_ROW / MATVEC_BLOCK_SIZE); } + extern "C" void compute_nearest_pow_2 (int blocks, int *result) { - int power = 1; - while (power < blocks) power *= 2; - - *result = power; + *result = (int) EXP2( CEIL( LOG2((double) blocks) ) ); } + void print_info () { size_t total, free; + cudaMemGetInfo (&free, &total); - if (cudaGetLastError () != cudaSuccess ) + + if ( cudaGetLastError () != cudaSuccess ) { fprintf (stderr, "Error on the memory call \n"); return; @@ -109,6 +132,7 @@ void print_info () free, free/(1024*1024), free/ (1024*1024*1024) ); } + extern "C" void print_device_mem_usage () { print_info (); diff --git a/PG-PuReMD/src/reax_types.h b/PG-PuReMD/src/reax_types.h index d27c5539..504d8c21 100644 --- a/PG-PuReMD/src/reax_types.h +++ b/PG-PuReMD/src/reax_types.h @@ -85,12 +85,16 @@ #define FALSE 0 #define EXP exp +#define EXP2 exp2 +#define LOG2 log2 #define SQRT sqrt #define POW pow #define ACOS acos #define COS cos #define SIN sin #define TAN tan +#define CEIL ceil +#define FLOOR floor #define SQR(x) ((x)*(x)) #define CUBE(x) ((x)*(x)*(x)) @@ -239,24 +243,24 @@ /******************* ENUMERATIONS *************************/ -enum geo_formats { CUSTOM, PDB, ASCII_RESTART, BINARY_RESTART, GF_N }; - -enum restart_formats { WRITE_ASCII, WRITE_BINARY, RF_N }; - -enum ensembles { NVE, bNVT, nhNVT, sNPT, iNPT, NPT, ens_N }; - -enum lists { BONDS, OLD_BONDS, THREE_BODIES, - HBONDS, FAR_NBRS, DBOS, DDELTAS, LIST_N +enum geo_formats { CUSTOM = 0, PDB = 1, ASCII_RESTART = 2, BINARY_RESTART = 3, GF_N = 4 }; + +enum restart_formats { WRITE_ASCII = 0, WRITE_BINARY = 1, RF_N = 2 }; + +enum ensembles { NVE = 0, bNVT = 1, nhNVT = 2, sNPT = 3, iNPT = 4, NPT = 5, ens_N = 6 }; + +enum lists { BONDS = 0, OLD_BONDS = 1, THREE_BODIES = 2, + HBONDS = 3, FAR_NBRS = 4, DBOS = 5, DDELTAS = 6, LIST_N = 7 }; -enum interactions { TYP_VOID, TYP_BOND, TYP_THREE_BODY, - TYP_HBOND, TYP_FAR_NEIGHBOR, TYP_DBO, TYP_DDELTA, TYP_N +enum interactions { TYP_VOID = 0, TYP_BOND = 1, TYP_THREE_BODY = 2, + TYP_HBOND = 3, TYP_FAR_NEIGHBOR = 4, TYP_DBO = 5, TYP_DDELTA = 6, TYP_N = 7 }; -enum message_tags { INIT, UPDATE, BNDRY, UPDATE_BNDRY, - EXC_VEC1, EXC_VEC2, DIST_RVEC2, COLL_RVEC2, - DIST_RVECS, COLL_RVECS, INIT_DESCS, ATOM_LINES, - BOND_LINES, ANGLE_LINES, RESTART_ATOMS, TAGS_N +enum message_tags { INIT = 0, UPDATE = 1, BNDRY = 2, UPDATE_BNDRY = 3, + EXC_VEC1 = 4, EXC_VEC2 = 5, DIST_RVEC2 = 6, COLL_RVEC2 = 7, + DIST_RVECS = 8, COLL_RVECS = 9, INIT_DESCS = 10, ATOM_LINES = 11, + BOND_LINES = 12, ANGLE_LINES = 13, RESTART_ATOMS = 14, TAGS_N = 15 }; enum errors { FILE_NOT_FOUND = -10, UNKNOWN_ATOM_TYPE = -11, @@ -265,7 +269,7 @@ enum errors { FILE_NOT_FOUND = -10, UNKNOWN_ATOM_TYPE = -11, INVALID_INPUT = -16, INVALID_GEO = -17 }; -enum exchanges { NONE, NEAR_EXCH, FULL_EXCH }; +enum exchanges { NONE = 0, NEAR_EXCH = 1, FULL_EXCH = 2 }; enum gcell_types { NO_NBRS = 0, NEAR_ONLY = 1, HBOND_ONLY = 2, FAR_ONLY = 4, NEAR_HBOND = 3, NEAR_FAR = 5, HBOND_FAR = 6, FULL_NBRS = 7, @@ -276,11 +280,11 @@ enum atoms { C_ATOM = 0, H_ATOM = 1, O_ATOM = 2, N_ATOM = 3, S_ATOM = 4, SI_ATOM = 5, GE_ATOM = 6, X_ATOM = 7 }; -enum traj_methods { REG_TRAJ, MPI_TRAJ, TF_N }; +enum traj_methods { REG_TRAJ = 0, MPI_TRAJ = 1, TF_N = 2 }; -enum molecules { UNKNOWN, WATER }; +enum molecules { UNKNOWN = 0, WATER = 1 }; -enum list_on { TYP_HOST, TYP_DEVICE }; +enum list_on { TYP_HOST = 0, TYP_DEVICE = 1 }; -- GitLab