From 453a4c174ca50082fcf79d12b93926f3924e9517 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnku@msu.edu> Date: Tue, 6 Dec 2016 09:56:17 -0500 Subject: [PATCH] PuReMD-GPU: fix global CUDA libary variable linkage. --- PuReMD-GPU/Makefile.am | 2 +- PuReMD-GPU/configure.ac | 54 ++++++++++++++++-------------- PuReMD-GPU/src/cuda_environment.cu | 8 +++-- PuReMD-GPU/src/cuda_lin_alg.cu | 3 +- PuReMD-GPU/src/cuda_utils.cu | 6 ++++ PuReMD-GPU/src/cuda_utils.h | 6 ++++ PuReMD-GPU/src/mytypes.h | 9 +++-- PuReMD-GPU/src/testmd.c | 3 +- 8 files changed, 53 insertions(+), 38 deletions(-) diff --git a/PuReMD-GPU/Makefile.am b/PuReMD-GPU/Makefile.am index 016114db..3ab7bdba 100644 --- a/PuReMD-GPU/Makefile.am +++ b/PuReMD-GPU/Makefile.am @@ -3,7 +3,7 @@ ACLOCAL_AMFLAGS = -I ../m4 SUFFIXES = .cu include ../cuda.am -AM_CFLAGS = -Wall -O3 -funroll-loops -fstrict-aliasing +AM_CFLAGS = -Wall -O3 -funroll-loops -fstrict-aliasing -m64 AM_CPPFLAGS = AM_LDFLAGS = diff --git a/PuReMD-GPU/configure.ac b/PuReMD-GPU/configure.ac index 30e7e0bf..38c7cf73 100644 --- a/PuReMD-GPU/configure.ac +++ b/PuReMD-GPU/configure.ac @@ -65,33 +65,35 @@ then fi AC_DEFINE([HAVE_CUDA], [1], [Define to 1 if you have CUDA support enabled.]) -AC_SEARCH_LIBS([cublasDnrm2], [cublas]) -AC_SEARCH_LIBS([cublasDaxpy], [cublas]) -AC_SEARCH_LIBS([cublasDscal], [cublas]) -AC_SEARCH_LIBS([cublasDdot], [cublas]) -AC_SEARCH_LIBS([cudaThreadSynchronize], [cudart]) -AC_SEARCH_LIBS([cudaGetLastError], [cudart]) +AC_CHECK_LIB([cuda], [cuGetErrorString]) AC_CHECK_LIB([cudart], [cudaMalloc]) -AC_SEARCH_LIBS([cusparseCreateMatDescr], [cusparse]) -AC_SEARCH_LIBS([cusparseSetMatType], [cusparse]) -AC_SEARCH_LIBS([cusparseSetMatIndexBase], [cusparse]) - -AC_SEARCH_LIBS([cublasDnrm2], [cublas], - [CUBLAS_FOUND_LIBS="yes"], [CUBLAS_FOUND_LIBS="no"], [-lcublas]) -AS_IF([test "x${CUBLAS_FOUND_LIBS}" != "xyes"], - [AC_MSG_ERROR([Unable to find CUBLAS library.])]) - -AC_SEARCH_LIBS([cusparseSetMatType], [cusparse], - [CUSPARSE_FOUND_LIBS="yes"], [CUSPARSE_FOUND_LIBS="no"], [-lcusparse]) -AS_IF([test "x${CUSPARSE_FOUND_LIBS}" != "xyes"], - [AC_MSG_ERROR([Unable to find CUSPARSE library.])]) - -AC_CHECK_TYPES([cublasHandle_t], [], - [AC_MSG_FAILURE([cublasHandle_t type not found in cublas.h], [1])], [#include<cublas_v2.h>]) -AC_CHECK_TYPES([cusparseHandle_t], [], - [AC_MSG_FAILURE([cusparseHandle_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>]) -AC_CHECK_TYPES([cusparseMatDescr_t], [], - [AC_MSG_FAILURE([cusparseMatDescr_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>]) +AC_CHECK_LIB([cublas], [cublasDnrm2]) +AC_CHECK_LIB([cusparse], [cusparseCreateMatDescr]) +#AC_SEARCH_LIBS([cublasDaxpy], [cublas]) +#AC_SEARCH_LIBS([cublasDscal], [cublas]) +#AC_SEARCH_LIBS([cublasDdot], [cublas]) +#AC_SEARCH_LIBS([cudaThreadSynchronize], [cudart]) +#AC_SEARCH_LIBS([cudaGetLastError], [cudart]) +#AC_SEARCH_LIBS([cusparseCreateMatDescr], [cusparse]) +#AC_SEARCH_LIBS([cusparseSetMatType], [cusparse]) +#AC_SEARCH_LIBS([cusparseSetMatIndexBase], [cusparse]) +# +#AC_SEARCH_LIBS([cublasDnrm2], [cublas], +# [CUBLAS_FOUND_LIBS="yes"], [CUBLAS_FOUND_LIBS="no"], [-lcublas]) +#AS_IF([test "x${CUBLAS_FOUND_LIBS}" != "xyes"], +# [AC_MSG_ERROR([Unable to find CUBLAS library.])]) +# +#AC_SEARCH_LIBS([cusparseSetMatType], [cusparse], +# [CUSPARSE_FOUND_LIBS="yes"], [CUSPARSE_FOUND_LIBS="no"], [-lcusparse]) +#AS_IF([test "x${CUSPARSE_FOUND_LIBS}" != "xyes"], +# [AC_MSG_ERROR([Unable to find CUSPARSE library.])]) +# +#AC_CHECK_TYPES([cublasHandle_t], [], +# [AC_MSG_FAILURE([cublasHandle_t type not found in cublas.h], [1])], [#include<cublas_v2.h>]) +#AC_CHECK_TYPES([cusparseHandle_t], [], +# [AC_MSG_FAILURE([cusparseHandle_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>]) +#AC_CHECK_TYPES([cusparseMatDescr_t], [], +# [AC_MSG_FAILURE([cusparseMatDescr_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>]) if test "BUILD_PROF" = "true" then diff --git a/PuReMD-GPU/src/cuda_environment.cu b/PuReMD-GPU/src/cuda_environment.cu index cd6ae50d..caae5168 100644 --- a/PuReMD-GPU/src/cuda_environment.cu +++ b/PuReMD-GPU/src/cuda_environment.cu @@ -28,9 +28,6 @@ void Setup_Cuda_Environment( int rank, int nprocs, int gpus_per_node ) int deviceCount = 0; cudaError_t flag; - cublasHandle_t cublasHandle; - cusparseHandle_t cusparseHandle; - cusparseMatDescr_t matdescriptor; flag = cudaGetDeviceCount( &deviceCount ); @@ -44,6 +41,11 @@ void Setup_Cuda_Environment( int rank, int nprocs, int gpus_per_node ) //and assign the GPU for each process //TODO: handle condition where # CPU procs > # GPUs cudaSetDevice( rank % deviceCount ); + cudaCheckError( ); + + /* reset device and clear previous allocations */ + Cleanup_Cuda_Environment( ); + cudaCheckError( ); #if defined(__CUDA_DEBUG__) fprintf( stderr, "p:%d is using GPU: %d \n", rank, rank % deviceCount ); diff --git a/PuReMD-GPU/src/cuda_lin_alg.cu b/PuReMD-GPU/src/cuda_lin_alg.cu index 5dc9eb35..9f6cd459 100644 --- a/PuReMD-GPU/src/cuda_lin_alg.cu +++ b/PuReMD-GPU/src/cuda_lin_alg.cu @@ -364,7 +364,6 @@ int Cublas_GMRES(reax_system *system, static_storage *workspace, real *b, real t real t_start, t_elapsed; real *spad = (real *)scratch; real *g = (real *) calloc ((RESTART+1), REAL_SIZE); - cublasHandle_t cublasHandle; N = H->n; @@ -382,7 +381,7 @@ int Cublas_GMRES(reax_system *system, static_storage *workspace, real *b, real t copy_host_device ( &bnorm, spad + BLOCKS_POW_2, REAL_SIZE, cudaMemcpyDeviceToHost, __LINE__); */ - cublasCheckError (cublasDnrm2 ( cublasHandle, N, b, 1, &bnorm )); + cublasCheckError( cublasDnrm2( cublasHandle, N, b, 1, &bnorm ) ); #ifdef __DEBUG_CUDA__ fprintf (stderr, "Norm of the array is %e \n", bnorm ); diff --git a/PuReMD-GPU/src/cuda_utils.cu b/PuReMD-GPU/src/cuda_utils.cu index c420db76..1efcf28a 100644 --- a/PuReMD-GPU/src/cuda_utils.cu +++ b/PuReMD-GPU/src/cuda_utils.cu @@ -20,6 +20,12 @@ #include "cuda_utils.h" +cublasStatus_t cublasStatus; +cublasHandle_t cublasHandle; +cusparseHandle_t cusparseHandle; +cusparseStatus_t cusparseStatus; +cusparseMatDescr_t matdescriptor; + void cuda_malloc( void **ptr, int size, int memset, int err_code ) { diff --git a/PuReMD-GPU/src/cuda_utils.h b/PuReMD-GPU/src/cuda_utils.h index c8976d08..bea6d3b2 100644 --- a/PuReMD-GPU/src/cuda_utils.h +++ b/PuReMD-GPU/src/cuda_utils.h @@ -28,6 +28,12 @@ #define IDX2C(i,j,ld) (((j)*(ld))+(i)) +extern cublasStatus_t cublasStatus; +extern cublasHandle_t cublasHandle; +extern cusparseHandle_t cusparseHandle; +extern cusparseStatus_t cusparseStatus; +extern cusparseMatDescr_t matdescriptor; + #ifdef __cplusplus extern "C" { diff --git a/PuReMD-GPU/src/mytypes.h b/PuReMD-GPU/src/mytypes.h index 273f9597..0eb1856a 100644 --- a/PuReMD-GPU/src/mytypes.h +++ b/PuReMD-GPU/src/mytypes.h @@ -1118,13 +1118,13 @@ typedef struct typedef void (*interaction_function)(reax_system*, control_params*, - simulation_data*, static_storage*, - list**, output_controls*); + simulation_data*, static_storage*, list**, output_controls*); + extern interaction_function Interaction_Functions[NO_OF_INTERACTIONS]; typedef void (*evolve_function)(reax_system*, control_params*, - simulation_data*, static_storage*, - list**, output_controls*); + simulation_data*, static_storage*, + list**, output_controls*); typedef real (*lookup_function)(real); extern lookup_table Exp, Sqrt, Cube_Root, Four_Third_Root, Cos, Sin, ACos; @@ -1136,7 +1136,6 @@ typedef void (*get_far_neighbors_function)(rvec, rvec, simulation_box*, control_params*, far_neighbor_data*, int*); - /* CUDA structures */ extern list *dev_lists; extern static_storage *dev_workspace; diff --git a/PuReMD-GPU/src/testmd.c b/PuReMD-GPU/src/testmd.c index afe0cd4a..57d8859d 100644 --- a/PuReMD-GPU/src/testmd.c +++ b/PuReMD-GPU/src/testmd.c @@ -315,8 +315,9 @@ int main( int argc, char* argv[] ) fprintf (stderr, " Results does not match between Device and host @ step --> %d \n", data.step); exit (1); } -#endif + fprintf (stderr, "step -> %d <- done. \n", data.step); +#endif } if( out_control.write_steps > 0 ) { -- GitLab