From 453a4c174ca50082fcf79d12b93926f3924e9517 Mon Sep 17 00:00:00 2001
From: "Kurt A. O'Hearn" <ohearnku@msu.edu>
Date: Tue, 6 Dec 2016 09:56:17 -0500
Subject: [PATCH] PuReMD-GPU: fix global CUDA libary variable linkage.

---
 PuReMD-GPU/Makefile.am             |  2 +-
 PuReMD-GPU/configure.ac            | 54 ++++++++++++++++--------------
 PuReMD-GPU/src/cuda_environment.cu |  8 +++--
 PuReMD-GPU/src/cuda_lin_alg.cu     |  3 +-
 PuReMD-GPU/src/cuda_utils.cu       |  6 ++++
 PuReMD-GPU/src/cuda_utils.h        |  6 ++++
 PuReMD-GPU/src/mytypes.h           |  9 +++--
 PuReMD-GPU/src/testmd.c            |  3 +-
 8 files changed, 53 insertions(+), 38 deletions(-)

diff --git a/PuReMD-GPU/Makefile.am b/PuReMD-GPU/Makefile.am
index 016114db..3ab7bdba 100644
--- a/PuReMD-GPU/Makefile.am
+++ b/PuReMD-GPU/Makefile.am
@@ -3,7 +3,7 @@ ACLOCAL_AMFLAGS = -I ../m4
 SUFFIXES = .cu
 include ../cuda.am
 
-AM_CFLAGS = -Wall -O3 -funroll-loops -fstrict-aliasing
+AM_CFLAGS = -Wall -O3 -funroll-loops -fstrict-aliasing -m64
 AM_CPPFLAGS =
 AM_LDFLAGS =
 
diff --git a/PuReMD-GPU/configure.ac b/PuReMD-GPU/configure.ac
index 30e7e0bf..38c7cf73 100644
--- a/PuReMD-GPU/configure.ac
+++ b/PuReMD-GPU/configure.ac
@@ -65,33 +65,35 @@ then
 fi
 AC_DEFINE([HAVE_CUDA], [1], [Define to 1 if you have CUDA support enabled.])
 
-AC_SEARCH_LIBS([cublasDnrm2], [cublas])
-AC_SEARCH_LIBS([cublasDaxpy], [cublas])
-AC_SEARCH_LIBS([cublasDscal], [cublas])
-AC_SEARCH_LIBS([cublasDdot], [cublas])
-AC_SEARCH_LIBS([cudaThreadSynchronize], [cudart])
-AC_SEARCH_LIBS([cudaGetLastError], [cudart])
+AC_CHECK_LIB([cuda], [cuGetErrorString])
 AC_CHECK_LIB([cudart], [cudaMalloc])
-AC_SEARCH_LIBS([cusparseCreateMatDescr], [cusparse])
-AC_SEARCH_LIBS([cusparseSetMatType], [cusparse])
-AC_SEARCH_LIBS([cusparseSetMatIndexBase], [cusparse])
-
-AC_SEARCH_LIBS([cublasDnrm2], [cublas],
-	[CUBLAS_FOUND_LIBS="yes"], [CUBLAS_FOUND_LIBS="no"], [-lcublas])
-AS_IF([test "x${CUBLAS_FOUND_LIBS}" != "xyes"],
-	[AC_MSG_ERROR([Unable to find CUBLAS library.])])
-
-AC_SEARCH_LIBS([cusparseSetMatType], [cusparse],
-	[CUSPARSE_FOUND_LIBS="yes"], [CUSPARSE_FOUND_LIBS="no"], [-lcusparse])
-AS_IF([test "x${CUSPARSE_FOUND_LIBS}" != "xyes"],
-	[AC_MSG_ERROR([Unable to find CUSPARSE library.])])
-
-AC_CHECK_TYPES([cublasHandle_t], [], 
-	       [AC_MSG_FAILURE([cublasHandle_t type not found in cublas.h], [1])], [#include<cublas_v2.h>])
-AC_CHECK_TYPES([cusparseHandle_t], [], 
-	       [AC_MSG_FAILURE([cusparseHandle_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>])
-AC_CHECK_TYPES([cusparseMatDescr_t], [], 
-	       [AC_MSG_FAILURE([cusparseMatDescr_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>])
+AC_CHECK_LIB([cublas], [cublasDnrm2])
+AC_CHECK_LIB([cusparse], [cusparseCreateMatDescr])
+#AC_SEARCH_LIBS([cublasDaxpy], [cublas])
+#AC_SEARCH_LIBS([cublasDscal], [cublas])
+#AC_SEARCH_LIBS([cublasDdot], [cublas])
+#AC_SEARCH_LIBS([cudaThreadSynchronize], [cudart])
+#AC_SEARCH_LIBS([cudaGetLastError], [cudart])
+#AC_SEARCH_LIBS([cusparseCreateMatDescr], [cusparse])
+#AC_SEARCH_LIBS([cusparseSetMatType], [cusparse])
+#AC_SEARCH_LIBS([cusparseSetMatIndexBase], [cusparse])
+#
+#AC_SEARCH_LIBS([cublasDnrm2], [cublas],
+#	[CUBLAS_FOUND_LIBS="yes"], [CUBLAS_FOUND_LIBS="no"], [-lcublas])
+#AS_IF([test "x${CUBLAS_FOUND_LIBS}" != "xyes"],
+#	[AC_MSG_ERROR([Unable to find CUBLAS library.])])
+#
+#AC_SEARCH_LIBS([cusparseSetMatType], [cusparse],
+#	[CUSPARSE_FOUND_LIBS="yes"], [CUSPARSE_FOUND_LIBS="no"], [-lcusparse])
+#AS_IF([test "x${CUSPARSE_FOUND_LIBS}" != "xyes"],
+#	[AC_MSG_ERROR([Unable to find CUSPARSE library.])])
+#
+#AC_CHECK_TYPES([cublasHandle_t], [], 
+#	       [AC_MSG_FAILURE([cublasHandle_t type not found in cublas.h], [1])], [#include<cublas_v2.h>])
+#AC_CHECK_TYPES([cusparseHandle_t], [], 
+#	       [AC_MSG_FAILURE([cusparseHandle_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>])
+#AC_CHECK_TYPES([cusparseMatDescr_t], [], 
+#	       [AC_MSG_FAILURE([cusparseMatDescr_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>])
 
 if test "BUILD_PROF" = "true"
 then
diff --git a/PuReMD-GPU/src/cuda_environment.cu b/PuReMD-GPU/src/cuda_environment.cu
index cd6ae50d..caae5168 100644
--- a/PuReMD-GPU/src/cuda_environment.cu
+++ b/PuReMD-GPU/src/cuda_environment.cu
@@ -28,9 +28,6 @@ void Setup_Cuda_Environment( int rank, int nprocs, int gpus_per_node )
 
     int deviceCount = 0;
     cudaError_t flag;
-    cublasHandle_t cublasHandle;
-    cusparseHandle_t cusparseHandle;
-    cusparseMatDescr_t matdescriptor;
     
     flag = cudaGetDeviceCount( &deviceCount );
 
@@ -44,6 +41,11 @@ void Setup_Cuda_Environment( int rank, int nprocs, int gpus_per_node )
     //and assign the GPU for each process
     //TODO: handle condition where # CPU procs > # GPUs
     cudaSetDevice( rank % deviceCount );
+    cudaCheckError( );
+
+    /* reset device and clear previous allocations */
+    Cleanup_Cuda_Environment( );    
+    cudaCheckError( );
 
 #if defined(__CUDA_DEBUG__)
     fprintf( stderr, "p:%d is using GPU: %d \n", rank, rank % deviceCount );
diff --git a/PuReMD-GPU/src/cuda_lin_alg.cu b/PuReMD-GPU/src/cuda_lin_alg.cu
index 5dc9eb35..9f6cd459 100644
--- a/PuReMD-GPU/src/cuda_lin_alg.cu
+++ b/PuReMD-GPU/src/cuda_lin_alg.cu
@@ -364,7 +364,6 @@ int Cublas_GMRES(reax_system *system, static_storage *workspace, real *b, real t
     real t_start, t_elapsed;
     real *spad = (real *)scratch;
     real *g = (real *) calloc ((RESTART+1), REAL_SIZE);
-    cublasHandle_t cublasHandle;
 
     N = H->n;
 
@@ -382,7 +381,7 @@ int Cublas_GMRES(reax_system *system, static_storage *workspace, real *b, real t
        copy_host_device ( &bnorm, spad + BLOCKS_POW_2, REAL_SIZE, cudaMemcpyDeviceToHost, __LINE__);
      */
 
-    cublasCheckError (cublasDnrm2 ( cublasHandle, N, b, 1, &bnorm ));
+    cublasCheckError( cublasDnrm2( cublasHandle, N, b, 1, &bnorm ) );
 
 #ifdef __DEBUG_CUDA__
     fprintf (stderr, "Norm of the array is %e \n", bnorm );
diff --git a/PuReMD-GPU/src/cuda_utils.cu b/PuReMD-GPU/src/cuda_utils.cu
index c420db76..1efcf28a 100644
--- a/PuReMD-GPU/src/cuda_utils.cu
+++ b/PuReMD-GPU/src/cuda_utils.cu
@@ -20,6 +20,12 @@
 
 #include "cuda_utils.h"
 
+cublasStatus_t cublasStatus;
+cublasHandle_t cublasHandle;
+cusparseHandle_t cusparseHandle;
+cusparseStatus_t cusparseStatus;
+cusparseMatDescr_t matdescriptor;
+
 
 void cuda_malloc( void **ptr, int size, int memset, int err_code )
 {
diff --git a/PuReMD-GPU/src/cuda_utils.h b/PuReMD-GPU/src/cuda_utils.h
index c8976d08..bea6d3b2 100644
--- a/PuReMD-GPU/src/cuda_utils.h
+++ b/PuReMD-GPU/src/cuda_utils.h
@@ -28,6 +28,12 @@
 
 #define IDX2C(i,j,ld) (((j)*(ld))+(i))
 
+extern cublasStatus_t cublasStatus;
+extern cublasHandle_t cublasHandle;
+extern cusparseHandle_t cusparseHandle;
+extern cusparseStatus_t cusparseStatus;
+extern cusparseMatDescr_t matdescriptor;
+
 
 #ifdef __cplusplus
 extern "C"  {
diff --git a/PuReMD-GPU/src/mytypes.h b/PuReMD-GPU/src/mytypes.h
index 273f9597..0eb1856a 100644
--- a/PuReMD-GPU/src/mytypes.h
+++ b/PuReMD-GPU/src/mytypes.h
@@ -1118,13 +1118,13 @@ typedef struct
 
 
 typedef void (*interaction_function)(reax_system*, control_params*,
-                                     simulation_data*, static_storage*,
-                                     list**, output_controls*);
+        simulation_data*, static_storage*, list**, output_controls*);
+
 extern interaction_function Interaction_Functions[NO_OF_INTERACTIONS];
 
 typedef void (*evolve_function)(reax_system*, control_params*,
-                                simulation_data*, static_storage*,
-                                list**, output_controls*);
+        simulation_data*, static_storage*,
+        list**, output_controls*);
 
 typedef real (*lookup_function)(real);
 extern lookup_table Exp, Sqrt, Cube_Root, Four_Third_Root, Cos, Sin, ACos;
@@ -1136,7 +1136,6 @@ typedef void (*get_far_neighbors_function)(rvec, rvec, simulation_box*,
         control_params*, far_neighbor_data*,
         int*);
 
-
 /* CUDA structures */
 extern list *dev_lists;
 extern static_storage *dev_workspace;
diff --git a/PuReMD-GPU/src/testmd.c b/PuReMD-GPU/src/testmd.c
index afe0cd4a..57d8859d 100644
--- a/PuReMD-GPU/src/testmd.c
+++ b/PuReMD-GPU/src/testmd.c
@@ -315,8 +315,9 @@ int main( int argc, char* argv[] )
             fprintf (stderr, " Results does not match between Device and host @ step --> %d \n", data.step);
             exit (1);
         }
-#endif
+
         fprintf (stderr, "step -> %d <- done. \n", data.step);
+#endif
     }
 
     if( out_control.write_steps > 0 ) { 
-- 
GitLab