Skip to content
Snippets Groups Projects
Commit ca5e3d11 authored by Kurt A. O'Hearn's avatar Kurt A. O'Hearn
Browse files

PG-PuReMD: fix build system linker argument ordering (LDFLAGS -> LDADD). Fix...

PG-PuReMD: fix build system linker argument ordering (LDFLAGS -> LDADD). Fix inline functions. Disable optimized CUDA code generation by default (added Makefile flags to enable along with preprocessor defs to enable relevant code paths).
parent 26d2e66d
No related branches found
No related tags found
No related merge requests found
......@@ -5,19 +5,13 @@ SUFFIXES = .cu
include ../cuda.am
endif
AM_CFLAGS = -Wall -O3 -funroll-loops -fstrict-aliasing $(MPI_CFLAGS)
AM_CPPFLAGS =
AM_LDFLAGS = $(MPI_LDFLAGS)
if USE_CUDA
# default CUDA nvcc flags
# Note: cc 13 for Tesla
# Note: cc 20 for Fermi
# Note: cc 30 for Kepler K10
# Note: cc 35 for Kepler K20
NVCCFLAGS += -use_fast_math
NVCCFLAGS += -gencode arch=compute_35,code=sm_35
NVCCFLAGS += --compiler-options "$(DEFS) -D__SM_35__ -O3 -funroll-loops -fstrict-aliasing $(MPI_CFLAGS)"
# flags for CUDA compilation via NVCC (see cuda.am)
# Note: cc 13 for Tesla, cc 20 for Fermi, cc 30 for Kepler K10, cc 35 for Kepler K20/K40, etc.
#NVCCFLAGS += -use_fast_math -gencode arch=compute_35,code=sm_35
NVCCFLAGS += -use_fast_math $(NVCC_OPT_CODE)
#NVCCFLAGS += --compiler-options "$(DEFS) -D__SM_35__ -O3 -funroll-loops -fstrict-aliasing $(MPI_CFLAGS)"
NVCCFLAGS += --compiler-options "$(DEFS) $(NVCC_OPT_CODE_DEFS) -O3 -funroll-loops -fstrict-aliasing $(MPI_CFLAGS)"
#NVCCFLAGS += --ptxas-options -v
endif
......@@ -66,11 +60,11 @@ nodist_EXTRA_bin_pg_puremd_SOURCES = src/dummy.c
endif
bin_pg_puremd_CFLAGS = $(AM_CFLAGS) $(CFLAGS)
bin_pg_puremd_CFLAGS = $(AM_CFLAGS) -Wall -O3 -funroll-loops -fstrict-aliasing $(MPI_CFLAGS) $(CFLAGS)
bin_pg_puremd_CPPFLAGS = $(AM_CPPFLAGS) $(CPPFLAGS)
bin_pg_puremd_LDFLAGS = $(AM_LDFLAGS) $(LDFLAGS)
bin_pg_puremd_LDADD = $(AM_LDADD) $(MPI_LIBS) $(LDADD) -lstdc++
if USE_CUDA
bin_pg_puremd_CFLAGS += $(CUDA_CFLAGS)
bin_pg_puremd_LDFLAGS += $(CUDA_LIBS)
bin_pg_puremd_LDADD += $(CUDA_LIBS)
endif
......@@ -68,17 +68,17 @@ CC="$sav_CC"
CFLAGS="$sav_CFLAGS"
#
# try to set MPI_CFLAGS and MPI_LDFLAGS
# try to set MPI_CFLAGS and MPI_LIBS
#
MPI_CFLAGS=
MPI_LDFLAGS=
MPI_LIBS=
if test "$mpi_vendor" = "OpenMPI"
then
MPI_CFLAGS=`$MPICC --showme:compile`
MPI_LDFLAGS=`$MPICC --showme:link`
MPI_LIBS=`$MPICC --showme:link`
AC_MSG_NOTICE([OpenMPI found])
AC_MSG_NOTICE([MPI_CFLAGS=$MPI_CFLAGS])
AC_MSG_NOTICE([MPI_LDFLAGS=$MPI_LDFLAGS])
AC_MSG_NOTICE([MPI_LIBS=$MPI_LIBS])
elif test "$mpi_vendor" = "MPICH"
then
# build MPI_CFLAGS
......@@ -92,24 +92,24 @@ then
;;
esac
done
# build MPI_LDFLAGS
# build MPI_LIBS
tmp=`$MPICC -link-info | awk '{$1=""; print $0 }'`
for i in $tmp
do
case $i in
[[\\/]]*.a | ?:[[\\/]]*.a | -[[lLRu]]* | -Wl* )
MPI_LDFLAGS="$MPI_LDFLAGS $i"
MPI_LIBS="$MPI_LIBS $i"
;;
esac
done
AC_MSG_NOTICE([MPICH found])
AC_MSG_NOTICE([MPI_CFLAGS=$MPI_CFLAGS])
AC_MSG_NOTICE([MPI_LDFLAGS=$MPI_LDFLAGS])
AC_MSG_NOTICE([MPI_LIBS=$MPI_LIBS])
else
AC_MSG_WARN([Neither OpenMPI and MPICH have been recognized...])
fi
AC_SUBST(MPI_CFLAGS)
AC_SUBST(MPI_LDFLAGS)
AC_SUBST(MPI_LIBS)
# Check for CUDA support.
if test "x$BUILD_GPU" = "xyes"; then
......
......@@ -27,7 +27,7 @@ CUDA_DEVICE static inline int cuda_strcmp (char *a, char *b, int len)
}
CUDA_DEVICE static inline real atomicAdd(real* address, real val)
CUDA_DEVICE static inline real myatomicAdd(real* address, real val)
{
unsigned long long int* address_as_ull =
(unsigned long long int*)address;
......@@ -46,17 +46,17 @@ CUDA_DEVICE static inline real atomicAdd(real* address, real val)
CUDA_DEVICE static inline void atomic_rvecAdd( rvec ret, rvec v )
{
atomicAdd ( &ret[0], v[0] );
atomicAdd ( &ret[1], v[1] );
atomicAdd ( &ret[2], v[2] );
myatomicAdd ( &ret[0], v[0] );
myatomicAdd ( &ret[1], v[1] );
myatomicAdd ( &ret[2], v[2] );
}
CUDA_DEVICE static inline void atomic_rvecScaledAdd( rvec ret, real c, rvec v )
{
atomicAdd ( &ret[0], c * v[0] );
atomicAdd ( &ret[1], c * v[1] );
atomicAdd ( &ret[2], c * v[2] );
myatomicAdd ( &ret[0], c * v[0] );
myatomicAdd ( &ret[1], c * v[1] );
myatomicAdd ( &ret[2], c * v[2] );
}
#endif
......@@ -402,7 +402,7 @@ CUDA_GLOBAL void Cuda_Torsion_Angles( reax_atom *my_atoms,
pbond_jk->ta_CdDelta += CEtors3;
bo_ij->Cdbo += (CEtors4 + CEconj1);
bo_jk->Cdbo += (CEtors5 + CEconj2);
atomicAdd ( &pbond_kl->ta_Cdbo, (CEtors6 + CEconj3));
myatomicAdd ( &pbond_kl->ta_Cdbo, (CEtors6 + CEconj3));
if( control->virial == 0 ) {
/* dcos_theta_ijk */
......
......@@ -86,7 +86,8 @@ then
AC_MSG_RESULT([nvcc version : $NVCC_VERSION])
# test if architecture is 64 bits and NVCC version >= 2.3
libdir=lib
#libdir=lib #NOTE: was lib, but changed to lib64 for CUDA 8.0
libdir=lib64
if test "x$host_cpu" = xx86_64 ; then
if test "x$NVCC_VERSION" \> "x2.2" ; then
libdir=lib64
......@@ -215,23 +216,23 @@ then
NVCCFLAGS=" -deviceemu"
fi
#
AS_IF([test "x$want_cuda" = xyes],
[AS_IF([test "x$NVCCFLAGS" = x],
[dnl generate CUDA code for broad spectrum of devices
dnl Note: cc 13 for Tesla
dnl Note: cc 20 for Fermi
dnl Note: cc 30 for Kepler K10
dnl Note: cc 35 for Kepler K20
NVCCFLAGS=["-gencode arch=compute_10,code=sm_10 \
-gencode arch=compute_11,code=sm_11 \
-gencode arch=compute_13,code=sm_13 \
-gencode arch=compute_20,code=sm_20 \
-gencode arch=compute_30,code=sm_30 \
-gencode arch=compute_35,code=sm_35"]
]
)
]
)
#AS_IF([test "x$want_cuda" = xyes],
# [AS_IF([test "x$NVCCFLAGS" = x],
# [dnl generate CUDA code for broad spectrum of devices
# dnl Note: cc 13 for Tesla
# dnl Note: cc 20 for Fermi
# dnl Note: cc 30 for Kepler K10
# dnl Note: cc 35 for Kepler K20
# NVCCFLAGS=["-gencode arch=compute_10,code=sm_10 \
# -gencode arch=compute_11,code=sm_11 \
# -gencode arch=compute_13,code=sm_13 \
# -gencode arch=compute_20,code=sm_20 \
# -gencode arch=compute_30,code=sm_30 \
# -gencode arch=compute_35,code=sm_35"]
# ]
# )
# ]
# )
if test x$want_fast_math = xyes
then
NVCCFLAGS+=" -use_fast_math"
......
0% — Loading failed or the content could not be displayed.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment