Skip to content
Snippets Groups Projects
Commit 9e484466 authored by Kurt A. O'Hearn's avatar Kurt A. O'Hearn
Browse files

Update .gitignore (restart files). Add missing updates to m4 files (CUDA).

parent ca9977e8
No related branches found
No related tags found
No related merge requests found
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
*.prs *.prs
*.pot *.pot
*.trj *.trj
*.res*
# TeX # TeX
*.aux *.aux
......
...@@ -11,4 +11,4 @@ AM_V_NVCC_0 = @echo " NVCC " $@; ...@@ -11,4 +11,4 @@ AM_V_NVCC_0 = @echo " NVCC " $@;
AM_V_NVCC_1 = AM_V_NVCC_1 =
.cu.o: .cu.o:
$(AM_V_NVCC)$(NVCC) $(NVCCFLAGS) -o $@ -c $< $(AM_V_NVCC)$(NVCC) $(AM_NVCCFLAGS) $(NVCCFLAGS) -o $@ -c $<
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
# AC_SUBST(CUDA_CFLAGS) # AC_SUBST(CUDA_CFLAGS)
# AC_SUBST(CUDA_LIBS) # AC_SUBST(CUDA_LIBS)
# AC_SUBST(NVCC) # AC_SUBST(NVCC)
# AC_SUBST(NVCCFLAGS) # AC_SUBST(NFLAGS)
# #
AC_DEFUN([AX_CUDA], AC_DEFUN([AX_CUDA],
[ [
...@@ -61,6 +61,27 @@ AC_ARG_WITH([cuda], ...@@ -61,6 +61,27 @@ AC_ARG_WITH([cuda],
want_cuda="yes" want_cuda="yes"
]) ])
AC_ARG_ENABLE([cuda-fast-math],
AC_HELP_STRING([--enable-cuda-fast-math], [Turn on fast, less precise math functions in CUDA]),
[case "${enableval}" in
yes) CUDA_FAST_MATH=true ;;
no) CUDA_FAST_MATH=false ;;
*) AC_MSG_ERROR([bad value ${enableval} for --enable-cuda-fast-math]) ;;
esac],
[CUDA_FAST_MATH=false]
)
AC_ARG_ENABLE([emu],
AS_HELP_STRING([--enable-emu], [Turn on device emulation for CUDA]),
[case "${enableval}" in
yes) EMULATION=true ;;
no) EMULATION=false ;;
*) AC_MSG_ERROR([bad value ${enableval} for --enable-emu]) ;;
esac],
[EMULATION=false]
)
#AM_CONDITIONAL(USE_CUDA, test "x${want_cuda}" = xyes) #AM_CONDITIONAL(USE_CUDA, test "x${want_cuda}" = xyes)
if test "$want_cuda" = "yes" if test "$want_cuda" = "yes"
...@@ -141,16 +162,20 @@ then ...@@ -141,16 +162,20 @@ then
AC_LANG_PROGRAM([@%:@include <cuda.h>], AC_LANG_PROGRAM([@%:@include <cuda.h>],
[ [
CUmodule cuModule; CUmodule cuModule;
cuModuleLoad(&cuModule, "myModule.cubin");
CUdeviceptr devPtr; CUdeviceptr devPtr;
CUfunction cuFunction; CUfunction cuFunction;
size_t pitch, width = 250, height = 500; size_t pitch, width = 250, height = 500;
cuMemAllocPitch(&devPtr, &pitch,width * sizeof(float), height, 4);
cuModuleGetFunction(&cuFunction, cuModule, "myKernel"); void main()
cuFuncSetBlockShape(cuFunction, 512, 1, 1); {
cuParamSeti(cuFunction, 0, devPtr); cuModuleLoad(&cuModule, "myModule.cubin");
cuParamSetSize(cuFunction, sizeof(devPtr)); cuMemAllocPitch(&devPtr, &pitch,width * sizeof(float), height, 4);
cuLaunchGrid(cuFunction, 100, 1); cuModuleGetFunction(&cuFunction, cuModule, "myKernel");
cuFuncSetBlockShape(cuFunction, 512, 1, 1);
cuParamSeti(cuFunction, 0, devPtr);
cuParamSetSize(cuFunction, sizeof(devPtr));
cuLaunchGrid(cuFunction, 100, 1);
}
]) ])
], ],
[ [
...@@ -175,68 +200,19 @@ then ...@@ -175,68 +200,19 @@ then
fi fi
fi fi
AC_SUBST(CUDA_CFLAGS)
AC_SUBST(CUDA_LIBS)
AC_SUBST(NVCC)
AC_ARG_WITH([cuda-fast-math],
[AC_HELP_STRING([--with-cuda-fast-math],
[Tell nvcc to use -use_fast_math flag])],
[
if test "$withval" = "no"
then
want_fast_math="no"
elif test "$withval" = "yes"
then
want_fast_math="yes"
else
with_fast_math="$withval"
want_fast_math="yes"
fi
],
[
want_fast_math="yes"
]
)
AC_ARG_ENABLE([emu],
AS_HELP_STRING([--enable-emu], [Turn on device emulation for CUDA]),
[case "${enableval}" in
yes) EMULATION=true ;;
no) EMULATION=false ;;
*) AC_MSG_ERROR([bad value ${enableval} for --enable-emu]) ;;
esac],
[EMULATION=false]
)
# default nvcc flags
if test x$EMULATION = xtrue if test x$EMULATION = xtrue
then then
NVCCFLAGS=" -deviceemu" NFLAGS+=" -deviceemu"
fi fi
#AS_IF([test "x$want_cuda" = xyes], if test x$CUDA_FAST_MATH = xtrue
# [AS_IF([test "x$NVCCFLAGS" = x],
# [dnl generate CUDA code for broad spectrum of devices
# dnl Note: cc 13 for Tesla
# dnl Note: cc 20 for Fermi
# dnl Note: cc 30 for Kepler K10
# dnl Note: cc 35 for Kepler K20
# NVCCFLAGS=["-gencode arch=compute_10,code=sm_10 \
# -gencode arch=compute_11,code=sm_11 \
# -gencode arch=compute_13,code=sm_13 \
# -gencode arch=compute_20,code=sm_20 \
# -gencode arch=compute_30,code=sm_30 \
# -gencode arch=compute_35,code=sm_35"]
# ]
# )
# ]
# )
if test x$want_fast_math = xyes
then then
NVCCFLAGS+=" -use_fast_math" NFLAGS+=" -use_fast_math"
fi fi
AC_MSG_NOTICE([Using NVCCFLAGS=$NVCCFLAGS]) AC_MSG_NOTICE([Using NFLAGS=$NFLAGS])
AC_SUBST(NVCCFLAGS)
AC_SUBST(CUDA_CFLAGS)
AC_SUBST(CUDA_LIBS)
AC_SUBST(NVCC)
AC_SUBST(NFLAGS)
]) ])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment