diff --git a/Makefile.am b/Makefile.am index f777f20c02953a750e90ac8e7767e82377d52cff..e46a9e3fbcd6bd0a4d675a31801856f0afde39e5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -4,8 +4,8 @@ SUBDIRS = DIST_SUBDIRS = if BUILD_S_OMP -SUBDIRS += sPuReMD -DIST_SUBDIRS += sPuReMD +SUBDIRS += PuReMD-GPU +DIST_SUBDIRS += PuReMD-GPU endif if BUILD_MPI SUBDIRS += PuReMD diff --git a/PuReMD-GPU/Makefile.am b/PuReMD-GPU/Makefile.am index 3ab7bdba493aa8a5d56c64a100239eeff99d4563..f57bed636093bf0d5a822a4c77d5861524705e26 100644 --- a/PuReMD-GPU/Makefile.am +++ b/PuReMD-GPU/Makefile.am @@ -1,7 +1,9 @@ ACLOCAL_AMFLAGS = -I ../m4 +if USE_CUDA SUFFIXES = .cu include ../cuda.am +endif AM_CFLAGS = -Wall -O3 -funroll-loops -fstrict-aliasing -m64 AM_CPPFLAGS = @@ -18,21 +20,35 @@ NVCCFLAGS += --compiler-options "$(DEFS) -D__SM_35__ -O3 -funroll-loops -fstrict #NVCCFLAGS += -Xcompiler -fPIC -dc #NVCCFLAGS += --ptxas-options -v -bin_PROGRAMS = bin/puremd-gpu -bin_puremd_gpu_SOURCES = src/analyze.c src/print_utils.c \ - src/restart.c src/param.c src/pdb_tools.c src/box.c \ - src/lin_alg.c src/QEq.c src/allocate.c src/bond_orders.c \ +bin_PROGRAMS = bin/spuremd +bin_spuremd_SOURCES = src/analyze.c src/print_utils.c \ + src/restart.c src/tool_box.c src/control.c src/ffield.c \ + src/geo_tools.c src/box.c \ + src/lin_alg.c src/qeq.c src/allocate.c src/bond_orders.c \ src/forces.c src/four_body_interactions.c \ src/grid.c src/init_md.c src/integrate.c src/list.c \ src/lookup.c src/neighbors.c \ src/reset_utils.c src/single_body_interactions.c \ src/system_props.c src/three_body_interactions.c \ src/traj.c src/two_body_interactions.c src/vector.c \ - src/testmd.c \ - src/cuda_utils.cu src/cuda_copy.cu src/cuda_init.cu src/cuda_reduction.cu \ - src/cuda_center_mass.cu src/cuda_box.cu src/validation.cu \ + src/testmd.c +include_HEADERS = src/mytypes.h src/analyze.h src/print_utils.h \ + src/restart.h src/tool_box.c src/control.h src/ffield.c \ + src/geo_tools.h src/box.h \ + src/lin_alg.h src/qeq.h 
src/allocate.h src/bond_orders.h \ + src/forces.h src/four_body_interactions.h \ + src/grid.h src/init_md.h src/integrate.h src/list.h \ + src/lookup.h src/neighbors.h \ + src/reset_utils.h src/single_body_interactions.h \ + src/system_props.h src/three_body_interactions.h \ + src/traj.h src/two_body_interactions.h src/vector.h + +if USE_CUDA +bin_spuremd_SOURCES += src/cuda_utils.cu src/cuda_copy.cu \ + src/cuda_init.cu src/cuda_reduction.cu \ + src/cuda_center_mass.cu src/cuda_box.cu src/cuda_validation.cu \ src/cuda_allocate.cu src/cuda_bond_orders.cu \ - src/cuda_lin_alg.cu src/cuda_QEq.cu \ + src/cuda_lin_alg.cu src/cuda_qeq.cu \ src/cuda_forces.cu src/cuda_four_body_interactions.cu \ src/cuda_grid.cu src/cuda_init_md.cu src/cuda_integrate.cu src/cuda_list.cu \ src/cuda_lookup.cu src/cuda_neighbors.cu \ @@ -40,19 +56,11 @@ bin_puremd_gpu_SOURCES = src/analyze.c src/print_utils.c \ src/cuda_system_props.cu src/cuda_three_body_interactions.cu \ src/cuda_two_body_interactions.cu src/cuda_environment.cu \ src/cuda_post_evolve.cu -include_HEADERS = src/mytypes.h src/analyze.h src/print_utils.h \ - src/restart.h src/param.h src/pdb_tools.h src/box.h \ - src/lin_alg.h src/QEq.h src/allocate.h src/bond_orders.h \ - src/forces.h src/four_body_interactions.h \ - src/grid.h src/init_md.h src/integrate.h src/list.h \ - src/lookup.h src/neighbors.h \ - src/reset_utils.h src/single_body_interactions.h \ - src/system_props.h src/three_body_interactions.h \ - src/traj.h src/two_body_interactions.h src/vector.h \ - src/cuda_utils.h src/cuda_copy.h src/cuda_init.h src/cuda_reduction.h \ - src/cuda_center_mass.h src/cuda_box.h src/validation.h \ +include_HEADERS += src/cuda_utils.h src/cuda_copy.h \ + src/cuda_init.h src/cuda_reduction.h \ + src/cuda_center_mass.h src/cuda_box.h src/cuda_validation.h \ src/cuda_allocate.h src/cuda_bond_orders.h \ - src/cuda_lin_alg.h src/cuda_QEq.h \ + src/cuda_lin_alg.h src/cuda_qeq.h \ src/cuda_forces.h src/cuda_four_body_interactions.h \ 
src/cuda_grid.h src/cuda_init_md.h src/cuda_integrate.h src/cuda_list.h \ src/cuda_lookup.h src/cuda_neighbors.h \ @@ -62,8 +70,16 @@ include_HEADERS = src/mytypes.h src/analyze.h src/print_utils.h \ src/cuda_post_evolve.h # dummy source to cause C linking -nodist_EXTRA_bin_puremd_gpu_SOURCES = src/dummy.c +nodist_EXTRA_bin_spuremd_SOURCES = src/dummy.c + +endif + + +bin_spuremd_CFLAGS = $(AM_CFLAGS) $(CFLAGS) +bin_spuremd_CPPFLAGS = $(AM_CPPFLAGS) $(CPPFLAGS) +bin_spuremd_LDFLAGS = $(AM_LDFLAGS) $(LDFLAGS) -bin_puremd_gpu_CFLAGS = $(AM_CFLAGS) $(CFLAGS) $(CUDA_CFLAGS) -bin_puremd_gpu_CPPFLAGS = $(AM_CPPFLAGS) $(CPPFLAGS) -bin_puremd_gpu_LDFLAGS = $(AM_LDFLAGS) $(LDFLAGS) $(CUDA_LIBS) +if USE_CUDA +bin_spuremd_CFLAGS += $(CUDA_CFLAGS) +bin_spuremd_LDFLAGS += $(CUDA_LIBS) +endif diff --git a/PuReMD-GPU/aclocal.m4 b/PuReMD-GPU/aclocal.m4 index 2e1d098d2159d3a3069bc44cc5e0942cb9e86070..d6bf5baa543b24a4c3d8f9fc06ee2020a1d3f9bf 100644 --- a/PuReMD-GPU/aclocal.m4 +++ b/PuReMD-GPU/aclocal.m4 @@ -1150,4 +1150,5 @@ AC_SUBST([am__tar]) AC_SUBST([am__untar]) ]) # _AM_PROG_TAR +m4_include([../m4/ax_compiler_vendor.m4]) m4_include([../m4/ax_cuda.m4]) diff --git a/PuReMD-GPU/configure.ac b/PuReMD-GPU/configure.ac index 38c7cf737e44056c612a3b48f55708486ab78af4..5d0bca878282d6b7181c47328048ad590cd1afbf 100644 --- a/PuReMD-GPU/configure.ac +++ b/PuReMD-GPU/configure.ac @@ -53,47 +53,137 @@ AC_CHECK_TYPES([gzFile], [], # Checks for library functions. AC_FUNC_MALLOC AC_FUNC_STRTOD -AC_CHECK_FUNCS([memset pow sqrt]) +AC_CHECK_FUNCS([gettimeofday memset pow sqrt]) + +# Check for compiler vendor +AX_COMPILER_VENDOR +if test "x$ax_cv_c_compiler_vendor" = "xgnu"; then + if test "x$BUILD_DEBUG" = "x"; then + CFLAGS="$CFLAGS -Wall -O3 -funroll-loops -fstrict-aliasing" + else + CFLAGS="$CFLAGS -Wall" + fi +fi +if test "x$ax_cv_c_compiler_vendor" = "xintel"; then + if test "x$BUILD_DEBUG" = "x"; then + CFLAGS="$CFLAGS -fast" + fi +fi -# Check for CUDA support. 
-CONFIGURE_HEADLINE([ CUDA support ]) -AX_CUDA -NVCCFLAGS= -if test "BUILD_DEBUG" = "true" +# Check for OpenMP support. +if test "x$BUILD_OPENMP" = "xyes"; then + AC_OPENMP + if test "x${OPENMP_CFLAGS}" = "x"; then + AC_MSG_WARN([ + ----------------------------------------------- + Unable to find OpenMP support on this system. + Building a single-threaded version. + -----------------------------------------------]) + else + # bug due to recent Intel compiler change (?) + if test "x$ax_cv_c_compiler_vendor" = "xintel"; then + OPENMP_CFLAGS="-qopenmp" + fi + AC_SUBST(AM_CFLAGS, "$OPENMP_CFLAGS") + AC_SUBST(AM_CPPFLAGS, "$OPENMP_CFLAGS") + fi +fi + +if test "x$BUILD_SUPERLU_MT" != "x" then - NVCCFLAGS+=" -g -G" + CPPFLAGS="${CPPFLAGS} -I${BUILD_SUPERLU_MT}/include" + LDFLAGS="${LDFLAGS} -L${BUILD_SUPERLU_MT}/lib" + #TODO: implement better BLAS detection + LIBS="${LIBS} -lblas" +# BLAS_FOUND_LIBS="yes" +# AC_SEARCH_LIBS([dtrsv_], [blas blas_OPENMP], +# [], [BLAS_FOUND_LIBS="no"], []) +# AS_IF([test "x${BLAS_FOUND_LIBS}" != "xyes"], +# [AC_MSG_ERROR([Unable to find BLAS library.])]) + AC_CHECK_HEADERS([slu_mt_ddefs.h], [SUPERLU_MT_FOUND_HEADERS="yes"]) + AS_IF([test "x${SUPERLU_MT_FOUND_HEADERS}" != "xyes"], + [AC_MSG_ERROR([Unable to find SuperLU MT headers.])]) + SUPERLU_MT_FOUND_LIBS="yes" + #TODO: fix issue where multiple -l flags added, one for each call below + AC_SEARCH_LIBS([intMalloc], [superlu_mt superlu_mt_OPENMP], + [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) + AC_SEARCH_LIBS([get_perm_c], [superlu_mt superlu_mt_OPENMP], + [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) + AC_SEARCH_LIBS([pdgstrf_init], [superlu_mt superlu_mt_OPENMP], + [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp -lblas -lblas_OPENMP]) + AC_SEARCH_LIBS([pdgstrf], [superlu_mt superlu_mt_OPENMP], + [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp -lblas -lblas_OPENMP]) + AC_SEARCH_LIBS([pxgstrf_finalize], [superlu_mt superlu_mt_OPENMP], + [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp -lblas -lblas_OPENMP]) + 
AC_SEARCH_LIBS([StatAlloc], [superlu_mt superlu_mt_OPENMP], + [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) + AC_SEARCH_LIBS([StatInit], [superlu_mt superlu_mt_OPENMP], + [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) + AC_SEARCH_LIBS([StatFree], [superlu_mt superlu_mt_OPENMP], + [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) + AC_SEARCH_LIBS([Destroy_SuperNode_SCP], [superlu_mt superlu_mt_OPENMP], + [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) + AC_SEARCH_LIBS([Destroy_CompCol_NCP], [superlu_mt superlu_mt_OPENMP], + [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) + AS_IF([test "x${SUPERLU_MT_FOUND_LIBS}" != "xyes"], + [AC_MSG_ERROR([Unable to find SuperLU MT library.])]) + AC_DEFINE([HAVE_SUPERLU_MT], [1], [Define to 1 if you have SuperLU_MT support enabled.]) fi -AC_DEFINE([HAVE_CUDA], [1], [Define to 1 if you have CUDA support enabled.]) - -AC_CHECK_LIB([cuda], [cuGetErrorString]) -AC_CHECK_LIB([cudart], [cudaMalloc]) -AC_CHECK_LIB([cublas], [cublasDnrm2]) -AC_CHECK_LIB([cusparse], [cusparseCreateMatDescr]) -#AC_SEARCH_LIBS([cublasDaxpy], [cublas]) -#AC_SEARCH_LIBS([cublasDscal], [cublas]) -#AC_SEARCH_LIBS([cublasDdot], [cublas]) -#AC_SEARCH_LIBS([cudaThreadSynchronize], [cudart]) -#AC_SEARCH_LIBS([cudaGetLastError], [cudart]) -#AC_SEARCH_LIBS([cusparseCreateMatDescr], [cusparse]) -#AC_SEARCH_LIBS([cusparseSetMatType], [cusparse]) -#AC_SEARCH_LIBS([cusparseSetMatIndexBase], [cusparse]) + +# Check for CUDA support. 
+if test "x$BUILD_GPU" != "x" +then + CONFIGURE_HEADLINE([ CUDA support ]) + AX_CUDA + NVCCFLAGS= + if test "BUILD_DEBUG" = "true" + then + NVCCFLAGS+=" -g -G" + fi + AC_DEFINE([HAVE_CUDA], [1], [Define to 1 if you have CUDA support enabled.]) + + AC_CHECK_LIB([cuda], [cuGetErrorString]) + AC_CHECK_LIB([cudart], [cudaMalloc]) + AC_CHECK_LIB([cublas], [cublasDnrm2]) + AC_CHECK_LIB([cusparse], [cusparseCreateMatDescr]) +# AC_SEARCH_LIBS([cublasDaxpy], [cublas]) +# AC_SEARCH_LIBS([cublasDscal], [cublas]) +# AC_SEARCH_LIBS([cublasDdot], [cublas]) +# AC_SEARCH_LIBS([cudaThreadSynchronize], [cudart]) +# AC_SEARCH_LIBS([cudaGetLastError], [cudart]) +# AC_SEARCH_LIBS([cusparseCreateMatDescr], [cusparse]) +# AC_SEARCH_LIBS([cusparseSetMatType], [cusparse]) +# AC_SEARCH_LIBS([cusparseSetMatIndexBase], [cusparse]) # -#AC_SEARCH_LIBS([cublasDnrm2], [cublas], -# [CUBLAS_FOUND_LIBS="yes"], [CUBLAS_FOUND_LIBS="no"], [-lcublas]) -#AS_IF([test "x${CUBLAS_FOUND_LIBS}" != "xyes"], -# [AC_MSG_ERROR([Unable to find CUBLAS library.])]) +# AC_SEARCH_LIBS([cublasDnrm2], [cublas], +# [CUBLAS_FOUND_LIBS="yes"], [CUBLAS_FOUND_LIBS="no"], [-lcublas]) +# AS_IF([test "x${CUBLAS_FOUND_LIBS}" != "xyes"], +# [AC_MSG_ERROR([Unable to find CUBLAS library.])]) # -#AC_SEARCH_LIBS([cusparseSetMatType], [cusparse], -# [CUSPARSE_FOUND_LIBS="yes"], [CUSPARSE_FOUND_LIBS="no"], [-lcusparse]) -#AS_IF([test "x${CUSPARSE_FOUND_LIBS}" != "xyes"], -# [AC_MSG_ERROR([Unable to find CUSPARSE library.])]) +# AC_SEARCH_LIBS([cusparseSetMatType], [cusparse], +# [CUSPARSE_FOUND_LIBS="yes"], [CUSPARSE_FOUND_LIBS="no"], [-lcusparse]) +# AS_IF([test "x${CUSPARSE_FOUND_LIBS}" != "xyes"], +# [AC_MSG_ERROR([Unable to find CUSPARSE library.])]) # -#AC_CHECK_TYPES([cublasHandle_t], [], -# [AC_MSG_FAILURE([cublasHandle_t type not found in cublas.h], [1])], [#include<cublas_v2.h>]) -#AC_CHECK_TYPES([cusparseHandle_t], [], -# [AC_MSG_FAILURE([cusparseHandle_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>]) 
-#AC_CHECK_TYPES([cusparseMatDescr_t], [], -# [AC_MSG_FAILURE([cusparseMatDescr_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>]) +# AC_CHECK_TYPES([cublasHandle_t], [], +# [AC_MSG_FAILURE([cublasHandle_t type not found in cublas.h], [1])], [#include<cublas_v2.h>]) +# AC_CHECK_TYPES([cusparseHandle_t], [], +# [AC_MSG_FAILURE([cusparseHandle_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>]) +# AC_CHECK_TYPES([cusparseMatDescr_t], [], +# [AC_MSG_FAILURE([cusparseMatDescr_t type not found in cusparse.h], [1])], [#include<cusparse_v2.h>]) +else + AM_CONDITIONAL(USE_CUDA, test "x" = "xyes") +fi + +if test "x$BUILD_DEBUG" != "x" +then + CFLAGS="${CFLAGS} ${DEBUG_FLAGS}" +fi + +if test "x$BUILD_GPROF" != "x" +then + CFLAGS="${CFLAGS} ${GPROF_FLAGS}" +fi if test "BUILD_PROF" = "true" then diff --git a/PuReMD-GPU/src/QEq.c b/PuReMD-GPU/src/QEq.c deleted file mode 100644 index 8cc638ea90dcc25f86d33f275b162c8e531d82bb..0000000000000000000000000000000000000000 --- a/PuReMD-GPU/src/QEq.c +++ /dev/null @@ -1,396 +0,0 @@ -/*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator - - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu - Hasan Metin Aktulga, haktulga@cs.purdue.edu - Ananth Y Grama, ayg@cs.purdue.edu - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details: - <http://www.gnu.org/licenses/>. 
- ----------------------------------------------------------------------*/ - -#include "QEq.h" - -#include "allocate.h" -#include "lin_alg.h" -#include "list.h" -#include "print_utils.h" -#include "index_utils.h" -#include "system_props.h" - -#include "sort.h" - - -int compare_matrix_entry(const void *v1, const void *v2) -{ - return ((sparse_matrix_entry *)v1)->j - ((sparse_matrix_entry *)v2)->j; -} - - -void Sort_Matrix_Rows( sparse_matrix *A ) -{ - int i, si, ei; - - for( i = 0; i < A->n; ++i ) { - si = A->start[i]; - ei = A->start[i+1]; - qsort( &(A->entries[si]), ei - si, - sizeof(sparse_matrix_entry), compare_matrix_entry ); - } -} - - -void Calculate_Droptol( sparse_matrix *A, real *droptol, real dtol ) -{ - int i, j, k; - real val; - - /* init droptol to 0 */ - for( i = 0; i < A->n; ++i ) - droptol[i] = 0; - - /* calculate sqaure of the norm of each row */ - for( i = 0; i < A->n; ++i ) { - for( k = A->start[i]; k < A->start[i+1]-1; ++k ) { - j = A->entries[k].j; - val = A->entries[k].val; - - droptol[i] += val*val; - droptol[j] += val*val; - } - - val = A->entries[k].val; // diagonal entry - droptol[i] += val*val; - } - - /* calculate local droptol for each row */ - //fprintf( stderr, "droptol: " ); - for( i = 0; i < A->n; ++i ) { - //fprintf( stderr, "%f-->", droptol[i] ); - droptol[i] = SQRT( droptol[i] ) * dtol; - //fprintf( stderr, "%f ", droptol[i] ); - } - //fprintf( stderr, "\n" ); -} - - -int Estimate_LU_Fill( sparse_matrix *A, real *droptol ) -{ - int i, j, pj; - int fillin; - real val; - - fillin = 0; - - //fprintf( stderr, "n: %d\n", A->n ); - for( i = 0; i < A->n; ++i ) - for( pj = A->start[i]; pj < A->start[i+1]-1; ++pj ){ - j = A->entries[pj].j; - val = A->entries[pj].val; - //fprintf( stderr, "i: %d, j: %d", i, j ); - - if( fabs(val) > droptol[i] ) - ++fillin; - } - - return fillin + A->n; -} - - -void ICHOLT( sparse_matrix *A, real *droptol, - sparse_matrix *L, sparse_matrix *U ) -{ - sparse_matrix_entry tmp[1000]; - int i, j, pj, k1, k2, 
tmptop, Ltop; - real val; - int *Utop; - - Utop = (int*) malloc((A->n+1) * sizeof(int)); - - // clear variables - Ltop = 0; - tmptop = 0; - for( i = 0; i <= A->n; ++i ) - L->start[i] = U->start[i] = 0; - - for( i = 0; i < A->n; ++i ) - Utop[i] = 0; - - //fprintf( stderr, "n: %d\n", A->n ); - for( i = 0; i < A->n; ++i ){ - L->start[i] = Ltop; - tmptop = 0; - - for( pj = A->start[i]; pj < A->start[i+1]-1; ++pj ){ - j = A->entries[pj].j; - val = A->entries[pj].val; - //fprintf( stderr, "i: %d, j: %d", i, j ); - - if( fabs(val) > droptol[i] ){ - k1 = 0; - k2 = L->start[j]; - while( k1 < tmptop && k2 < L->start[j+1] ){ - if( tmp[k1].j < L->entries[k2].j ) - ++k1; - else if( tmp[k1].j > L->entries[k2].j ) - ++k2; - else - val -= (tmp[k1++].val * L->entries[k2++].val); - } - - // L matrix is lower triangular, - // so right before the start of next row comes jth diagonal - val /= L->entries[L->start[j+1]-1].val; - - tmp[tmptop].j = j; - tmp[tmptop].val = val; - ++tmptop; - } - //fprintf( stderr, " -- done\n" ); - } - - // compute the ith diagonal in L - // sanity check - if( A->entries[pj].j != i ) { - fprintf( stderr, "i=%d, badly built A matrix!\n", i ); - exit(999); - } - - val = A->entries[pj].val; - for( k1 = 0; k1 < tmptop; ++k1 ) - val -= (tmp[k1].val * tmp[k1].val); - - tmp[tmptop].j = i; - tmp[tmptop].val = SQRT(val); - - // apply the dropping rule once again - //fprintf( stderr, "row%d: tmptop: %d\n", i, tmptop ); - //for( k1 = 0; k1<= tmptop; ++k1 ) - // fprintf( stderr, "%d(%f) ", tmp[k1].j, tmp[k1].val ); - //fprintf( stderr, "\n" ); - //fprintf( stderr, "row(%d): droptol=%.4f\n", i+1, droptol[i] ); - for( k1 = 0; k1 < tmptop; ++k1 ) - if( fabs(tmp[k1].val) > droptol[i] / tmp[tmptop].val ){ - L->entries[Ltop].j = tmp[k1].j; - L->entries[Ltop].val = tmp[k1].val; - U->start[tmp[k1].j+1]++; - ++Ltop; - //fprintf( stderr, "%d(%.4f) ", tmp[k1].j+1, tmp[k1].val ); - } - // keep the diagonal in any case - L->entries[Ltop].j = tmp[k1].j; - L->entries[Ltop].val = 
tmp[k1].val; - ++Ltop; - //fprintf( stderr, "%d(%.4f)\n", tmp[k1].j+1, tmp[k1].val ); - } - - L->start[i] = Ltop; - //fprintf( stderr, "nnz(L): %d, max: %d\n", Ltop, L->n * 50 ); - - for( i = 1; i <= U->n; ++i ) - Utop[i] = U->start[i] = U->start[i] + U->start[i-1] + 1; - - for( i = 0; i < L->n; ++i ) - for( pj = L->start[i]; pj < L->start[i+1]; ++pj ){ - j = L->entries[pj].j; - U->entries[Utop[j]].j = i; - U->entries[Utop[j]].val = L->entries[pj].val; - Utop[j]++; - } - - //fprintf( stderr, "nnz(U): %d, max: %d\n", Utop[U->n], U->n * 50 ); -} - - -void Init_MatVec( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - list *far_nbrs ) -{ - int i, fillin; - real s_tmp, t_tmp; - //char fname[100]; - - if(control->refactor > 0 && - ((data->step-data->prev_steps)%control->refactor==0 || workspace->L.entries==NULL)) - { - //Print_Linear_System( system, control, workspace, data->step ); - Sort_Matrix_Rows( &workspace->H ); - - //fprintf( stderr, "H matrix sorted\n" ); - - Calculate_Droptol( &workspace->H, workspace->droptol, control->droptol ); - //fprintf( stderr, "drop tolerances calculated\n" ); - - if( workspace->L.entries == NULL ) - { - fillin = Estimate_LU_Fill( &workspace->H, workspace->droptol ); - -#ifdef __DEBUG_CUDA__ - fprintf( stderr, "fillin = %d\n", fillin ); -#endif - - if( Allocate_Matrix( &(workspace->L), far_nbrs->n, fillin ) == 0 || - Allocate_Matrix( &(workspace->U), far_nbrs->n, fillin ) == 0 ) - { - fprintf( stderr, "not enough memory for LU matrices. 
terminating.\n" ); - exit(INSUFFICIENT_SPACE); - } - -#if defined(DEBUG_FOCUS) - fprintf( stderr, "fillin = %d\n", fillin ); - fprintf( stderr, "allocated memory: L = U = %ldMB\n", - fillin * sizeof(sparse_matrix_entry) / (1024*1024) ); -#endif - } - - ICHOLT( &workspace->H, workspace->droptol, &workspace->L, &workspace->U ); - -#if defined(DEBUG_FOCUS) - fprintf( stderr, "icholt-" ); - //sprintf( fname, "%s.L%d.out", control->sim_name, data->step ); - //Print_Sparse_Matrix2( workspace->L, fname ); - //Print_Sparse_Matrix( U ); -#endif - } - - /* extrapolation for s & t */ - for( i = 0; i < system->N; ++i ) { - // no extrapolation - //s_tmp = workspace->s[0][i]; - //t_tmp = workspace->t[0][i]; - - // linear - //s_tmp = 2 * workspace->s[0][i] - workspace->s[1][i]; - //t_tmp = 2 * workspace->t[0][i] - workspace->t[1][i]; - - // quadratic - //s_tmp = workspace->s[2][i] + 3 * (workspace->s[0][i]-workspace->s[1][i]); - t_tmp = workspace->t[index_wkspace_sys(2,i,system->N)] + 3*(workspace->t[index_wkspace_sys(0,i,system->N)]-workspace->t[index_wkspace_sys(1,i,system->N)]); - - // cubic - s_tmp = 4 * (workspace->s[index_wkspace_sys(0,i,system->N)] + workspace->s[index_wkspace_sys(2,i,system->N)]) - - (6 * workspace->s[index_wkspace_sys(1,i,system->N)] + workspace->s[index_wkspace_sys(3,i,system->N)] ); - //t_tmp = 4 * (workspace->t[0][i] + workspace->t[2][i]) - - // (6 * workspace->t[1][i] + workspace->t[3][i] ); - - // 4th order - //s_tmp = 5 * (workspace->s[0][i] - workspace->s[3][i]) + - // 10 * (-workspace->s[1][i] + workspace->s[2][i] ) + workspace->s[4][i]; - //t_tmp = 5 * (workspace->t[0][i] - workspace->t[3][i]) + - // 10 * (-workspace->t[1][i] + workspace->t[2][i] ) + workspace->t[4][i]; - - workspace->s[index_wkspace_sys(4,i,system->N)] = workspace->s[index_wkspace_sys(3,i,system->N)]; - workspace->s[index_wkspace_sys(3,i,system->N)] = workspace->s[index_wkspace_sys(2,i,system->N)]; - workspace->s[index_wkspace_sys(2,i,system->N)] = 
workspace->s[index_wkspace_sys(1,i,system->N)]; - workspace->s[index_wkspace_sys(1,i,system->N)] = workspace->s[index_wkspace_sys(0,i,system->N)]; - workspace->s[index_wkspace_sys(0,i,system->N)] = s_tmp; - - workspace->t[index_wkspace_sys(4,i,system->N)] = workspace->t[index_wkspace_sys(3,i,system->N)]; - workspace->t[index_wkspace_sys(3,i,system->N)] = workspace->t[index_wkspace_sys(2,i,system->N)]; - workspace->t[index_wkspace_sys(2,i,system->N)] = workspace->t[index_wkspace_sys(1,i,system->N)]; - workspace->t[index_wkspace_sys(1,i,system->N)] = workspace->t[index_wkspace_sys(0,i,system->N)]; - workspace->t[index_wkspace_sys(0,i,system->N)] = t_tmp; - } -} - - -void Calculate_Charges( reax_system *system, static_storage *workspace ) -{ - int i; - real u, s_sum, t_sum; - - s_sum = t_sum = 0.; - for( i = 0; i < system->N; ++i ) { - s_sum += workspace->s[index_wkspace_sys(0,i,system->N)]; - t_sum += workspace->t[index_wkspace_sys(0,i,system->N)]; - } - - u = s_sum / t_sum; - -#ifdef __DEBUG_CUDA__ - fprintf (stderr, "Host --->s %13.2f, t %13.f, u %13.2f \n", s_sum, t_sum, u ); -#endif - - for( i = 0; i < system->N; ++i ) - { - system->atoms[i].q = workspace->s[index_wkspace_sys(0,i,system->N)] - u * workspace->t[index_wkspace_sys(0,i,system->N)]; - } -} - - -void QEq( reax_system *system, control_params *control, simulation_data *data, - static_storage *workspace, list *far_nbrs, - output_controls *out_control ) -{ - int matvecs; - - //real t_start, t_elapsed; - - //t_start = Get_Time (); - Init_MatVec( system, control, data, workspace, far_nbrs ); - //t_elapsed = Get_Timing_Info ( t_start ); - - //fprintf (stderr, " CPU Init_MatVec timing ----> %f \n", t_elapsed ); - - //if( data->step % 10 == 0 ) - // Print_Linear_System( system, control, workspace, far_nbrs, data->step ); - - //t_start = Get_Time ( ); - matvecs = GMRES( workspace, &workspace->H, - workspace->b_s, control->q_err, &workspace->s[0], out_control->log, system ); - matvecs += GMRES( workspace, 
&workspace->H, - workspace->b_t, control->q_err, &workspace->t[0], out_control->log, system ); - //t_elapsed = Get_Timing_Info ( t_start ); - - //fprintf (stderr, " CPU GMRES timing ---> %f \n", t_elapsed ); - - //matvecs = GMRES_HouseHolder( workspace, workspace->H, - // workspace->b_s, control->q_err, workspace->s[0], out_control->log ); - //matvecs += GMRES_HouseHolder( workspace, workspace->H, - // workspace->b_t, control->q_err, workspace->t[0], out_control->log ); - - //matvecs = PGMRES( workspace, &workspace->H, workspace->b_s, control->q_err, - // &workspace->L, &workspace->U, &workspace->s[index_wkspace_sys(0,0,system->N)], out_control->log, system ); - //matvecs += PGMRES( workspace, &workspace->H, workspace->b_t, control->q_err, - // &workspace->L, &workspace->U, &workspace->t[index_wkspace_sys(0,0,system->N)], out_control->log, system ); - - //matvecs=PCG( workspace, workspace->H, workspace->b_s, control->q_err, - // workspace->L, workspace->U, workspace->s[0], out_control->log ) + 1; - ///matvecs+=PCG( workspace, workspace->H, workspace->b_t, control->q_err, - // workspace->L, workspace->U, workspace->t[0], out_control->log ) + 1; - - //matvecs = CG( workspace, workspace->H, - // workspace->b_s, control->q_err, workspace->s[0], out_control->log ) + 1; - //matvecs += CG( workspace, workspace->H, - // workspace->b_t, control->q_err, workspace->t[0], out_control->log ) + 1; - - //matvecs = SDM( workspace, workspace->H, - // workspace->b_s, control->q_err, workspace->s[0], out_control->log ) + 1; - //matvecs += SDM( workspace, workspace->H, - // workspace->b_t, control->q_err, workspace->t[0], out_control->log ) + 1; - - //fprintf (stderr, " GMRES done with iterations %d \n", matvecs ); - - data->timing.matvecs += matvecs; -#if defined(DEBUG_FOCUS) - fprintf( stderr, "linsolve-" ); -#endif - - Calculate_Charges( system, workspace ); - //fprintf( stderr, "%d %.9f %.9f %.9f %.9f %.9f %.9f\n", - // data->step, - // workspace->s[0][0], workspace->t[0][0], - // 
workspace->s[0][1], workspace->t[0][1], - // workspace->s[0][2], workspace->t[0][2] ); - // if( data->step == control->nsteps ) - //Print_Charges( system, control, workspace, data->step ); -} diff --git a/PuReMD-GPU/src/QEq.h b/PuReMD-GPU/src/QEq.h deleted file mode 100644 index 31dfbf61ba05ec79d32313c3ab648eb259f183f2..0000000000000000000000000000000000000000 --- a/PuReMD-GPU/src/QEq.h +++ /dev/null @@ -1,61 +0,0 @@ -/*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator - - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu - Hasan Metin Aktulga, haktulga@cs.purdue.edu - Ananth Y Grama, ayg@cs.purdue.edu - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details: - <http://www.gnu.org/licenses/>. 
- ----------------------------------------------------------------------*/ - -#ifndef __QEq_H_ -#define __QEq_H_ - -#include "mytypes.h" - - -void QEq( reax_system*, control_params*, simulation_data*, static_storage*, - list*, output_controls* ); - - -static inline HOST_DEVICE void swap(sparse_matrix_entry *array, int index1, int index2) -{ - sparse_matrix_entry temp = array[index1]; - array[index1] = array[index2]; - array[index2] = temp; -} - - -static inline HOST_DEVICE void quick_sort(sparse_matrix_entry *array, int start, int end) -{ - int i = start; - int k = end; - - if (end - start >= 1) - { - int pivot = array[start].j; - - while (k > i) - { - while ((array[i].j <= pivot) && (i <= end) && (k > i)) i++; - while ((array[k].j > pivot) && (k >= start) && (k >= i)) k--; - if (k > i) swap(array, i, k); - } - swap(array, start, k); - quick_sort(array, start, k - 1); - quick_sort(array, k + 1, end); - } -} - - -#endif diff --git a/PuReMD-GPU/src/allocate.c b/PuReMD-GPU/src/allocate.c index 65f0eb2a872673259d508f17fc0da43530a7426f..bbae7dce50b8fdf60e5a0d8c459e50ca36dd67cb 100644 --- a/PuReMD-GPU/src/allocate.c +++ b/PuReMD-GPU/src/allocate.c @@ -1,19 +1,20 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ @@ -21,103 +22,148 @@ #include "allocate.h" #include "list.h" +#include "tool_box.h" + +/* allocate space for atoms */ +int PreAllocate_Space( reax_system *system, control_params *control, + static_storage *workspace ) +{ + int i; + + system->atoms = (reax_atom*) scalloc( system->N, + sizeof(reax_atom), "atoms" ); + workspace->orig_id = (int*) scalloc( system->N, + sizeof(int), "orid_id" ); + + /* space for keeping restriction info, if any */ + if ( control->restrict_bonds ) + { + workspace->restricted = (int*) scalloc( system->N, + sizeof(int), "restricted_atoms" ); + + workspace->restricted_list = (int*) scalloc( system->N, + sizeof(int), "restricted_list" ); + + workspace->restricted_list = (int*) scalloc( MAX_RESTRICT * system->N, + sizeof(int), "restricted_list[i]" ); + } + + return SUCCESS; +} void Reallocate_Neighbor_List( list *far_nbrs, int n, int num_intrs ) { Delete_List( far_nbrs ); - if(!Make_List( n, num_intrs, TYP_FAR_NEIGHBOR, far_nbrs )) + + if (!Make_List( n, num_intrs, TYP_FAR_NEIGHBOR, far_nbrs )) { fprintf(stderr, "Problem in initializing far nbrs list. 
Terminating!\n"); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "num_far = %d, far_nbrs = %d -> reallocating!\n", - num_intrs, far_nbrs->num_intrs ); - fprintf( stderr, "memory allocated: far_nbrs = %ldMB\n", - num_intrs * sizeof(far_neighbor_data) / (1024*1024) ); + num_intrs, far_nbrs->num_intrs ); + fprintf( stderr, "memory allocated: far_nbrs = %ldMB\n", + num_intrs * sizeof(far_neighbor_data) / (1024 * 1024) ); #endif } -HOST int Allocate_Matrix( sparse_matrix *H, int n, int m ) +int Allocate_Matrix( sparse_matrix *pH, int n, int m ) { + sparse_matrix *H; + + if ( (pH = (sparse_matrix*) malloc(sizeof(sparse_matrix))) == NULL ) + { + return FAILURE; + } + + H = pH; H->n = n; H->m = m; - if( (H->start = (int*) malloc(sizeof(int) * n+1)) == NULL ) - return 0; - - if( (H->end = (int*) malloc(sizeof(int) * n+1)) == NULL ) - return 0; - if( (H->entries = - (sparse_matrix_entry*) malloc(sizeof(sparse_matrix_entry)*m)) == NULL ) - return 0; + if ( (H->start = (unsigned int*) malloc(sizeof(int) * (n + 1))) == NULL + || (H->j = (unsigned int*) malloc(sizeof(int) * m)) == NULL + || (H->val = (real*) malloc(sizeof(real) * m)) == NULL ) + { + return FAILURE; + } - return 1; + return SUCCESS; } void Deallocate_Matrix( sparse_matrix *H ) { free(H->start); - free(H->entries); - free(H->end); + free(H->j); + free(H->val); + free(H); } int Reallocate_Matrix( sparse_matrix *H, int n, int m, char *name ) { Deallocate_Matrix( H ); - if( !Allocate_Matrix( H, n, m ) ) { + + if ( Allocate_Matrix( H, n, m ) == FAILURE ) + { fprintf(stderr, "not enough space for %s matrix. 
terminating!\n", name); - exit( 1 ); + exit( INSUFFICIENT_MEMORY ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "reallocating %s matrix, n = %d, m = %d\n", - name, n, m ); - fprintf( stderr, "memory allocated: %s = %ldMB\n", - name, m * sizeof(sparse_matrix_entry) / (1024*1024) ); + name, n, m ); + fprintf( stderr, "memory allocated: %s = %ldMB\n", + name, m * sizeof(sparse_matrix_entry) / (1024 * 1024) ); #endif - return 1; + + return SUCCESS; } -int Allocate_HBond_List( int n, int num_h, int *h_index, int *hb_top, - list *hbonds ) +int Allocate_HBond_List( int n, int num_h, int *h_index, int *hb_top, + list *hbonds ) { int i, num_hbonds; num_hbonds = 0; /* find starting indexes for each H and the total number of hbonds */ - for( i = 1; i < n; ++i ) - hb_top[i] += hb_top[i-1]; - num_hbonds = hb_top[n-1]; + for ( i = 1; i < n; ++i ) + { + hb_top[i] += hb_top[i - 1]; + } + num_hbonds = hb_top[n - 1]; - if( !Make_List(num_h, num_hbonds, TYP_HBOND, hbonds ) ) + if ( !Make_List(num_h, num_hbonds, TYP_HBOND, hbonds ) ) { fprintf( stderr, "not enough space for hbonds list. 
terminating!\n" ); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } - for( i = 0; i < n; ++i ) - if( h_index[i] == 0 ){ - Set_Start_Index( 0, 0, hbonds ); - Set_End_Index( 0, 0, hbonds ); + for ( i = 0; i < n; ++i ) + { + if ( h_index[i] == 0 ) + { + Set_Start_Index( 0, 0, hbonds ); + Set_End_Index( 0, 0, hbonds ); } - else if( h_index[i] > 0 ){ - Set_Start_Index( h_index[i], hb_top[i-1], hbonds ); - Set_End_Index( h_index[i], hb_top[i-1], hbonds ); + else if ( h_index[i] > 0 ) + { + Set_Start_Index( h_index[i], hb_top[i - 1], hbonds ); + Set_End_Index( h_index[i], hb_top[i - 1], hbonds ); } + } #if defined(DEBUG_FOCUS) fprintf( stderr, "allocating hbonds - num_hbonds: %d\n", num_hbonds ); - fprintf( stderr, "memory allocated: hbonds = %ldMB\n", - num_hbonds * sizeof(hbond_data) / (1024*1024) ); + fprintf( stderr, "memory allocated: hbonds = %ldMB\n", + num_hbonds * sizeof(hbond_data) / (1024 * 1024) ); #endif - return 1; + return SUCCESS; } @@ -129,10 +175,14 @@ int Reallocate_HBonds_List( int n, int num_h, int *h_index, list *hbonds ) #if defined(DEBUG_FOCUS) fprintf( stderr, "reallocating hbonds\n" ); #endif - hb_top = (int *)calloc( n, sizeof(int) ); - for( i = 0; i < n; ++i ) - if( h_index[i] >= 0 ) - hb_top[i] = MAX(Num_Entries(h_index[i],hbonds)*SAFE_HBONDS, MIN_HBONDS); + hb_top = calloc( n, sizeof(int) ); + for ( i = 0; i < n; ++i ) + { + if ( h_index[i] >= 0 ) + { + hb_top[i] = MAX(Num_Entries(h_index[i], hbonds) * SAFE_HBONDS, MIN_HBONDS); + } + } Delete_List( hbonds ); @@ -140,7 +190,7 @@ int Reallocate_HBonds_List( int n, int num_h, int *h_index, list *hbonds ) free( hb_top ); - return 1; + return SUCCESS; } @@ -150,29 +200,32 @@ int Allocate_Bond_List( int n, int *bond_top, list *bonds ) num_bonds = 0; /* find starting indexes for each atom and the total number of bonds */ - for( i = 1; i < n; ++i ) - bond_top[i] += bond_top[i-1]; - num_bonds = bond_top[n-1]; + for ( i = 1; i < n; ++i ) + { + bond_top[i] += bond_top[i - 1]; + } + num_bonds = 
bond_top[n - 1]; - if( !Make_List(n, num_bonds, TYP_BOND, bonds ) ) + if ( !Make_List(n, num_bonds, TYP_BOND, bonds ) ) { fprintf( stderr, "not enough space for bonds list. terminating!\n" ); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } - Set_Start_Index( 0, 0, bonds ); - Set_End_Index( 0, 0, bonds ); - for( i = 1; i < n; ++i ) { - Set_Start_Index( i, bond_top[i-1], bonds ); - Set_End_Index( i, bond_top[i-1], bonds ); + Set_Start_Index( 0, 0, bonds ); + Set_End_Index( 0, 0, bonds ); + for ( i = 1; i < n; ++i ) + { + Set_Start_Index( i, bond_top[i - 1], bonds ); + Set_End_Index( i, bond_top[i - 1], bonds ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "allocating bonds - num_bonds: %d\n", num_bonds ); - fprintf( stderr, "memory allocated: bonds = %ldMB\n", - num_bonds * sizeof(bond_data) / (1024*1024) ); + fprintf( stderr, "memory allocated: bonds = %ldMB\n", + num_bonds * sizeof(bond_data) / (1024 * 1024) ); #endif - return 1; + return SUCCESS; } @@ -184,9 +237,10 @@ int Reallocate_Bonds_List( int n, list *bonds, int *num_bonds, int *est_3body ) #if defined(DEBUG_FOCUS) fprintf( stderr, "reallocating bonds\n" ); #endif - bond_top = (int *)calloc( n, sizeof(int) ); + bond_top = calloc( n, sizeof(int) ); *est_3body = 0; - for( i = 0; i < n; ++i ){ + for ( i = 0; i < n; ++i ) + { *est_3body += SQR( Num_Entries( i, bonds ) ); bond_top[i] = MAX( Num_Entries( i, bonds ) * 2, MIN_BONDS ); } @@ -194,17 +248,18 @@ int Reallocate_Bonds_List( int n, list *bonds, int *num_bonds, int *est_3body ) Delete_List( bonds ); Allocate_Bond_List( n, bond_top, bonds ); - *num_bonds = bond_top[n-1]; + *num_bonds = bond_top[n - 1]; free( bond_top ); - return 1; + return SUCCESS; } -void Reallocate( reax_system *system, static_storage *workspace, list **lists, - int nbr_flag ) +void Reallocate( reax_system *system, static_storage *workspace, list **lists, + int nbr_flag ) { + int i, j, k; int num_bonds, est_3body; reallocate_data *realloc; grid *g; @@ -212,70 +267,75 @@ void 
Reallocate( reax_system *system, static_storage *workspace, list **lists, realloc = &(workspace->realloc); g = &(system->g); - if( realloc->num_far > 0 && nbr_flag ) { - fprintf (stderr, " Reallocating neighbors \n"); - Reallocate_Neighbor_List( (*lists)+FAR_NBRS, - system->N, realloc->num_far * SAFE_ZONE ); + if ( realloc->num_far > 0 && nbr_flag ) + { + Reallocate_Neighbor_List( (*lists) + FAR_NBRS, + system->N, realloc->num_far * SAFE_ZONE ); realloc->num_far = -1; } - if( realloc->Htop > 0 ){ - fprintf (stderr, " Reallocating Matrix \n"); - Reallocate_Matrix(&(workspace->H), system->N, realloc->Htop*SAFE_ZONE,"H"); + if ( realloc->Htop > 0 ) + { + Reallocate_Matrix(workspace->H, system->N, realloc->Htop * SAFE_ZONE, "H"); realloc->Htop = -1; - Deallocate_Matrix( &workspace->L ); - Deallocate_Matrix( &workspace->U ); + Deallocate_Matrix( workspace->L ); + Deallocate_Matrix( workspace->U ); + workspace->L = NULL; + workspace->U = NULL; } - if( realloc->hbonds > 0 ){ - fprintf (stderr, " Reallocating hbonds \n"); + if ( realloc->hbonds > 0 ) + { Reallocate_HBonds_List(system->N, workspace->num_H, workspace->hbond_index, - (*lists)+HBONDS ); + (*lists) + HBONDS ); realloc->hbonds = -1; } num_bonds = est_3body = -1; - if( realloc->bonds > 0 ){ - fprintf (stderr, " Reallocating bonds \n"); - Reallocate_Bonds_List( system->N, (*lists)+BONDS, &num_bonds, &est_3body ); + if ( realloc->bonds > 0 ) + { + Reallocate_Bonds_List( system->N, (*lists) + BONDS, &num_bonds, &est_3body ); realloc->bonds = -1; realloc->num_3body = MAX( realloc->num_3body, est_3body ); } - if( realloc->num_3body > 0 ) { - fprintf (stderr, " Reallocating 3Body \n"); - Delete_List( (*lists)+THREE_BODIES ); + if ( realloc->num_3body > 0 ) + { + Delete_List( (*lists) + THREE_BODIES ); - if( num_bonds == -1 ) - num_bonds = ((*lists)+BONDS)->num_intrs; + if ( num_bonds == -1 ) + num_bonds = ((*lists) + BONDS)->num_intrs; realloc->num_3body *= SAFE_ZONE; - if( !Make_List( num_bonds, realloc->num_3body, - 
TYP_THREE_BODY, (*lists)+THREE_BODIES ) ) + if ( !Make_List( num_bonds, realloc->num_3body, + TYP_THREE_BODY, (*lists) + THREE_BODIES ) ) { fprintf( stderr, "Problem in initializing angles list. Terminating!\n" ); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } realloc->num_3body = -1; #if defined(DEBUG_FOCUS) fprintf( stderr, "reallocating 3 bodies\n" ); fprintf( stderr, "reallocated - num_bonds: %d\n", num_bonds ); fprintf( stderr, "reallocated - num_3body: %d\n", realloc->num_3body ); - fprintf( stderr, "reallocated 3body memory: %ldMB\n", - realloc->num_3body*sizeof(three_body_interaction_data)/ - (1024*1024) ); + fprintf( stderr, "reallocated 3body memory: %ldMB\n", + realloc->num_3body * sizeof(three_body_interaction_data) / + (1024 * 1024) ); #endif } - if( realloc->gcell_atoms > -1 ){ + if ( realloc->gcell_atoms > -1 ) + { #if defined(DEBUG_FOCUS) fprintf(stderr, "reallocating gcell: g->max_atoms: %d\n", g->max_atoms); #endif free (g->atoms); - g->atoms = (int *) calloc ( g->ncell[0]*g->ncell[1]*g->ncell[2], - sizeof (int) * workspace->realloc.gcell_atoms); + g->atoms = (int *) calloc( g->ncell[0]*g->ncell[1]*g->ncell[2], + sizeof(int) * workspace->realloc.gcell_atoms ); + realloc->gcell_atoms = -1; + realloc->gcell_atoms = -1; } } diff --git a/PuReMD-GPU/src/allocate.h b/PuReMD-GPU/src/allocate.h index b03ed80b34f153b9929ccaa80bc5c27fbf6ce540..72f724dac852c626e7c70018bba95c5f21dc5b51 100644 --- a/PuReMD-GPU/src/allocate.h +++ b/PuReMD-GPU/src/allocate.h @@ -28,9 +28,12 @@ extern "C" { #endif +int PreAllocate_Space( reax_system*, control_params*, static_storage* ); + void Reallocate( reax_system*, static_storage*, list**, int ); int Allocate_Matrix( sparse_matrix*, int, int ); + void Deallocate_Matrix( sparse_matrix *); int Allocate_HBond_List( int, int, int*, int*, list* ); diff --git a/PuReMD-GPU/src/analyze.c b/PuReMD-GPU/src/analyze.c index 8eef938a372add29eb044846b12c77eddd47aad0..014eea8f46093e1a381e0a5e44241cfdc3d7b719 100644 --- 
a/PuReMD-GPU/src/analyze.c +++ b/PuReMD-GPU/src/analyze.c @@ -772,17 +772,16 @@ void Calculate_Drift( reax_system *system, control_params *control, &(system->box), driftvec ); if ( fabs( driftvec[0] ) >= system->box.box_norms[0] / 2.0 - 2.0 || - fabs( driftvec[0] ) >= system->box.box_norms[0] / 2.0 - 2.0 || - fabs( driftvec[0] ) >= system->box.box_norms[0] / 2.0 - 2.0 ) + fabs( driftvec[1] ) >= system->box.box_norms[1] / 2.0 - 2.0 || + fabs( driftvec[2] ) >= system->box.box_norms[2] / 2.0 - 2.0 ) { /* the atom has moved almost half the box size. exclude it from further drift computations as it might have an improper contribution due to periodic boudnaries. */ + workspace->x_old[i][0] = -999999999.0; + workspace->x_old[i][1] = -999999999.0; + workspace->x_old[i][2] = -999999999.0; - //TODO -- check this one. may be not initializing this properly - //workspace->x_old[i][0] = workspace->x_old[i][2] = workspace->x_old[i][2] = -999999999999.0; - workspace->x_old[i][0] = workspace->x_old[i][2] = workspace->x_old[i][2] = -999999999.0; - //TODO -- check this one. 
may be not initializing this properly continue; } diff --git a/PuReMD-GPU/src/box.c b/PuReMD-GPU/src/box.c index e42395c5556042493c0f707879772d84e9d18658..a7911fda49b1dccbbfccf24ae2fa445c608e0d5e 100644 --- a/PuReMD-GPU/src/box.c +++ b/PuReMD-GPU/src/box.c @@ -1,58 +1,189 @@ /*---------------------------------------------------------------------- - PuReMD-GPu - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. 
----------------------------------------------------------------------*/ #include "box.h" + +#include "tool_box.h" #include "vector.h" -void Init_Box_From_CRYST(real a, real b, real c, - real alpha, real beta, real gamma, +void Make_Consistent( simulation_box* box ) +{ + real one_vol; + + box->volume = + box->box[0][0] * (box->box[1][1] * box->box[2][2] - + box->box[2][1] * box->box[2][1]) + + box->box[0][1] * (box->box[2][0] * box->box[1][2] - + box->box[1][0] * box->box[2][2]) + + box->box[0][2] * (box->box[1][0] * box->box[2][1] - + box->box[2][0] * box->box[1][1]); + + one_vol = 1.0 / box->volume; + + box->box_inv[0][0] = (box->box[1][1] * box->box[2][2] - + box->box[1][2] * box->box[2][1]) * one_vol; + box->box_inv[0][1] = (box->box[0][2] * box->box[2][1] - + box->box[0][1] * box->box[2][2]) * one_vol; + box->box_inv[0][2] = (box->box[0][1] * box->box[1][2] - + box->box[0][2] * box->box[1][1]) * one_vol; + + box->box_inv[1][0] = (box->box[1][2] * box->box[2][0] - + box->box[1][0] * box->box[2][2]) * one_vol; + box->box_inv[1][1] = (box->box[0][0] * box->box[2][2] - + box->box[0][2] * box->box[2][0]) * one_vol; + box->box_inv[1][2] = (box->box[0][2] * box->box[1][0] - + box->box[0][0] * box->box[1][2]) * one_vol; + + box->box_inv[2][0] = (box->box[1][0] * box->box[2][1] - + box->box[1][1] * box->box[2][0]) * one_vol; + box->box_inv[2][1] = (box->box[0][1] * box->box[2][0] - + box->box[0][0] * box->box[2][1]) * one_vol; + box->box_inv[2][2] = (box->box[0][0] * box->box[1][1] - + box->box[0][1] * box->box[1][0]) * one_vol; + + box->box_norms[0] = SQRT( SQR(box->box[0][0]) + + SQR(box->box[0][1]) + + SQR(box->box[0][2]) ); + box->box_norms[1] = SQRT( SQR(box->box[1][0]) + + SQR(box->box[1][1]) + + SQR(box->box[1][2]) ); + box->box_norms[2] = SQRT( SQR(box->box[2][0]) + + SQR(box->box[2][1]) + + SQR(box->box[2][2]) ); + + box->trans[0][0] = box->box[0][0] / box->box_norms[0]; + box->trans[0][1] = box->box[1][0] / box->box_norms[0]; + box->trans[0][2] = 
box->box[2][0] / box->box_norms[0]; + + box->trans[1][0] = box->box[0][1] / box->box_norms[1]; + box->trans[1][1] = box->box[1][1] / box->box_norms[1]; + box->trans[1][2] = box->box[2][1] / box->box_norms[1]; + + box->trans[2][0] = box->box[0][2] / box->box_norms[2]; + box->trans[2][1] = box->box[1][2] / box->box_norms[2]; + box->trans[2][2] = box->box[2][2] / box->box_norms[2]; + + one_vol = box->box_norms[0] * box->box_norms[1] * box->box_norms[2] * one_vol; + + box->trans_inv[0][0] = (box->trans[1][1] * box->trans[2][2] - + box->trans[1][2] * box->trans[2][1]) * one_vol; + box->trans_inv[0][1] = (box->trans[0][2] * box->trans[2][1] - + box->trans[0][1] * box->trans[2][2]) * one_vol; + box->trans_inv[0][2] = (box->trans[0][1] * box->trans[1][2] - + box->trans[0][2] * box->trans[1][1]) * one_vol; + + box->trans_inv[1][0] = (box->trans[1][2] * box->trans[2][0] - + box->trans[1][0] * box->trans[2][2]) * one_vol; + box->trans_inv[1][1] = (box->trans[0][0] * box->trans[2][2] - + box->trans[0][2] * box->trans[2][0]) * one_vol; + box->trans_inv[1][2] = (box->trans[0][2] * box->trans[1][0] - + box->trans[0][0] * box->trans[1][2]) * one_vol; + + box->trans_inv[2][0] = (box->trans[1][0] * box->trans[2][1] - + box->trans[1][1] * box->trans[2][0]) * one_vol; + box->trans_inv[2][1] = (box->trans[0][1] * box->trans[2][0] - + box->trans[0][0] * box->trans[2][1]) * one_vol; + box->trans_inv[2][2] = (box->trans[0][0] * box->trans[1][1] - + box->trans[0][1] * box->trans[1][0]) * one_vol; + +// for (i=0; i < 3; i++) +// { +// for (j=0; j < 3; j++) +// fprintf(stderr,"%lf\t",box->trans[i][j]); +// fprintf(stderr,"\n"); +// } +// fprintf(stderr,"\n"); +// for (i=0; i < 3; i++) +// { +// for (j=0; j < 3; j++) +// fprintf(stderr,"%lf\t",box->trans_inv[i][j]); +// fprintf(stderr,"\n"); +// } + + box->g[0][0] = box->box[0][0] * box->box[0][0] + + box->box[0][1] * box->box[0][1] + + box->box[0][2] * box->box[0][2]; + box->g[1][0] = + box->g[0][1] = box->box[0][0] * box->box[1][0] + + 
box->box[0][1] * box->box[1][1] + + box->box[0][2] * box->box[1][2]; + box->g[2][0] = + box->g[0][2] = box->box[0][0] * box->box[2][0] + + box->box[0][1] * box->box[2][1] + + box->box[0][2] * box->box[2][2]; + + box->g[1][1] = box->box[1][0] * box->box[1][0] + + box->box[1][1] * box->box[1][1] + + box->box[1][2] * box->box[1][2]; + box->g[1][2] = + box->g[2][1] = box->box[1][0] * box->box[2][0] + + box->box[1][1] * box->box[2][1] + + box->box[1][2] * box->box[2][2]; + + box->g[2][2] = box->box[2][0] * box->box[2][0] + + box->box[2][1] * box->box[2][1] + + box->box[2][2] * box->box[2][2]; + + // These proportions are only used for isotropic_NPT! + box->side_prop[0] = box->box[0][0] / box->box[0][0]; + box->side_prop[1] = box->box[1][1] / box->box[0][0]; + box->side_prop[2] = box->box[2][2] / box->box[0][0]; +} + + +/* setup the simulation box */ +void Setup_Box( real a, real b, real c, real alpha, real beta, real gamma, simulation_box* box ) { double c_alpha, c_beta, c_gamma, s_gamma, zi; - c_alpha = cos(DEG2RAD(alpha)); - c_beta = cos(DEG2RAD(beta)); - c_gamma = cos(DEG2RAD(gamma)); - s_gamma = sin(DEG2RAD(gamma)); + if ( IS_NAN_REAL(a) || IS_NAN_REAL(b) || IS_NAN_REAL(c) + || IS_NAN_REAL(alpha) || IS_NAN_REAL(beta) || IS_NAN_REAL(gamma) ) + { + fprintf( stderr, "Invalid simulation box boundaries for big box (NaN). 
Terminating...\n" ); + exit( INVALID_INPUT ); + } - zi = (c_alpha - c_beta * c_gamma)/s_gamma; + c_alpha = COS(DEG2RAD(alpha)); + c_beta = COS(DEG2RAD(beta)); + c_gamma = COS(DEG2RAD(gamma)); + s_gamma = SIN(DEG2RAD(gamma)); + zi = (c_alpha - c_beta * c_gamma) / s_gamma; - box->box[0][0] = a; - box->box[0][1] = 0.0; + box->box[0][0] = a; + box->box[0][1] = 0.0; box->box[0][2] = 0.0; - - box->box[1][0] = b * c_gamma; - box->box[1][1] = b * s_gamma; - box->box[1][2] = 0.0; - + box->box[1][0] = b * c_gamma; + box->box[1][1] = b * s_gamma; + box->box[1][2] = 0.0; box->box[2][0] = c * c_beta; box->box[2][1] = c * zi; box->box[2][2] = c * SQRT(1.0 - SQR(c_beta) - SQR(zi)); +#if defined(DEBUG) + fprintf( stderr, "box is %8.2f x %8.2f x %8.2f\n", + box->box[0][0], box->box[1][1], box->box[2][2] ); +#endif Make_Consistent( box ); - -#if defined(DEBUG_FOCUS) - fprintf( stderr, "box is %8.2f x %8.2f x %8.2f\n", - box->box[0][0], box->box[1][1], box->box[2][2] ); -#endif } @@ -60,8 +191,8 @@ void Update_Box( rtensor box_tensor, simulation_box* box ) { int i, j; - for (i=0; i < 3; i++) - for (j=0; j < 3; j++) + for (i = 0; i < 3; i++) + for (j = 0; j < 3; j++) box->box[i][j] = box_tensor[i][j]; Make_Consistent( box ); @@ -70,200 +201,37 @@ void Update_Box( rtensor box_tensor, simulation_box* box ) void Update_Box_Isotropic( simulation_box *box, real mu ) { - /*box->box[0][0] = + /*box->box[0][0] = POW( V_new / ( box->side_prop[1] * box->side_prop[2] ), 1.0/3.0 ); - box->box[1][1] = box->box[0][0] * box->side_prop[1]; - box->box[2][2] = box->box[0][0] * box->side_prop[2]; - */ + box->box[1][1] = box->box[0][0] * box->side_prop[1]; + box->box[2][2] = box->box[0][0] * box->side_prop[2]; + */ rtensor_Copy( box->old_box, box->box ); box->box[0][0] *= mu; box->box[1][1] *= mu; box->box[2][2] *= mu; - box->volume = box->box[0][0]*box->box[1][1]*box->box[2][2]; + box->volume = box->box[0][0] * box->box[1][1] * box->box[2][2]; Make_Consistent(box/*, periodic*/); } void 
Update_Box_SemiIsotropic( simulation_box *box, rvec mu ) { - /*box->box[0][0] = + /*box->box[0][0] = POW( V_new / ( box->side_prop[1] * box->side_prop[2] ), 1.0/3.0 ); - box->box[1][1] = box->box[0][0] * box->side_prop[1]; - box->box[2][2] = box->box[0][0] * box->side_prop[2]; */ + box->box[1][1] = box->box[0][0] * box->side_prop[1]; + box->box[2][2] = box->box[0][0] * box->side_prop[2]; */ rtensor_Copy( box->old_box, box->box ); box->box[0][0] *= mu[0]; box->box[1][1] *= mu[1]; box->box[2][2] *= mu[2]; - box->volume = box->box[0][0]*box->box[1][1]*box->box[2][2]; + box->volume = box->box[0][0] * box->box[1][1] * box->box[2][2]; Make_Consistent(box); } -void Make_Consistent(simulation_box* box) -{ - real one_vol; - - box->volume = - box->box[0][0] * (box->box[1][1]*box->box[2][2] - - box->box[2][1]*box->box[2][1]) + - box->box[0][1] * (box->box[2][0]*box->box[1][2] - - box->box[1][0]*box->box[2][2]) + - box->box[0][2] * (box->box[1][0]*box->box[2][1] - - box->box[2][0]*box->box[1][1]); - - one_vol = 1.0/box->volume; - - box->box_inv[0][0] = (box->box[1][1]*box->box[2][2] - - box->box[1][2]*box->box[2][1]) * one_vol; - box->box_inv[0][1] = (box->box[0][2]*box->box[2][1] - - box->box[0][1]*box->box[2][2]) * one_vol; - box->box_inv[0][2] = (box->box[0][1]*box->box[1][2] - - box->box[0][2]*box->box[1][1]) * one_vol; - - box->box_inv[1][0] = (box->box[1][2]*box->box[2][0] - - box->box[1][0]*box->box[2][2]) * one_vol; - box->box_inv[1][1] = (box->box[0][0]*box->box[2][2] - - box->box[0][2]*box->box[2][0]) * one_vol; - box->box_inv[1][2] = (box->box[0][2]*box->box[1][0] - - box->box[0][0]*box->box[1][2]) * one_vol; - - box->box_inv[2][0] = (box->box[1][0]*box->box[2][1] - - box->box[1][1]*box->box[2][0]) * one_vol; - box->box_inv[2][1] = (box->box[0][1]*box->box[2][0] - - box->box[0][0]*box->box[2][1]) * one_vol; - box->box_inv[2][2] = (box->box[0][0]*box->box[1][1] - - box->box[0][1]*box->box[1][0]) * one_vol; - - box->box_norms[0] = SQRT( SQR(box->box[0][0]) + - 
SQR(box->box[0][1]) + - SQR(box->box[0][2]) ); - box->box_norms[1] = SQRT( SQR(box->box[1][0]) + - SQR(box->box[1][1]) + - SQR(box->box[1][2]) ); - box->box_norms[2] = SQRT( SQR(box->box[2][0]) + - SQR(box->box[2][1]) + - SQR(box->box[2][2]) ); - - box->trans[0][0] = box->box[0][0]/box->box_norms[0]; - box->trans[0][1] = box->box[1][0]/box->box_norms[0]; - box->trans[0][2] = box->box[2][0]/box->box_norms[0]; - - box->trans[1][0] = box->box[0][1]/box->box_norms[1]; - box->trans[1][1] = box->box[1][1]/box->box_norms[1]; - box->trans[1][2] = box->box[2][1]/box->box_norms[1]; - - box->trans[2][0] = box->box[0][2]/box->box_norms[2]; - box->trans[2][1] = box->box[1][2]/box->box_norms[2]; - box->trans[2][2] = box->box[2][2]/box->box_norms[2]; - - one_vol = box->box_norms[0]*box->box_norms[1]*box->box_norms[2]*one_vol; - - box->trans_inv[0][0] = (box->trans[1][1]*box->trans[2][2] - - box->trans[1][2]*box->trans[2][1]) * one_vol; - box->trans_inv[0][1] = (box->trans[0][2]*box->trans[2][1] - - box->trans[0][1]*box->trans[2][2]) * one_vol; - box->trans_inv[0][2] = (box->trans[0][1]*box->trans[1][2] - - box->trans[0][2]*box->trans[1][1]) * one_vol; - - box->trans_inv[1][0] = (box->trans[1][2]*box->trans[2][0] - - box->trans[1][0]*box->trans[2][2]) * one_vol; - box->trans_inv[1][1] = (box->trans[0][0]*box->trans[2][2] - - box->trans[0][2]*box->trans[2][0]) * one_vol; - box->trans_inv[1][2] = (box->trans[0][2]*box->trans[1][0] - - box->trans[0][0]*box->trans[1][2]) * one_vol; - - box->trans_inv[2][0] = (box->trans[1][0]*box->trans[2][1] - - box->trans[1][1]*box->trans[2][0]) * one_vol; - box->trans_inv[2][1] = (box->trans[0][1]*box->trans[2][0] - - box->trans[0][0]*box->trans[2][1]) * one_vol; - box->trans_inv[2][2] = (box->trans[0][0]*box->trans[1][1] - - box->trans[0][1]*box->trans[1][0]) * one_vol; - - // for (i=0; i < 3; i++) - // { - // for (j=0; j < 3; j++) - // fprintf(stderr,"%lf\t",box->trans[i][j]); - // fprintf(stderr,"\n"); - // } - // fprintf(stderr,"\n"); - // for 
(i=0; i < 3; i++) - // { - // for (j=0; j < 3; j++) - // fprintf(stderr,"%lf\t",box->trans_inv[i][j]); - // fprintf(stderr,"\n"); - // } - - - box->g[0][0] = box->box[0][0] * box->box[0][0] + - box->box[0][1] * box->box[0][1] + - box->box[0][2] * box->box[0][2]; - box->g[1][0] = - box->g[0][1] = box->box[0][0] * box->box[1][0] + - box->box[0][1] * box->box[1][1] + - box->box[0][2] * box->box[1][2]; - box->g[2][0] = - box->g[0][2] = box->box[0][0] * box->box[2][0] + - box->box[0][1] * box->box[2][1] + - box->box[0][2] * box->box[2][2]; - - box->g[1][1] = box->box[1][0] * box->box[1][0] + - box->box[1][1] * box->box[1][1] + - box->box[1][2] * box->box[1][2]; - box->g[1][2] = - box->g[2][1] = box->box[1][0] * box->box[2][0] + - box->box[1][1] * box->box[2][1] + - box->box[1][2] * box->box[2][2]; - - box->g[2][2] = box->box[2][0] * box->box[2][0] + - box->box[2][1] * box->box[2][1] + - box->box[2][2] * box->box[2][2]; - - // These proportions are only used for isotropic_NPT! - box->side_prop[0] = box->box[0][0] / box->box[0][0]; - box->side_prop[1] = box->box[1][1] / box->box[0][0]; - box->side_prop[2] = box->box[2][2] / box->box[0][0]; -} - - -void Transform( rvec x1, simulation_box *box, char flag, rvec x2 ) -{ - int i, j; - real tmp; - - // printf(">x1: (%lf, %lf, %lf)\n",x1[0],x1[1],x1[2]); - - if (flag > 0) { - for (i=0; i < 3; i++) { - tmp = 0.0; - for (j=0; j < 3; j++) - tmp += box->trans[i][j]*x1[j]; - x2[i] = tmp; - } - } - else { - for (i=0; i < 3; i++) { - tmp = 0.0; - for (j=0; j < 3; j++) - tmp += box->trans_inv[i][j]*x1[j]; - x2[i] = tmp; - } - } - // printf(">x2: (%lf, %lf, %lf)\n", x2[0], x2[1], x2[2]); -} - - -void Transform_to_UnitBox( rvec x1, simulation_box *box, char flag, rvec x2 ) -{ - Transform( x1, box, flag, x2 ); - - x2[0] /= box->box_norms[0]; - x2[1] /= box->box_norms[1]; - x2[2] /= box->box_norms[2]; -} - - void Distance_on_T3_Gen( rvec x1, rvec x2, simulation_box* box, rvec r ) { rvec xa, xb, ra; @@ -301,12 +269,12 @@ void Inc_on_T3_Gen( 
rvec x, rvec dx, simulation_box* box ) real Metric_Product( rvec x1, rvec x2, simulation_box* box ) { int i, j; - real dist=0.0, tmp; + real dist = 0.0, tmp; - for( i = 0; i < 3; i++ ) + for ( i = 0; i < 3; i++ ) { tmp = 0.0; - for( j = 0; j < 3; j++ ) + for ( j = 0; j < 3; j++ ) tmp += box->g[i][j] * x2[j]; dist += x1[i] * tmp; } @@ -315,12 +283,59 @@ real Metric_Product( rvec x1, rvec x2, simulation_box* box ) } -/* Determines if the distance between x1 and x2 is < vlist_cut. +int Are_Far_Neighbors( rvec x1, rvec x2, simulation_box *box, + real cutoff, far_neighbor_data *data ) +{ + real norm_sqr, d, tmp; + int i; + + norm_sqr = 0; + + for ( i = 0; i < 3; i++ ) + { + d = x2[i] - x1[i]; + tmp = SQR(d); + + if ( tmp >= SQR( box->box_norms[i] / 2.0 ) ) + { + if ( x2[i] > x1[i] ) + { + d -= box->box_norms[i]; + data->rel_box[i] = -1; + } + else + { + d += box->box_norms[i]; + data->rel_box[i] = +1; + } + + data->dvec[i] = d; + norm_sqr += SQR(d); + } + else + { + data->dvec[i] = d; + norm_sqr += tmp; + data->rel_box[i] = 0; + } + } + + if ( norm_sqr <= SQR(cutoff) ) + { + data->d = sqrt(norm_sqr); + return TRUE; + } + + return FALSE; +} + + +/* Determines if the distance between x1 and x2 is < vlist_cut. If so, this neighborhood is added to the list of far neighbors. Periodic boundary conditions do not apply. 
*/ -void Get_NonPeriodic_Far_Neighbors( rvec x1, rvec x2, simulation_box *box, - control_params *control, - far_neighbor_data *new_nbrs, int *count ) +void Get_NonPeriodic_Far_Neighbors( rvec x1, rvec x2, simulation_box *box, + control_params *control, + far_neighbor_data *new_nbrs, int *count ) { real norm_sqr; @@ -328,7 +343,8 @@ void Get_NonPeriodic_Far_Neighbors( rvec x1, rvec x2, simulation_box *box, norm_sqr = rvec_Norm_Sqr( new_nbrs[0].dvec ); - if( norm_sqr <= SQR( control->vlist_cut ) ) { + if ( norm_sqr <= SQR( control->vlist_cut ) ) + { *count = 1; new_nbrs[0].d = SQRT( norm_sqr ); @@ -341,11 +357,11 @@ void Get_NonPeriodic_Far_Neighbors( rvec x1, rvec x2, simulation_box *box, /* Finds periodic neighbors in a 'big_box'. Here 'big_box' means: the current simulation box has all dimensions > 2 *vlist_cut. - If the periodic distance between x1 and x2 is than vlist_cut, this + If the periodic distance between x1 and x2 is less than vlist_cut, this neighborhood is added to the list of far neighbors. 
*/ -void Get_Periodic_Far_Neighbors_Big_Box( rvec x1, rvec x2, simulation_box *box, - control_params *control, - far_neighbor_data *periodic_nbrs, +void Get_Periodic_Far_Neighbors_Big_Box( rvec x1, rvec x2, simulation_box *box, + control_params *control, + far_neighbor_data *periodic_nbrs, int *count ) { real norm_sqr, d, tmp; @@ -353,19 +369,23 @@ void Get_Periodic_Far_Neighbors_Big_Box( rvec x1, rvec x2, simulation_box *box, norm_sqr = 0; - for( i = 0; i < 3; i++ ) { + for ( i = 0; i < 3; i++ ) + { d = x2[i] - x1[i]; tmp = SQR(d); // fprintf(out,"Inside Sq_Distance_on_T3, %d, %lf, %lf\n", // i,tmp,SQR(box->box_norms[i]/2.0)); - if( tmp >= SQR( box->box_norms[i] / 2.0 ) ) { - if( x2[i] > x1[i] ) { + if ( tmp >= SQR( box->box_norms[i] / 2.0 ) ) + { + if ( x2[i] > x1[i] ) + { d -= box->box_norms[i]; periodic_nbrs[0].rel_box[i] = -1; // periodic_nbrs[0].ext_factor[i] = +1; } - else { + else + { d += box->box_norms[i]; periodic_nbrs[0].rel_box[i] = +1; // periodic_nbrs[0].ext_factor[i] = -1; @@ -374,15 +394,17 @@ void Get_Periodic_Far_Neighbors_Big_Box( rvec x1, rvec x2, simulation_box *box, periodic_nbrs[0].dvec[i] = d; norm_sqr += SQR(d); } - else { + else + { periodic_nbrs[0].dvec[i] = d; norm_sqr += tmp; periodic_nbrs[0].rel_box[i] = 0; // periodic_nbrs[0].ext_factor[i] = 0; - } + } } - if( norm_sqr <= SQR( control->vlist_cut ) ) { + if ( norm_sqr <= SQR( control->vlist_cut ) ) + { *count = 1; periodic_nbrs[0].d = SQRT( norm_sqr ); } @@ -390,16 +412,16 @@ void Get_Periodic_Far_Neighbors_Big_Box( rvec x1, rvec x2, simulation_box *box, } -/* Finds all periodic far neighborhoods between x1 and x2 +/* Finds all periodic far neighborhoods between x1 and x2 ((dist(x1, x2') < vlist_cut, periodic images of x2 are also considered). Here the box is 'small' meaning that at least one dimension is < 2*vlist_cut. -IMPORTANT: This part might need some improvement. In NPT, the simulation box -might get too small (such as <5 A!). 
In this case we have to consider the -periodic images of x2 that are two boxs away!!! - */ + IMPORTANT: This part might need some improvement. In NPT, the simulation box + might get too small (such as <5 A!). In this case we have to consider the + periodic images of x2 that are two boxes away!!! +*/ void Get_Periodic_Far_Neighbors_Small_Box( rvec x1, rvec x2, simulation_box *box, - control_params *control, - far_neighbor_data *periodic_nbrs, + control_params *control, + far_neighbor_data *periodic_nbrs, int *count ) { int i, j, k; @@ -418,14 +440,18 @@ void Get_Periodic_Far_Neighbors_Small_Box( rvec x1, rvec x2, simulation_box *box imax, jmax, kmax ); */ - for( i = -imax; i <= imax; ++i ) - if(fabs(d_i=((x2[0]+i*box->box_norms[0])-x1[0]))<=control->vlist_cut) { - for( j = -jmax; j <= jmax; ++j ) - if(fabs(d_j=((x2[1]+j*box->box_norms[1])-x1[1]))<=control->vlist_cut) { - for( k = -kmax; k <= kmax; ++k ) - if(fabs(d_k=((x2[2]+k*box->box_norms[2])-x1[2]))<=control->vlist_cut) { + for ( i = -imax; i <= imax; ++i ) + if (fabs(d_i = ((x2[0] + i * box->box_norms[0]) - x1[0])) <= control->vlist_cut) + { + for ( j = -jmax; j <= jmax; ++j ) + if (fabs(d_j = ((x2[1] + j * box->box_norms[1]) - x1[1])) <= control->vlist_cut) + { + for ( k = -kmax; k <= kmax; ++k ) + if (fabs(d_k = ((x2[2] + k * box->box_norms[2]) - x1[2])) <= control->vlist_cut) + { sqr_norm = SQR(d_i) + SQR(d_j) + SQR(d_k); - if( sqr_norm <= SQR(control->vlist_cut) ) { + if ( sqr_norm <= SQR(control->vlist_cut) ) + { periodic_nbrs[ *count ].d = SQRT( sqr_norm ); periodic_nbrs[ *count ].dvec[0] = d_i; @@ -466,21 +492,21 @@ void Get_Periodic_Far_Neighbors_Small_Box( rvec x1, rvec x2, simulation_box *box /* Returns the mapping for the neighbor box pointed by (ix,iy,iz) */ /*int Get_Nbr_Box( simulation_box *box, int ix, int iy, int iz ) - { - return (9 * ix + 3 * iy + iz + 13); -// 13 is to handle negative indexes properly +{ + return (9 * ix + 3 * iy + iz + 13); + // 13 is to handle negative indexes properly }*/ 
/* Returns total pressure vector for the neighbor box pointed by (ix,iy,iz) */ /*rvec Get_Nbr_Box_Press( simulation_box *box, int ix, int iy, int iz ) - { +{ int map; - map = 9 * ix + 3 * iy + iz + 13; -// 13 is to adjust -1,-1,-1 correspond to index 0 + map = 9 * ix + 3 * iy + iz + 13; + // 13 is to adjust -1,-1,-1 correspond to index 0 -return box->nbr_box_press[map]; + return box->nbr_box_press[map]; }*/ @@ -489,53 +515,53 @@ return box->nbr_box_press[map]; { int map; - map = 9 * ix + 3 * iy + iz + 13; -// 13 is to adjust -1,-1,-1 correspond to index 0 + map = 9 * ix + 3 * iy + iz + 13; + // 13 is to adjust -1,-1,-1 correspond to index 0 -rvec_Add( box->nbr_box_press[map], v ); + rvec_Add( box->nbr_box_press[map], v ); }*/ /* Increments the total pressure vector for the neighbor box mapped to 'map' */ /*void Inc_Nbr_Box_Press( simulation_box *box, int map, rvec v ) - { +{ rvec_Add( box->nbr_box_press[map], v ); - }*/ +}*/ -void Print_Box_Information( simulation_box* box, FILE *out ) +void Print_Box( simulation_box* box, FILE *out ) { int i, j; fprintf( out, "box: {" ); - for( i = 0; i < 3; ++i ) + for ( i = 0; i < 3; ++i ) { fprintf( out, "{" ); - for( j = 0; j < 3; ++j ) + for ( j = 0; j < 3; ++j ) fprintf( out, "%8.3f ", box->box[i][j] ); fprintf( out, "}" ); } fprintf( out, "}\n" ); - fprintf( out, "V: %8.3f\tdims: {%8.3f, %8.3f, %8.3f}\n", - box->volume, - box->box_norms[0], box->box_norms[1], box->box_norms[2] ); + fprintf( out, "V: %8.3f\tdims: {%8.3f, %8.3f, %8.3f}\n", + box->volume, + box->box_norms[0], box->box_norms[1], box->box_norms[2] ); fprintf( out, "box_trans: {" ); - for( i = 0; i < 3; ++i ) + for ( i = 0; i < 3; ++i ) { fprintf( out, "{" ); - for( j = 0; j < 3; ++j ) + for ( j = 0; j < 3; ++j ) fprintf( out, "%8.3f ", box->trans[i][j] ); fprintf( out, "}" ); } fprintf( out, "}\n" ); fprintf( out, "box_trinv: {" ); - for( i = 0; i < 3; ++i ) + for ( i = 0; i < 3; ++i ) { fprintf( out, "{" ); - for( j = 0; j < 3; ++j ) + for ( j = 0; j < 3; ++j ) 
fprintf( out, "%8.3f ", box->trans_inv[i][j] ); fprintf( out, "}" ); } diff --git a/PuReMD-GPU/src/box.h b/PuReMD-GPU/src/box.h index 418aa6208a81fb05ee56ff09afc6ff76751f75c9..84f8371becafbc86180e75b7b8a189c299c82493 100644 --- a/PuReMD-GPU/src/box.h +++ b/PuReMD-GPU/src/box.h @@ -25,9 +25,7 @@ #include "mytypes.h" -/* Initializes box from CRYST1 line of PDB */ -void Init_Box_From_CRYST(real, real, real, real, real, real, - simulation_box*/*, int*/); +void Setup_Box( real, real, real, real, real, real, simulation_box* ); /* Initializes box from box rtensor */ void Update_Box(rtensor, simulation_box* /*, int*/); @@ -36,14 +34,9 @@ void Update_Box_SemiIsotropic( simulation_box*, rvec /*, int*/ ); /* Computes all the transformations, metric and other quantities from box rtensor */ -void Make_Consistent(simulation_box*/*, int*/ ); - -/* Applies transformation to and from - Cartesian to Triclinic coordinates based on flag */ -/* Use -1 flag for Cartesian -> Triclinic and +1 for otherway */ -void Transform( rvec, simulation_box*, char, rvec ); -void Transform_to_UnitBox( rvec, simulation_box*, char, rvec ); +void Make_Consistent( simulation_box* ); +int Are_Far_Neighbors( rvec, rvec, simulation_box*, real, far_neighbor_data* ); void Get_NonPeriodic_Far_Neighbors( rvec, rvec, simulation_box*, control_params*, far_neighbor_data*, int* ); void Get_Periodic_Far_Neighbors_Big_Box( rvec, rvec, simulation_box*, @@ -59,10 +52,6 @@ void Inc_Nbr_Box_Press( simulation_box*, int, int, int, rvec );*/ /* These functions assume that the coordinates are in triclinic system */ /* this function returns cartesian norm but triclinic distance vector */ -real Metric_Product( rvec, rvec, simulation_box* ); - -void Print_Box_Information( simulation_box*, FILE* ); - static inline HOST_DEVICE real Sq_Distance_on_T3( rvec x1, rvec x2, simulation_box* box, rvec r) { @@ -113,5 +102,9 @@ static inline HOST_DEVICE void Inc_on_T3( rvec x, rvec dx, simulation_box *box ) } } +real Metric_Product( rvec, 
rvec, simulation_box* ); + +void Print_Box( simulation_box*, FILE* ); + #endif diff --git a/PuReMD-GPU/src/control.c b/PuReMD-GPU/src/control.c new file mode 100644 index 0000000000000000000000000000000000000000..41f744969f1615ba621d85db852d998e92719b86 --- /dev/null +++ b/PuReMD-GPU/src/control.c @@ -0,0 +1,560 @@ +/*---------------------------------------------------------------------- + SerialReax - Reax Force Field Simulator + + Copyright (2010) Purdue University + Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. 
+ ----------------------------------------------------------------------*/ + +#include <ctype.h> + +#include "control.h" +#include "traj.h" +#include "tool_box.h" + + +char Read_Control_File( FILE* fp, reax_system *system, control_params* control, + output_controls *out_control ) +{ + char *s, **tmp; + int c, i; + real val; + int ival; + + /* assign default values */ + strcpy( control->sim_name, "default.sim" ); + + control->restart = 0; + out_control->restart_format = WRITE_BINARY; + out_control->restart_freq = 0; + strcpy( control->restart_from, "default.res" ); + out_control->restart_freq = 0; + control->random_vel = 0; + + control->reposition_atoms = 0; + + control->ensemble = NVE; + control->nsteps = 0; + control->dt = 0.25; + + control->geo_format = PDB; + control->restrict_bonds = 0; + + control->periodic_boundaries = 1; + control->periodic_images[0] = 0; + control->periodic_images[1] = 0; + control->periodic_images[2] = 0; + + control->reneighbor = 1; + control->vlist_cut = 0; + control->nbr_cut = 4.; + control->r_cut = 10.; + control->r_sp_cut = 10.; + control->max_far_nbrs = 1000; + control->bo_cut = 0.01; + control->thb_cut = 0.001; + control->hb_cut = 7.50; + + control->tabulate = 0; + + control->qeq_solver_type = GMRES_S; + control->qeq_solver_q_err = 0.000001; + control->qeq_domain_sparsify_enabled = FALSE; + control->qeq_domain_sparsity = 1.0; + control->pre_comp_type = ICHOLT_PC; + control->pre_comp_sweeps = 3; + control->pre_comp_refactor = 100; + control->pre_comp_droptol = 0.01; + control->pre_app_type = TRI_SOLVE_PA; + control->pre_app_jacobi_iters = 50; + + control->T_init = 0.; + control->T_final = 300.; + control->Tau_T = 1.0; + control->T_mode = 0.; + control->T_rate = 1.; + control->T_freq = 1.; + + control->P[0] = 0.000101325; + control->P[1] = 0.000101325; + control->P[2] = 0.000101325; + control->Tau_P[0] = 500.0; + control->Tau_P[1] = 500.0; + control->Tau_P[2] = 500.0; + control->Tau_PT = 500.0; + control->compressibility = 1.0; + 
control->press_mode = 0; + + control->remove_CoM_vel = 25; + + out_control->debug_level = 0; + out_control->energy_update_freq = 10; + + out_control->write_steps = 100; + out_control->traj_compress = 0; + out_control->write = fprintf; + out_control->traj_format = 0; + out_control->write_header = + (int (*)( reax_system*, control_params*, + static_storage*, void* )) Write_Custom_Header; + out_control->append_traj_frame = + (int (*)( reax_system*, control_params*, simulation_data*, + static_storage*, list **, void* )) Append_Custom_Frame; + + strcpy( out_control->traj_title, "default_title" ); + out_control->atom_format = 0; + out_control->bond_info = 0; + out_control->angle_info = 0; + + control->molec_anal = NO_ANALYSIS; + control->freq_molec_anal = 0; + control->bg_cut = 0.3; + control->num_ignored = 0; + memset( control->ignore, 0, sizeof(int)*MAX_ATOM_TYPES ); + + control->dipole_anal = 0; + control->freq_dipole_anal = 0; + + control->diffusion_coef = 0; + control->freq_diffusion_coef = 0; + control->restrict_type = 0; + + /* memory allocations */ + s = (char*) malloc(sizeof(char) * MAX_LINE); + tmp = (char**) malloc(sizeof(char*)*MAX_TOKENS); + for (i = 0; i < MAX_TOKENS; i++) + tmp[i] = (char*) malloc(sizeof(char) * MAX_LINE); + + /* read control parameters file */ + while (fgets(s, MAX_LINE, fp)) + { + c = Tokenize(s, &tmp); + + if ( strcmp(tmp[0], "simulation_name") == 0 ) + { + strcpy( control->sim_name, tmp[1] ); + } + //else if( strcmp(tmp[0], "restart") == 0 ) { + // ival = atoi(tmp[1]); + // control->restart = ival; + //} + else if ( strcmp(tmp[0], "restart_format") == 0 ) + { + ival = atoi(tmp[1]); + out_control->restart_format = ival; + } + else if ( strcmp(tmp[0], "restart_freq") == 0 ) + { + ival = atoi(tmp[1]); + out_control->restart_freq = ival; + } + else if ( strcmp(tmp[0], "random_vel") == 0 ) + { + ival = atoi(tmp[1]); + control->random_vel = ival; + } + else if ( strcmp(tmp[0], "reposition_atoms") == 0 ) + { + ival = atoi(tmp[1]); + 
control->reposition_atoms = ival; + } + else if ( strcmp(tmp[0], "ensemble_type") == 0 ) + { + ival = atoi(tmp[1]); + control->ensemble = ival; + } + else if ( strcmp(tmp[0], "nsteps") == 0 ) + { + ival = atoi(tmp[1]); + control->nsteps = ival; + } + else if ( strcmp(tmp[0], "dt") == 0 ) + { + val = atof(tmp[1]); + control->dt = val * 1.e-3; // convert dt from fs to ps! + } + else if ( strcmp(tmp[0], "periodic_boundaries") == 0 ) + { + ival = atoi( tmp[1] ); + control->periodic_boundaries = ival; + } + else if ( strcmp(tmp[0], "periodic_images") == 0 ) + { + ival = atoi(tmp[1]); + control->periodic_images[0] = ival; + ival = atoi(tmp[2]); + control->periodic_images[1] = ival; + ival = atoi(tmp[3]); + control->periodic_images[2] = ival; + } + else if ( strcmp(tmp[0], "geo_format") == 0 ) + { + ival = atoi( tmp[1] ); + control->geo_format = ival; + } + else if ( strcmp(tmp[0], "restrict_bonds") == 0 ) + { + ival = atoi( tmp[1] ); + control->restrict_bonds = ival; + } + else if ( strcmp(tmp[0], "tabulate_long_range") == 0 ) + { + ival = atoi( tmp[1] ); + control->tabulate = ival; + } + else if ( strcmp(tmp[0], "reneighbor") == 0 ) + { + ival = atoi( tmp[1] ); + control->reneighbor = ival; + } + else if ( strcmp(tmp[0], "vlist_buffer") == 0 ) + { + val = atof(tmp[1]); + control->vlist_cut = val; + } + else if ( strcmp(tmp[0], "nbrhood_cutoff") == 0 ) + { + val = atof(tmp[1]); + control->nbr_cut = val; + } + else if ( strcmp(tmp[0], "thb_cutoff") == 0 ) + { + val = atof(tmp[1]); + control->thb_cut = val; + } + else if ( strcmp(tmp[0], "hbond_cutoff") == 0 ) + { + val = atof( tmp[1] ); + control->hb_cut = val; + } + else if ( strcmp(tmp[0], "qeq_solver_type") == 0 ) + { + ival = atoi( tmp[1] ); + control->qeq_solver_type = ival; + } + else if ( strcmp(tmp[0], "qeq_solver_q_err") == 0 ) + { + val = atof( tmp[1] ); + control->qeq_solver_q_err = val; + } + else if ( strcmp(tmp[0], "qeq_domain_sparsity") == 0 ) + { + val = atof( tmp[1] ); + control->qeq_domain_sparsity = 
val; + control->qeq_domain_sparsify_enabled = TRUE; + } + else if ( strcmp(tmp[0], "pre_comp_type") == 0 ) + { + ival = atoi( tmp[1] ); + control->pre_comp_type = ival; + } + else if ( strcmp(tmp[0], "pre_comp_refactor") == 0 ) + { + ival = atoi( tmp[1] ); + control->pre_comp_refactor = ival; + } + else if ( strcmp(tmp[0], "pre_comp_droptol") == 0 ) + { + val = atof( tmp[1] ); + control->pre_comp_droptol = val; + } + else if ( strcmp(tmp[0], "pre_comp_sweeps") == 0 ) + { + ival = atoi( tmp[1] ); + control->pre_comp_sweeps = ival; + } + else if ( strcmp(tmp[0], "pre_app_type") == 0 ) + { + ival = atoi( tmp[1] ); + control->pre_app_type = ival; + } + else if ( strcmp(tmp[0], "pre_app_jacobi_iters") == 0 ) + { + ival = atoi( tmp[1] ); + control->pre_app_jacobi_iters = ival; + } + else if ( strcmp(tmp[0], "temp_init") == 0 ) + { + val = atof(tmp[1]); + control->T_init = val; + + if ( control->T_init < 0.001 ) + control->T_init = 0.001; + } + else if ( strcmp(tmp[0], "temp_final") == 0 ) + { + val = atof(tmp[1]); + control->T_final = val; + + if ( control->T_final < 0.1 ) + control->T_final = 0.1; + } + else if ( strcmp(tmp[0], "t_mass") == 0 ) + { + val = atof(tmp[1]); + control->Tau_T = val * 1.e-3; // convert t_mass from fs to ps + } + else if ( strcmp(tmp[0], "t_mode") == 0 ) + { + ival = atoi(tmp[1]); + control->T_mode = ival; + } + else if ( strcmp(tmp[0], "t_rate") == 0 ) + { + val = atof(tmp[1]); + control->T_rate = val; + } + else if ( strcmp(tmp[0], "t_freq") == 0 ) + { + val = atof(tmp[1]); + control->T_freq = val; + } + else if ( strcmp(tmp[0], "pressure") == 0 ) + { + if ( control->ensemble == iNPT ) + { + val = atof(tmp[1]); + control->P[0] = control->P[1] = control->P[2] = val; + } + else if ( control->ensemble == sNPT ) + { + val = atof(tmp[1]); + control->P[0] = val; + + val = atof(tmp[2]); + control->P[1] = val; + + val = atof(tmp[3]); + control->P[2] = val; + } + } + else if ( strcmp(tmp[0], "p_mass") == 0 ) + { + if ( control->ensemble == iNPT ) + { 
+ val = atof(tmp[1]); + control->Tau_P[0] = val * 1.e-3; // convert p_mass from fs to ps + } + else if ( control->ensemble == sNPT ) + { + val = atof(tmp[1]); + control->Tau_P[0] = val * 1.e-3; // convert p_mass from fs to ps + + val = atof(tmp[2]); + control->Tau_P[1] = val * 1.e-3; // convert p_mass from fs to ps + + val = atof(tmp[3]); + control->Tau_P[2] = val * 1.e-3; // convert p_mass from fs to ps + } + } + else if ( strcmp(tmp[0], "pt_mass") == 0 ) + { + val = atof(tmp[1]); + control->Tau_PT = val * 1.e-3; // convert pt_mass from fs to ps + } + else if ( strcmp(tmp[0], "compress") == 0 ) + { + val = atof(tmp[1]); + control->compressibility = val; + } + else if ( strcmp(tmp[0], "press_mode") == 0 ) + { + val = atoi(tmp[1]); + control->press_mode = val; + } + else if ( strcmp(tmp[0], "remove_CoM_vel") == 0 ) + { + val = atoi(tmp[1]); + control->remove_CoM_vel = val; + } + else if ( strcmp(tmp[0], "debug_level") == 0 ) + { + ival = atoi(tmp[1]); + out_control->debug_level = ival; + } + else if ( strcmp(tmp[0], "energy_update_freq") == 0 ) + { + ival = atoi(tmp[1]); + out_control->energy_update_freq = ival; + } + else if ( strcmp(tmp[0], "write_freq") == 0 ) + { + ival = atoi(tmp[1]); + out_control->write_steps = ival; + } + else if ( strcmp(tmp[0], "traj_compress") == 0 ) + { + ival = atoi(tmp[1]); + out_control->traj_compress = ival; + + if ( out_control->traj_compress ) + out_control->write = (int (*)(FILE *, const char *, ...)) gzprintf; + else out_control->write = fprintf; + } + else if ( strcmp(tmp[0], "traj_format") == 0 ) + { + ival = atoi(tmp[1]); + out_control->traj_format = ival; + + if ( out_control->traj_format == 0 ) + { + out_control->write_header = + (int (*)( reax_system*, control_params*, + static_storage*, void* )) Write_Custom_Header; + out_control->append_traj_frame = + (int (*)(reax_system*, control_params*, simulation_data*, + static_storage*, list **, void*)) Append_Custom_Frame; + } + else if ( out_control->traj_format == 1 ) + { + 
out_control->write_header = + (int (*)( reax_system*, control_params*, + static_storage*, void* )) Write_xyz_Header; + out_control->append_traj_frame = + (int (*)( reax_system*, control_params*, simulation_data*, + static_storage*, list **, void* )) Append_xyz_Frame; + } + } + else if ( strcmp(tmp[0], "traj_title") == 0 ) + { + strcpy( out_control->traj_title, tmp[1] ); + } + else if ( strcmp(tmp[0], "atom_info") == 0 ) + { + ival = atoi(tmp[1]); + out_control->atom_format += ival * 4; + } + else if ( strcmp(tmp[0], "atom_velocities") == 0 ) + { + ival = atoi(tmp[1]); + out_control->atom_format += ival * 2; + } + else if ( strcmp(tmp[0], "atom_forces") == 0 ) + { + ival = atoi(tmp[1]); + out_control->atom_format += ival * 1; + } + else if ( strcmp(tmp[0], "bond_info") == 0 ) + { + ival = atoi(tmp[1]); + out_control->bond_info = ival; + } + else if ( strcmp(tmp[0], "angle_info") == 0 ) + { + ival = atoi(tmp[1]); + out_control->angle_info = ival; + } + else if ( strcmp(tmp[0], "test_forces") == 0 ) + { + ival = atoi(tmp[1]); + } + else if ( strcmp(tmp[0], "molec_anal") == 0 ) + { + ival = atoi(tmp[1]); + control->molec_anal = ival; + } + else if ( strcmp(tmp[0], "freq_molec_anal") == 0 ) + { + ival = atoi(tmp[1]); + control->freq_molec_anal = ival; + } + else if ( strcmp(tmp[0], "bond_graph_cutoff") == 0 ) + { + val = atof(tmp[1]); + control->bg_cut = val; + } + else if ( strcmp(tmp[0], "ignore") == 0 ) + { + control->num_ignored = atoi(tmp[1]); + for ( i = 0; i < control->num_ignored; ++i ) + control->ignore[atoi(tmp[i + 2])] = 1; + } + else if ( strcmp(tmp[0], "dipole_anal") == 0 ) + { + ival = atoi(tmp[1]); + control->dipole_anal = ival; + } + else if ( strcmp(tmp[0], "freq_dipole_anal") == 0 ) + { + ival = atoi(tmp[1]); + control->freq_dipole_anal = ival; + } + else if ( strcmp(tmp[0], "diffusion_coef") == 0 ) + { + ival = atoi(tmp[1]); + control->diffusion_coef = ival; + } + else if ( strcmp(tmp[0], "freq_diffusion_coef") == 0 ) + { + ival = atoi(tmp[1]); + 
control->freq_diffusion_coef = ival; + } + else if ( strcmp(tmp[0], "restrict_type") == 0 ) + { + ival = atoi(tmp[1]); + control->restrict_type = ival; + } + else + { + fprintf( stderr, "WARNING: unknown parameter %s\n", tmp[0] ); + exit( UNKNOWN_OPTION ); + } + } + + if (ferror(fp)) + { + fprintf(stderr, "Error reading control file. Terminating.\n"); + exit( INVALID_INPUT ); + } + + /* determine target T */ + if ( control->T_mode == 0 ) + control->T = control->T_final; + else control->T = control->T_init; + + + /* near neighbor and far neighbor cutoffs */ + control->bo_cut = 0.01 * system->reaxprm.gp.l[29]; + control->r_low = system->reaxprm.gp.l[11]; + control->r_cut = system->reaxprm.gp.l[12]; + control->r_sp_cut = control->r_cut * control->qeq_domain_sparsity; + control->vlist_cut += control->r_cut; + + system->g.cell_size = control->vlist_cut / 2.; + for ( i = 0; i < 3; ++i ) + { + system->g.spread[i] = 2; + } + + /* free memory allocations at the top */ + for ( i = 0; i < MAX_TOKENS; i++ ) + { + free( tmp[i] ); + } + free( tmp ); + free( s ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, + "en=%d steps=%d dt=%.5f opt=%d T=%.5f P=%.5f %.5f %.5f\n", + control->ensemble, control->nsteps, control->dt, control->tabulate, + control->T, control->P[0], control->P[1], control->P[2] ); + + fprintf(stderr, "control file read\n" ); +#endif + + return SUCCESS; +} diff --git a/PuReMD-GPU/src/control.h b/PuReMD-GPU/src/control.h new file mode 100644 index 0000000000000000000000000000000000000000..66d0dde7b4901d7a7b42512414328a8e6b256d83 --- /dev/null +++ b/PuReMD-GPU/src/control.h @@ -0,0 +1,29 @@ +/*---------------------------------------------------------------------- + SerialReax - Reax Force Field Simulator + + Copyright (2010) Purdue University + Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + This program is free software; you can redistribute it and/or + modify 
it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#ifndef __CONTROL_H_ +#define __CONTROL_H_ + +#include "mytypes.h" + +char Read_Control_File( FILE*, reax_system*, control_params*, output_controls* ); + +#endif diff --git a/PuReMD-GPU/src/cuda_forces.cu b/PuReMD-GPU/src/cuda_forces.cu index bf277b391ce0df0c5336ea0a0653b6863ca14fec..754668c9d9be6601aa9e9a649eb920899e39833c 100644 --- a/PuReMD-GPU/src/cuda_forces.cu +++ b/PuReMD-GPU/src/cuda_forces.cu @@ -36,10 +36,10 @@ #include "cuda_three_body_interactions.h" #include "cuda_four_body_interactions.h" #include "cuda_list.h" -#include "cuda_QEq.h" +#include "cuda_qeq.h" #include "cuda_reduction.h" #include "cuda_system_props.h" -#include "validation.h" +#include "cuda_validation.h" #include "cudaProfiler.h" diff --git a/PuReMD-GPU/src/cuda_init_md.cu b/PuReMD-GPU/src/cuda_init_md.cu index 1a205506e4c5ff767e02398a3859f838818c1e1a..f0252a2f564a035d7317ea0698712318ee96df5e 100644 --- a/PuReMD-GPU/src/cuda_init_md.cu +++ b/PuReMD-GPU/src/cuda_init_md.cu @@ -49,7 +49,7 @@ #include "cuda_reduction.h" #include "cuda_reset_utils.h" #include "cuda_system_props.h" -#include "validation.h" +#include "cuda_validation.h" void Cuda_Init_System( reax_system *system, control_params *control, @@ -116,7 +116,9 @@ void Cuda_Init_Simulation_Data( reax_system *system, control_params *control, Reset_Simulation_Data( data ); if( !control->restart ) + { data->step = data->prev_steps = 0; + } switch( control->ensemble ) { case NVE: @@ -124,16 +126,18 @@ void 
Cuda_Init_Simulation_Data( reax_system *system, control_params *control, *Evolve = Cuda_Velocity_Verlet_NVE; break; - case NVT: data->N_f = 3 * system->N + 1; //control->Tau_T = 100 * data->N_f * K_B * control->T_final; - if( !control->restart || (control->restart && control->random_vel) ) { + + if( !control->restart || (control->restart && control->random_vel) ) + { data->therm.G_xi = control->Tau_T * (2.0 * data->E_Kin - data->N_f * K_B * control->T ); data->therm.v_xi = data->therm.G_xi * control->dt; data->therm.v_xi_old = 0; data->therm.xi = 0; + #if defined(DEBUG_FOCUS) fprintf( stderr, "init_md: G_xi=%f Tau_T=%f E_kin=%f N_f=%f v_xi=%f\n", data->therm.G_xi, control->Tau_T, data->E_Kin, @@ -144,12 +148,13 @@ void Cuda_Init_Simulation_Data( reax_system *system, control_params *control, *Evolve = Cuda_Velocity_Verlet_Nose_Hoover_NVT_Klein; break; - case NPT: // Anisotropic NPT fprintf( stderr, "THIS OPTION IS NOT YET IMPLEMENTED! TERMINATING...\n" ); exit( UNKNOWN_OPTION ); data->N_f = 3 * system->N + 9; - if( !control->restart ) { + + if( !control->restart ) + { data->therm.G_xi = control->Tau_T * (2.0 * data->E_Kin - data->N_f * K_B * control->T ); data->therm.v_xi = data->therm.G_xi * control->dt; @@ -160,7 +165,6 @@ void Cuda_Init_Simulation_Data( reax_system *system, control_params *control, *Evolve = Velocity_Verlet_Berendsen_Isotropic_NPT; break; - case sNPT: // Semi-Isotropic NPT fprintf( stderr, "THIS OPTION IS NOT YET IMPLEMENTED! TERMINATING...\n" ); exit( UNKNOWN_OPTION ); @@ -168,7 +172,6 @@ void Cuda_Init_Simulation_Data( reax_system *system, control_params *control, *Evolve = Velocity_Verlet_Berendsen_SemiIsotropic_NPT; break; - case iNPT: // Isotropic NPT fprintf( stderr, "THIS OPTION IS NOT YET IMPLEMENTED! 
TERMINATING...\n" ); exit( UNKNOWN_OPTION ); diff --git a/PuReMD-GPU/src/cuda_integrate.cu b/PuReMD-GPU/src/cuda_integrate.cu index cba0b79c39b4f9b66e5b506d11dcffb81adc488d..ab4d203139e4a8235d9035ab566fbdca7ded5c82 100644 --- a/PuReMD-GPU/src/cuda_integrate.cu +++ b/PuReMD-GPU/src/cuda_integrate.cu @@ -36,10 +36,10 @@ #include "cuda_forces.h" #include "cuda_grid.h" #include "cuda_neighbors.h" -#include "cuda_QEq.h" +#include "cuda_qeq.h" #include "cuda_reset_utils.h" #include "cuda_system_props.h" -#include "validation.h" +#include "cuda_validation.h" GLOBAL void Cuda_Velocity_Verlet_NVE_atoms1 (reax_atom *atoms, diff --git a/PuReMD-GPU/src/cuda_neighbors.cu b/PuReMD-GPU/src/cuda_neighbors.cu index 876b6b9913e4d825e0cc8be5a2fc1d092c56d9f8..5cfe03dea5f5c314a7dfb60ef3d4df2e6c8f61ae 100644 --- a/PuReMD-GPU/src/cuda_neighbors.cu +++ b/PuReMD-GPU/src/cuda_neighbors.cu @@ -265,11 +265,10 @@ GLOBAL void k_Generate_Neighbor_Lists ( reax_atom *sys_atoms, nbr_atoms = &(atoms [index_grid_atoms (x, y, z, 0, &g) ]); max = top [index_grid_3d(x, y, z, &g)]; - for (m = 0; m < max; m++) { + for (m = 0; m < max; m++) + { atom2 = nbr_atoms[m]; - //nbr_data = & ( far_nbrs.select.far_nbr_list[atom1 * g.max_cuda_nbrs + num_far] ); - //CHANGE ORIGINAL /* if (atom1 > atom2) { diff --git a/PuReMD-GPU/src/cuda_QEq.cu b/PuReMD-GPU/src/cuda_qeq.cu similarity index 99% rename from PuReMD-GPU/src/cuda_QEq.cu rename to PuReMD-GPU/src/cuda_qeq.cu index 033945338aa76aa4c02909f90d2ca3eb1dacad58..21c2e334be0d6ec46d0a42f4c4f8a44dfc0ebffc 100644 --- a/PuReMD-GPU/src/cuda_QEq.cu +++ b/PuReMD-GPU/src/cuda_qeq.cu @@ -18,14 +18,15 @@ <http://www.gnu.org/licenses/>. 
----------------------------------------------------------------------*/ -#include "cuda_QEq.h" +#include "cuda_qeq.h" -#include "QEq.h" +#include "qeq.h" #include "allocate.h" #include "lin_alg.h" #include "list.h" #include "print_utils.h" #include "index_utils.h" +#include "sort.h" #include "system_props.h" #include "cuda_copy.h" @@ -33,9 +34,7 @@ #include "cuda_utils.h" #include "cuda_lin_alg.h" #include "cuda_reduction.h" - -#include "sort.h" -#include "validation.h" +#include "cuda_validation.h" GLOBAL void Cuda_Sort_Matrix_Rows( sparse_matrix A ) diff --git a/PuReMD-GPU/src/cuda_QEq.h b/PuReMD-GPU/src/cuda_qeq.h similarity index 100% rename from PuReMD-GPU/src/cuda_QEq.h rename to PuReMD-GPU/src/cuda_qeq.h diff --git a/PuReMD-GPU/src/cuda_utils.cu b/PuReMD-GPU/src/cuda_utils.cu index 1efcf28aa432f563749e49c68c67cc6b132e711e..6867857a4d58771f571fbb6810efbf926677f80f 100644 --- a/PuReMD-GPU/src/cuda_utils.cu +++ b/PuReMD-GPU/src/cuda_utils.cu @@ -29,7 +29,7 @@ cusparseMatDescr_t matdescriptor; void cuda_malloc( void **ptr, int size, int memset, int err_code ) { - cudaError_t retVal = cudaSuccess; + cudaError_t retVal; //fprintf (stderr, "&ptr --. %ld \n", &ptr); //fprintf (stderr, "ptr --> %ld \n", ptr ); @@ -45,7 +45,8 @@ void cuda_malloc( void **ptr, int size, int memset, int err_code ) //fprintf (stderr, "&ptr --. 
%ld \n", &ptr); //fprintf (stderr, "ptr --> %ld \n", ptr ); - if ( memset ) { + if ( memset ) + { retVal = cudaMemset( *ptr, 0, size ); if ( retVal != cudaSuccess ) { @@ -59,8 +60,12 @@ void cuda_malloc( void **ptr, int size, int memset, int err_code ) void cuda_free( void *ptr, int err_code ) { - cudaError_t retVal = cudaSuccess; - if (!ptr) return; + cudaError_t retVal; + + if (!ptr) + { + return; + } retVal = cudaFree( ptr ); @@ -75,9 +80,10 @@ void cuda_free( void *ptr, int err_code ) void cuda_memset( void *ptr, int data, size_t count, int err_code ) { - cudaError_t retVal = cudaSuccess; + cudaError_t retVal; retVal = cudaMemset( ptr, data, count ); + if (retVal != cudaSuccess) { fprintf( stderr, "ptr passed is %ld, value: %ld \n", ptr, &ptr ); fprintf( stderr, " size to memset: %d \n", count ); @@ -91,7 +97,7 @@ void cuda_memset( void *ptr, int data, size_t count, int err_code ) void copy_host_device( void *host, void *dev, int size, enum cudaMemcpyKind dir, int resid ) { - cudaError_t retVal = cudaErrorNotReady; + cudaError_t retVal; if ( dir == cudaMemcpyHostToDevice ) { @@ -112,9 +118,10 @@ void copy_host_device( void *host, void *dev, int size, enum cudaMemcpyKind dir, void copy_device( void *dest, void *src, int size, int resid ) { - cudaError_t retVal = cudaErrorNotReady; + cudaError_t retVal; retVal = cudaMemcpy( dest, src, size, cudaMemcpyDeviceToDevice ); + if ( retVal != cudaSuccess ) { fprintf( stderr, "could not copy resource %d from host to device: reason %d \n", @@ -134,6 +141,7 @@ void compute_blocks( int *blocks, int *block_size, int count ) void compute_nearest_pow_2( int blocks, int *result ) { int power = 1; + while (power < blocks) { power *= 2; @@ -146,7 +154,9 @@ void compute_nearest_pow_2( int blocks, int *result ) void print_device_mem_usage( ) { size_t total, free; + cudaMemGetInfo( &free, &total ); + if ( cudaGetLastError() != cudaSuccess ) { fprintf( stderr, "Error on the memory call \n" ); diff --git a/PuReMD-GPU/src/validation.cu 
b/PuReMD-GPU/src/cuda_validation.cu similarity index 99% rename from PuReMD-GPU/src/validation.cu rename to PuReMD-GPU/src/cuda_validation.cu index 21cd2145e689621ee0b3827889b106ed7c05af7f..b5348eba0871017a7225fc311e98d95531136792 100644 --- a/PuReMD-GPU/src/validation.cu +++ b/PuReMD-GPU/src/cuda_validation.cu @@ -18,13 +18,13 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#include "validation.h" +#include "cuda_validation.h" -#include "cuda_utils.h" +#include "index_utils.h" #include "list.h" - #include "sort.h" -#include "index_utils.h" + +#include "cuda_utils.h" int check_zero (real p1, real p2) diff --git a/PuReMD-GPU/src/validation.h b/PuReMD-GPU/src/cuda_validation.h similarity index 100% rename from PuReMD-GPU/src/validation.h rename to PuReMD-GPU/src/cuda_validation.h diff --git a/PuReMD-GPU/src/param.c b/PuReMD-GPU/src/ffield.c similarity index 58% rename from PuReMD-GPU/src/param.c rename to PuReMD-GPU/src/ffield.c index 42e9ef612ec6c81d593f74acd796a7f564f96896..a5377e6f2b75dc6c6bec586d36517127bca40d50 100644 --- a/PuReMD-GPU/src/param.c +++ b/PuReMD-GPU/src/ffield.c @@ -1,9 +1,10 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or @@ -18,85 +19,10 @@ <http://www.gnu.org/licenses/>. 
----------------------------------------------------------------------*/ -#include "param.h" -#include "traj.h" -#include "ctype.h" - - -int Get_Atom_Type( reax_interaction *reaxprm, char *s ) -{ - int i; - - for ( i = 0; i < reaxprm->num_atom_types; ++i ) - if ( !strcmp( reaxprm->sbp[i].name, s ) ) - return i; - - fprintf( stderr, "Unknown atom type %s. Terminating...\n", s ); - exit( UNKNOWN_ATOM_TYPE_ERR ); -} - - -int Tokenize(char* s, char*** tok) -{ - char test[MAX_LINE]; - char *sep = "\t \n!="; - char *word; - int count = 0; - - strncpy( test, s, MAX_LINE ); - - // fprintf( stderr, "|%s|\n", test ); - - for ( word = strtok(test, sep); word; word = strtok(NULL, sep) ) - { - strncpy( (*tok)[count], word, MAX_LINE ); - count++; - } - - return count; -} - - -/* Initialize Taper params */ -void Init_Taper( control_params *control ) -{ - real d1, d7; - real swa, swa2, swa3; - real swb, swb2, swb3; - - swa = control->r_low; - swb = control->r_cut; - - if ( fabs( swa ) > 0.01 ) - fprintf( stderr, "Warning: non-zero value for lower Taper-radius cutoff\n" ); - - if ( swb < 0 ) - { - fprintf( stderr, "Negative value for upper Taper-radius cutoff\n" ); - exit( INVALID_INPUT ); - } - else if ( swb < 5 ) - fprintf( stderr, "Warning: low value for upper Taper-radius cutoff:%f\n", - swb ); - - d1 = swb - swa; - d7 = POW( d1, 7.0 ); - swa2 = SQR( swa ); - swa3 = CUBE( swa ); - swb2 = SQR( swb ); - swb3 = CUBE( swb ); - - control->Tap7 = 20.0 / d7; - control->Tap6 = -70.0 * (swa + swb) / d7; - control->Tap5 = 84.0 * (swa2 + 3.0 * swa * swb + swb2) / d7; - control->Tap4 = -35.0 * (swa3 + 9.0 * swa2 * swb + 9.0 * swa * swb2 + swb3 ) / d7; - control->Tap3 = 140.0 * (swa3 * swb + 3.0 * swa2 * swb2 + swa * swb3 ) / d7; - control->Tap2 = -210.0 * (swa3 * swb2 + swa2 * swb3) / d7; - control->Tap1 = 140.0 * swa3 * swb3 / d7; - control->Tap0 = (-35.0 * swa3 * swb2 * swb2 + 21.0 * swa2 * swb3 * swb2 + - 7.0 * swa * swb3 * swb3 + swb3 * swb3 * swb ) / d7; -} +#include <ctype.h> 
+#include "ffield.h" +#include "tool_box.h" char Read_Force_Field( FILE* fp, reax_interaction* reax ) @@ -106,20 +32,20 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) char *tor_flag; int c, i, j, k, l, m, n, o, p, cnt; real val; - int __N; int index1, index2; s = (char*) malloc(sizeof(char) * MAX_LINE); tmp = (char**) malloc(sizeof(char*)*MAX_TOKENS); for (i = 0; i < MAX_TOKENS; i++) + { tmp[i] = (char*) malloc(sizeof(char) * MAX_TOKEN_LEN); + } /* reading first header comment */ fgets( s, MAX_LINE, fp ); - /* line 2 is number of global parameters */ fgets( s, MAX_LINE, fp ); c = Tokenize( s, &tmp ); @@ -129,7 +55,7 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) if (n < 1) { fprintf( stderr, "WARNING: number of globals in ffield file is 0!\n" ); - return 1; + exit( INVALID_INPUT ); } reax->gp.n_global = n; @@ -146,20 +72,17 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) reax->gp.l[i] = val; } - /* next line is number of atom types and some comments */ fgets( s, MAX_LINE, fp ); c = Tokenize( s, &tmp ); reax->num_atom_types = atoi(tmp[0]); __N = reax->num_atom_types; - /* 3 lines of comments */ fgets(s, MAX_LINE, fp); fgets(s, MAX_LINE, fp); fgets(s, MAX_LINE, fp); - /* Allocating structures in reax_interaction */ reax->sbp = (single_body_parameters*) calloc( reax->num_atom_types, sizeof(single_body_parameters) ); @@ -194,7 +117,9 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) c = Tokenize( s, &tmp ); for ( j = 0; j < strlen( tmp[0] ); ++j ) + { reax->sbp[i].name[j] = toupper( tmp[0][j] ); + } val = atof(tmp[1]); reax->sbp[i].r_s = val; @@ -281,6 +206,7 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) if ( reax->sbp[i].gamma_w > 0.5 ) // Shielding vdWaals { if ( reax->gp.vdw_type != 0 && reax->gp.vdw_type != 3 ) + { fprintf( stderr, "Warning: inconsistent vdWaals-parameters\n" \ "Force field parameters for element %s\n" \ "indicate inner wall+shielding, but earlier\n" \ @@ -288,9 +214,11 @@ char 
Read_Force_Field( FILE* fp, reax_interaction* reax ) "This may cause division-by-zero errors.\n" \ "Keeping vdWaals-setting for earlier atoms.\n", reax->sbp[i].name ); + } else { reax->gp.vdw_type = 3; + #if defined(DEBUG) fprintf( stderr, "vdWaals type for element %s: Shielding+inner-wall", reax->sbp[i].name ); @@ -300,6 +228,7 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) else // No shielding vdWaals parameters present { if ( reax->gp.vdw_type != 0 && reax->gp.vdw_type != 2 ) + { fprintf( stderr, "Warning: inconsistent vdWaals-parameters\n" \ "Force field parameters for element %s\n" \ "indicate inner wall without shielding, but earlier\n" \ @@ -307,9 +236,11 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) "This may cause division-by-zero errors.\n" \ "Keeping vdWaals-setting for earlier atoms.\n", reax->sbp[i].name ); + } else { reax->gp.vdw_type = 2; + #if defined(DEBUG) fprintf( stderr, "vdWaals type for element%s: No Shielding,inner-wall", reax->sbp[i].name ); @@ -348,7 +279,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) } } - /* next line is number of two body combination and some comments */ fgets(s, MAX_LINE, fp); c = Tokenize(s, &tmp); @@ -430,6 +360,7 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) /* calculating combination rules and filling up remaining fields. 
*/ for (i = 0; i < reax->num_atom_types; i++) + { for (j = i; j < reax->num_atom_types; j++) { index1 = i * __N + j; @@ -450,7 +381,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) reax->tbp[index2].r_pp = 0.5 * (reax->sbp[j].r_pi_pi + reax->sbp[i].r_pi_pi); - reax->tbp[index1].p_boc3 = sqrt(reax->sbp[i].b_o_132 * reax->sbp[j].b_o_132); @@ -472,7 +402,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) sqrt(reax->sbp[j].b_o_133 * reax->sbp[i].b_o_133); - reax->tbp[index1].D = sqrt(reax->sbp[i].epsilon * reax->sbp[j].epsilon); @@ -505,9 +434,8 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) reax->tbp[index2].gamma = POW(reax->sbp[j].gamma * reax->sbp[i].gamma, -1.5); - } - + } /* next line is number of 2-body offdiagonal combinations and some comments */ /* these are two body offdiagonal terms that are different from the @@ -572,7 +500,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) } } - /* 3-body parameters - supports multi-well potentials (upto MAX_3BODY_PARAM in mytypes.h) */ /* clear entries first */ @@ -636,7 +563,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) } } - /* 4-body parameters are entered in compact form. i.e. 0-X-Y-0 correspond to any type of pair of atoms in 1 and 4 position. 
However, explicit X-Y-Z-W takes precedence over the @@ -647,13 +573,19 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) /* clear all entries first */ for ( i = 0; i < reax->num_atom_types; ++i ) + { for ( j = 0; j < reax->num_atom_types; ++j ) + { for ( k = 0; k < reax->num_atom_types; ++k ) + { for ( m = 0; m < reax->num_atom_types; ++m ) { reax->fbp[i * __N * __N * __N + j * __N * __N + k * __N + m].cnt = 0; tor_flag[i * __N * __N * __N + j * __N * __N + k * __N + m] = 0; } + } + } + } /* next line is number of 4-body params and some comments */ fgets( s, MAX_LINE, fp ); @@ -714,7 +646,9 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) else /* This means the entry is of the form 0-X-Y-0 */ { if ( k < reax->num_atom_types && m < reax->num_atom_types ) + { for ( p = 0; p < reax->num_atom_types; p++ ) + { for ( o = 0; o < reax->num_atom_types; o++ ) { index1 = p * __N * __N * __N + k * __N * __N + m * __N + o; @@ -743,11 +677,12 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) reax->fbp[index2].prm[0].p_cot1 = atof(tmp[8]); } } + } + } } } - /* next line is number of hydrogen bond params and some comments */ fgets( s, MAX_LINE, fp ); c = Tokenize( s, &tmp ); @@ -781,14 +716,14 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) } } - /* deallocate helper storage */ for ( i = 0; i < MAX_TOKENS; i++ ) + { free( tmp[i] ); + } free( tmp ); free( s ); - /* deallocate tor_flag */ free( tor_flag ); @@ -796,497 +731,5 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) fprintf( stderr, "force field read\n" ); #endif - return 0; -} - - -char Read_Control_File( FILE* fp, reax_system *system, control_params* control, - output_controls *out_control ) -{ - char *s, **tmp; - int c, i; - real val; - int ival; - - /* assign default values */ - strcpy( control->sim_name, "default.sim" ); - - control->restart = 0; - out_control->restart_format = 1; - out_control->restart_freq = 0; - strcpy( control->restart_from, "default.res" ); 
- out_control->restart_freq = 0; - control->random_vel = 0; - - control->reposition_atoms = 0; - - control->ensemble = 0; - control->nsteps = 0; - control->dt = 0.25; - - control->geo_format = 1; - control->restrict_bonds = 0; - - control->periodic_boundaries = 1; - control->periodic_images[0] = 0; - control->periodic_images[1] = 0; - control->periodic_images[2] = 0; - - control->reneighbor = 1; - control->vlist_cut = 0; - control->nbr_cut = 4.; - control->r_cut = 10; - control->max_far_nbrs = 1000; - control->bo_cut = 0.01; - control->thb_cut = 0.001; - control->hb_cut = 7.50; - - control->q_err = 0.000001; - control->tabulate = 0; - //TODO - control->refactor = 100; - //TODO -- change this to 5. - - control->droptol = 0.01; - - control->T_init = 0.; - control->T_final = 300.; - control->Tau_T = 1.0; - control->T_mode = 0.; - control->T_rate = 1.; - control->T_freq = 1.; - - control->P[0] = 0.000101325; - control->P[1] = 0.000101325; - control->P[2] = 0.000101325; - control->Tau_P[0] = 500.0; - control->Tau_P[1] = 500.0; - control->Tau_P[2] = 500.0; - control->Tau_PT = 500.0; - control->compressibility = 1.0; - control->press_mode = 0; - - control->remove_CoM_vel = 25; - - out_control->debug_level = 0; - out_control->energy_update_freq = 10; - - out_control->write_steps = 100; - out_control->traj_compress = 0; - out_control->write = fprintf; - out_control->traj_format = 0; - out_control->write_header = - (int (*)( reax_system*, control_params*, - static_storage*, void* )) Write_Custom_Header; - out_control->append_traj_frame = - (int (*)( reax_system*, control_params*, simulation_data*, - static_storage*, list **, void* )) Append_Custom_Frame; - - strcpy( out_control->traj_title, "default_title" ); - out_control->atom_format = 0; - out_control->bond_info = 0; - out_control->angle_info = 0; - - control->molec_anal = 0; - control->freq_molec_anal = 0; - control->bg_cut = 0.3; - control->num_ignored = 0; - memset( control->ignore, 0, sizeof(int)*MAX_ATOM_TYPES ); - - 
control->dipole_anal = 0; - control->freq_dipole_anal = 0; - - control->diffusion_coef = 0; - control->freq_diffusion_coef = 0; - control->restrict_type = 0; - - /* memory allocations */ - s = (char*) malloc(sizeof(char) * MAX_LINE); - tmp = (char**) malloc(sizeof(char*)*MAX_TOKENS); - for (i = 0; i < MAX_TOKENS; i++) - tmp[i] = (char*) malloc(sizeof(char) * MAX_LINE); - - /* read control parameters file */ - while (!feof(fp)) - { - fgets(s, MAX_LINE, fp); - c = Tokenize(s, &tmp); - - if ( strcmp(tmp[0], "simulation_name") == 0 ) - { - strcpy( control->sim_name, tmp[1] ); - } - //else if( strcmp(tmp[0], "restart") == 0 ) { - // ival = atoi(tmp[1]); - // control->restart = ival; - //} - else if ( strcmp(tmp[0], "restart_format") == 0 ) - { - ival = atoi(tmp[1]); - out_control->restart_format = ival; - } - else if ( strcmp(tmp[0], "restart_freq") == 0 ) - { - ival = atoi(tmp[1]); - out_control->restart_freq = ival; - } - else if ( strcmp(tmp[0], "random_vel") == 0 ) - { - ival = atoi(tmp[1]); - control->random_vel = ival; - } - else if ( strcmp(tmp[0], "reposition_atoms") == 0 ) - { - ival = atoi(tmp[1]); - control->reposition_atoms = ival; - } - else if ( strcmp(tmp[0], "ensemble_type") == 0 ) - { - ival = atoi(tmp[1]); - control->ensemble = ival; - } - else if ( strcmp(tmp[0], "nsteps") == 0 ) - { - ival = atoi(tmp[1]); - control->nsteps = ival; - } - else if ( strcmp(tmp[0], "dt") == 0 ) - { - val = atof(tmp[1]); - control->dt = val * 1.e-3; // convert dt from fs to ps! 
- } - else if ( strcmp(tmp[0], "periodic_boundaries") == 0 ) - { - ival = atoi( tmp[1] ); - control->periodic_boundaries = ival; - } - else if ( strcmp(tmp[0], "periodic_images") == 0 ) - { - ival = atoi(tmp[1]); - control->periodic_images[0] = ival; - ival = atoi(tmp[2]); - control->periodic_images[1] = ival; - ival = atoi(tmp[3]); - control->periodic_images[2] = ival; - } - else if ( strcmp(tmp[0], "geo_format") == 0 ) - { - ival = atoi( tmp[1] ); - control->geo_format = ival; - } - else if ( strcmp(tmp[0], "restrict_bonds") == 0 ) - { - ival = atoi( tmp[1] ); - control->restrict_bonds = ival; - } - else if ( strcmp(tmp[0], "tabulate_long_range") == 0 ) - { - ival = atoi( tmp[1] ); - control->tabulate = ival; - } - else if ( strcmp(tmp[0], "reneighbor") == 0 ) - { - ival = atoi( tmp[1] ); - control->reneighbor = ival; - } - else if ( strcmp(tmp[0], "vlist_buffer") == 0 ) - { - val = atof(tmp[1]); - control->vlist_cut = val; - } - else if ( strcmp(tmp[0], "nbrhood_cutoff") == 0 ) - { - val = atof(tmp[1]); - control->nbr_cut = val; - } - else if ( strcmp(tmp[0], "thb_cutoff") == 0 ) - { - val = atof(tmp[1]); - control->thb_cut = val; - } - else if ( strcmp(tmp[0], "hbond_cutoff") == 0 ) - { - val = atof( tmp[1] ); - control->hb_cut = val; - } - else if ( strcmp(tmp[0], "q_err") == 0 ) - { - val = atof( tmp[1] ); - control->q_err = val; - } - else if ( strcmp(tmp[0], "ilu_refactor") == 0 ) - { - ival = atoi( tmp[1] ); - control->refactor = ival; - } - else if ( strcmp(tmp[0], "ilu_droptol") == 0 ) - { - val = atof( tmp[1] ); - control->droptol = val; - } - else if ( strcmp(tmp[0], "temp_init") == 0 ) - { - val = atof(tmp[1]); - control->T_init = val; - - if ( control->T_init < 0.001 ) - control->T_init = 0.001; - } - else if ( strcmp(tmp[0], "temp_final") == 0 ) - { - val = atof(tmp[1]); - control->T_final = val; - - if ( control->T_final < 0.1 ) - control->T_final = 0.1; - } - else if ( strcmp(tmp[0], "t_mass") == 0 ) - { - val = atof(tmp[1]); - control->Tau_T = 
val * 1.e-3; // convert t_mass from fs to ps - } - else if ( strcmp(tmp[0], "t_mode") == 0 ) - { - ival = atoi(tmp[1]); - control->T_mode = ival; - } - else if ( strcmp(tmp[0], "t_rate") == 0 ) - { - val = atof(tmp[1]); - control->T_rate = val; - } - else if ( strcmp(tmp[0], "t_freq") == 0 ) - { - val = atof(tmp[1]); - control->T_freq = val; - } - else if ( strcmp(tmp[0], "pressure") == 0 ) - { - if ( control->ensemble == iNPT ) - { - val = atof(tmp[1]); - control->P[0] = control->P[1] = control->P[2] = val; - } - else if ( control->ensemble == sNPT ) - { - val = atof(tmp[1]); - control->P[0] = val; - - val = atof(tmp[2]); - control->P[1] = val; - - val = atof(tmp[3]); - control->P[2] = val; - } - } - else if ( strcmp(tmp[0], "p_mass") == 0 ) - { - if ( control->ensemble == iNPT ) - { - val = atof(tmp[1]); - control->Tau_P[0] = val * 1.e-3; // convert p_mass from fs to ps - } - else if ( control->ensemble == sNPT ) - { - val = atof(tmp[1]); - control->Tau_P[0] = val * 1.e-3; // convert p_mass from fs to ps - - val = atof(tmp[2]); - control->Tau_P[1] = val * 1.e-3; // convert p_mass from fs to ps - - val = atof(tmp[3]); - control->Tau_P[2] = val * 1.e-3; // convert p_mass from fs to ps - } - } - else if ( strcmp(tmp[0], "pt_mass") == 0 ) - { - val = atof(tmp[1]); - control->Tau_PT = val * 1.e-3; // convert pt_mass from fs to ps - } - else if ( strcmp(tmp[0], "compress") == 0 ) - { - val = atof(tmp[1]); - control->compressibility = val; - } - else if ( strcmp(tmp[0], "press_mode") == 0 ) - { - val = atoi(tmp[1]); - control->press_mode = val; - } - else if ( strcmp(tmp[0], "remove_CoM_vel") == 0 ) - { - val = atoi(tmp[1]); - control->remove_CoM_vel = val; - } - else if ( strcmp(tmp[0], "debug_level") == 0 ) - { - ival = atoi(tmp[1]); - out_control->debug_level = ival; - } - else if ( strcmp(tmp[0], "energy_update_freq") == 0 ) - { - ival = atoi(tmp[1]); - out_control->energy_update_freq = ival; - } - else if ( strcmp(tmp[0], "write_freq") == 0 ) - { - ival = 
atoi(tmp[1]); - out_control->write_steps = ival; - } - else if ( strcmp(tmp[0], "traj_compress") == 0 ) - { - ival = atoi(tmp[1]); - out_control->traj_compress = ival; - - if ( out_control->traj_compress ) - out_control->write = (int (*)(FILE *, const char *, ...)) gzprintf; - else out_control->write = fprintf; - } - else if ( strcmp(tmp[0], "traj_format") == 0 ) - { - ival = atoi(tmp[1]); - out_control->traj_format = ival; - - if ( out_control->traj_format == 0 ) - { - out_control->write_header = - (int (*)( reax_system*, control_params*, - static_storage*, void* )) Write_Custom_Header; - out_control->append_traj_frame = - (int (*)(reax_system*, control_params*, simulation_data*, - static_storage*, list **, void*)) Append_Custom_Frame; - } - else if ( out_control->traj_format == 1 ) - { - out_control->write_header = - (int (*)( reax_system*, control_params*, - static_storage*, void* )) Write_xyz_Header; - out_control->append_traj_frame = - (int (*)( reax_system*, control_params*, simulation_data*, - static_storage*, list **, void* )) Append_xyz_Frame; - } - } - else if ( strcmp(tmp[0], "traj_title") == 0 ) - { - strcpy( out_control->traj_title, tmp[1] ); - } - else if ( strcmp(tmp[0], "atom_info") == 0 ) - { - ival = atoi(tmp[1]); - out_control->atom_format += ival * 4; - } - else if ( strcmp(tmp[0], "atom_velocities") == 0 ) - { - ival = atoi(tmp[1]); - out_control->atom_format += ival * 2; - } - else if ( strcmp(tmp[0], "atom_forces") == 0 ) - { - ival = atoi(tmp[1]); - out_control->atom_format += ival * 1; - } - else if ( strcmp(tmp[0], "bond_info") == 0 ) - { - ival = atoi(tmp[1]); - out_control->bond_info = ival; - } - else if ( strcmp(tmp[0], "angle_info") == 0 ) - { - ival = atoi(tmp[1]); - out_control->angle_info = ival; - } - else if ( strcmp(tmp[0], "test_forces") == 0 ) - { - ival = atoi(tmp[1]); - } - else if ( strcmp(tmp[0], "molec_anal") == 0 ) - { - ival = atoi(tmp[1]); - control->molec_anal = ival; - } - else if ( strcmp(tmp[0], "freq_molec_anal") 
== 0 ) - { - ival = atoi(tmp[1]); - control->freq_molec_anal = ival; - } - else if ( strcmp(tmp[0], "bond_graph_cutoff") == 0 ) - { - val = atof(tmp[1]); - control->bg_cut = val; - } - else if ( strcmp(tmp[0], "ignore") == 0 ) - { - control->num_ignored = atoi(tmp[1]); - for ( i = 0; i < control->num_ignored; ++i ) - control->ignore[atoi(tmp[i + 2])] = 1; - } - else if ( strcmp(tmp[0], "dipole_anal") == 0 ) - { - ival = atoi(tmp[1]); - control->dipole_anal = ival; - } - else if ( strcmp(tmp[0], "freq_dipole_anal") == 0 ) - { - ival = atoi(tmp[1]); - control->freq_dipole_anal = ival; - } - else if ( strcmp(tmp[0], "diffusion_coef") == 0 ) - { - ival = atoi(tmp[1]); - control->diffusion_coef = ival; - } - else if ( strcmp(tmp[0], "freq_diffusion_coef") == 0 ) - { - ival = atoi(tmp[1]); - control->freq_diffusion_coef = ival; - } - else if ( strcmp(tmp[0], "restrict_type") == 0 ) - { - ival = atoi(tmp[1]); - control->restrict_type = ival; - } - else - { - fprintf( stderr, "WARNING: unknown parameter %s\n", tmp[0] ); - exit( 15 ); - } - } - - - /* determine target T */ - if ( control->T_mode == 0 ) - control->T = control->T_final; - else control->T = control->T_init; - - - /* near neighbor and far neighbor cutoffs */ - control->bo_cut = 0.01 * system->reaxprm.gp.l[29]; - control->r_low = system->reaxprm.gp.l[11]; - control->r_cut = system->reaxprm.gp.l[12]; - control->vlist_cut += control->r_cut; - - system->g.cell_size = control->vlist_cut / 2.; - for ( i = 0; i < 3; ++i ) - system->g.spread[i] = 2; - - - /* Initialize Taper function */ - Init_Taper( control ); - - - /* free memory allocations at the top */ - for ( i = 0; i < MAX_TOKENS; i++ ) - free( tmp[i] ); - free( tmp ); - free( s ); - -#if defined(DEBUG_FOCUS) - fprintf( stderr, - "en=%d steps=%d dt=%.5f opt=%d T=%.5f P=%.5f %.5f %.5f\n", - control->ensemble, control->nsteps, control->dt, control->tabulate, - control->T, control->P[0], control->P[1], control->P[2] ); - - fprintf(stderr, "control file read\n" ); 
-#endif - return 0; + return SUCCESS; } diff --git a/PuReMD-GPU/src/param.h b/PuReMD-GPU/src/ffield.h similarity index 67% rename from PuReMD-GPU/src/param.h rename to PuReMD-GPU/src/ffield.h index 2b24b056983233840966a8de29ce902ca6beb981..4aaf32a644861b069e8cf87e2eec68aadf4d3c84 100644 --- a/PuReMD-GPU/src/param.h +++ b/PuReMD-GPU/src/ffield.h @@ -1,9 +1,10 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or @@ -18,24 +19,10 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#ifndef __PARAM_H_ -#define __PARAM_H_ +#ifndef __FFIELD_H_ +#define __FFIELD_H_ #include "mytypes.h" - -#define MAX_LINE 1024 -#define MAX_TOKENS 20 -#define MAX_TOKEN_LEN 1024 - - -int Get_Atom_Type( reax_interaction*, char* ); - -int Tokenize( char*, char*** ); - char Read_Force_Field( FILE*, reax_interaction* ); -char Read_Control_File( FILE*, reax_system*, control_params*, - output_controls* ); - - #endif diff --git a/PuReMD-GPU/src/forces.c b/PuReMD-GPU/src/forces.c index c95d4896e32f60e954d79b0b623520afb042e9ea..debe6ac171f281a9f09a77c2c17ec1d315bc7f9b 100644 --- a/PuReMD-GPU/src/forces.c +++ b/PuReMD-GPU/src/forces.c @@ -1,19 +1,20 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, 
pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ @@ -26,30 +27,31 @@ #include "two_body_interactions.h" #include "three_body_interactions.h" #include "four_body_interactions.h" +#include "index_utils.h" #include "list.h" #include "print_utils.h" +#include "qeq.h" #include "system_props.h" -#include "QEq.h" +#include "tool_box.h" #include "vector.h" -#include "index_utils.h" -void Dummy_Interaction( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - list **lists, output_controls *out_control ) +void Dummy_Interaction( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, list **lists, + output_controls *out_control ) { } void Init_Bonded_Force_Functions( control_params *control ) -{ +{ Interaction_Functions[0] = Calculate_Bond_Orders; Interaction_Functions[1] = Bond_Energy; //*/Dummy_Interaction; Interaction_Functions[2] = LonePair_OverUnder_Coordination_Energy; //*/Dummy_Interaction; Interaction_Functions[3] = Three_Body_Interactions; //*/Dummy_Interaction; Interaction_Functions[4] = Four_Body_Interactions; //*/Dummy_Interaction; - if( control->hb_cut > 0 ) + if ( control->hb_cut > 0 ) Interaction_Functions[5] = Hydrogen_Bonds; //*/Dummy_Interaction; else Interaction_Functions[5] = 
Dummy_Interaction; Interaction_Functions[6] = Dummy_Interaction; //empty @@ -59,127 +61,123 @@ void Init_Bonded_Force_Functions( control_params *control ) } -void Compute_Bonded_Forces( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - list **lists, output_controls *out_control ) +void Compute_Bonded_Forces( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, + list **lists, output_controls *out_control ) { int i; - real t_start, t_elapsed; + // real t_start, t_end, t_elapsed; #ifdef TEST_ENERGY /* Mark beginning of a new timestep in each energy file */ - fprintf( out_control->ebond, "step: %d\n%6s%6s%12s%12s%12s\n", - data->step, "atom1", "atom2", "bo", "ebond", "total" ); - fprintf( out_control->elp, "step: %d\n%6s%12s%12s%12s\n", - data->step, "atom", "nlp", "elp", "total" ); - fprintf( out_control->eov, "step: %d\n%6s%12s%12s\n", - data->step, "atom", "eov", "total" ); - fprintf( out_control->eun, "step: %d\n%6s%12s%12s\n", - data->step, "atom", "eun", "total" ); - fprintf( out_control->eval, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s%12s\n", - data->step, "atom1", "atom2", "atom3", - "angle", "bo(12)", "bo(23)", "eval", "epen", "total" ); - fprintf( out_control->epen, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s\n", - data->step, "atom1", "atom2", "atom3", - "angle", "bo(12)", "bo(23)", "epen", "total" ); - fprintf( out_control->ecoa, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s\n", - data->step, "atom1", "atom2", "atom3", - "angle", "bo(12)", "bo(23)", "ecoa", "total" ); - fprintf( out_control->ehb, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s\n", - data->step, "atom1", "atom2", "atom3", - "r(23)", "angle", "bo(12)", "ehb", "total" ); - fprintf( out_control->etor, "step: %d\n%6s%6s%6s%6s%12s%12s%12s%12s\n", - data->step, "atom1", "atom2", "atom3", "atom4", - "phi", "bo(23)", "etor", "total" ); + fprintf( out_control->ebond, "step: %d\n%6s%6s%12s%12s%12s\n", + data->step, 
"atom1", "atom2", "bo", "ebond", "total" ); + fprintf( out_control->elp, "step: %d\n%6s%12s%12s%12s\n", + data->step, "atom", "nlp", "elp", "total" ); + fprintf( out_control->eov, "step: %d\n%6s%12s%12s\n", + data->step, "atom", "eov", "total" ); + fprintf( out_control->eun, "step: %d\n%6s%12s%12s\n", + data->step, "atom", "eun", "total" ); + fprintf( out_control->eval, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s%12s\n", + data->step, "atom1", "atom2", "atom3", + "angle", "bo(12)", "bo(23)", "eval", "epen", "total" ); + fprintf( out_control->epen, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s\n", + data->step, "atom1", "atom2", "atom3", + "angle", "bo(12)", "bo(23)", "epen", "total" ); + fprintf( out_control->ecoa, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s\n", + data->step, "atom1", "atom2", "atom3", + "angle", "bo(12)", "bo(23)", "ecoa", "total" ); + fprintf( out_control->ehb, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s\n", + data->step, "atom1", "atom2", "atom3", + "r(23)", "angle", "bo(12)", "ehb", "total" ); + fprintf( out_control->etor, "step: %d\n%6s%6s%6s%6s%12s%12s%12s%12s\n", + data->step, "atom1", "atom2", "atom3", "atom4", + "phi", "bo(23)", "etor", "total" ); fprintf( out_control->econ, "step:%d\n%6s%6s%6s%6s%12s%12s%12s%12s%12s%12s\n", - data->step, "atom1", "atom2", "atom3", "atom4", - "phi", "bo(12)", "bo(23)", "bo(34)", "econ", "total" ); -#endif - - /* Implement all the function calls as function pointers */ - for( i = 0; i < NO_OF_INTERACTIONS; i++ ) { - //for( i = 0; i < 5; i++ ) { - t_start = Get_Time (); - (Interaction_Functions[i])(system, control, data, workspace, - lists, out_control); - t_elapsed = Get_Timing_Info ( t_start ); - -#ifdef __DEBUG_CUDA__ - fprintf( stderr, "function %d tme %lf - \n", i, t_elapsed ); + data->step, "atom1", "atom2", "atom3", "atom4", + "phi", "bo(12)", "bo(23)", "bo(34)", "econ", "total" ); #endif + /* Implement all the function calls as function pointers */ + for ( i = 0; i < NO_OF_INTERACTIONS; i++ ) + { + 
(Interaction_Functions[i])(system, control, data, workspace, + lists, out_control); #if defined(DEBUG_FOCUS) fprintf( stderr, "f%d-", i ); #endif #ifdef TEST_FORCES - (Print_Interactions[i])(system, control, data, workspace, - lists, out_control); + (Print_Interactions[i])(system, control, data, workspace, + lists, out_control); #endif } } -void Compute_NonBonded_Forces( reax_system *system, control_params *control, - simulation_data *data,static_storage *workspace, - list** lists, output_controls *out_control ) +void Compute_NonBonded_Forces( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, + list** lists, output_controls *out_control ) { real t_start, t_elapsed; #ifdef TEST_ENERGY fprintf( out_control->evdw, "step: %d\n%6s%6s%12s%12s%12s\n", - data->step, "atom1", "atom2", "r12", "evdw", "total" ); + data->step, "atom1", "atom2", "r12", "evdw", "total" ); fprintf( out_control->ecou, "step: %d\n%6s%6s%12s%12s%12s%12s%12s\n", - data->step, "atom1", "atom2", "r12", "q1", "q2", "ecou", "total" ); + data->step, "atom1", "atom2", "r12", "q1", "q2", "ecou", "total" ); #endif t_start = Get_Time( ); QEq( system, control, data, workspace, lists[FAR_NBRS], out_control ); t_elapsed = Get_Timing_Info( t_start ); data->timing.QEq += t_elapsed; - #if defined(DEBUG_FOCUS) fprintf( stderr, "qeq - " ); #endif if ( control->tabulate == 0) + { vdW_Coulomb_Energy( system, control, data, workspace, lists, out_control ); + } else - Tabulated_vdW_Coulomb_Energy( system, control, data, workspace, - lists, out_control ); - + { + Tabulated_vdW_Coulomb_Energy( system, control, data, workspace, + lists, out_control ); + } #if defined(DEBUG_FOCUS) fprintf( stderr, "nonb forces - " ); #endif #ifdef TEST_FORCES - Print_vdW_Coulomb_Forces( system, control, data, workspace, - lists, out_control ); + Print_vdW_Coulomb_Forces( system, control, data, workspace, + lists, out_control ); #endif } -/* This version of Compute_Total_Force computes forces 
from coefficients +/* This version of Compute_Total_Force computes forces from coefficients accumulated by all interaction functions. Saves enormous time & space! */ -void Compute_Total_Force( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - list **lists ) +void Compute_Total_Force( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, + list **lists ) { int i, pj; list *bonds = (*lists) + BONDS; - for( i = 0; i < system->N; ++i ) - for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) - if( i < bonds->select.bond_list[pj].nbr ) { - if( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT) + for ( i = 0; i < system->N; ++i ) + for ( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) + if ( i < bonds->select.bond_list[pj].nbr ) + { + if ( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT) Add_dBond_to_Forces( i, pj, system, data, workspace, lists ); - else + else Add_dBond_to_Forces_NPT( i, pj, system, data, workspace, lists ); } } void Validate_Lists( static_storage *workspace, list **lists, int step, int n, - int Hmax, int Htop, int num_bonds, int num_hbonds ) + int Hmax, int Htop, int num_bonds, int num_hbonds ) { int i, flag; list *bonds, *hbonds; @@ -188,92 +186,104 @@ void Validate_Lists( static_storage *workspace, list **lists, int step, int n, hbonds = *lists + HBONDS; /* far neighbors */ - if( Htop > Hmax * DANGER_ZONE ) { + if ( Htop > Hmax * DANGER_ZONE ) + { workspace->realloc.Htop = Htop; - if( Htop > Hmax ) { - fprintf( stderr, - "step%d - ran out of space on H matrix: Htop=%d, max = %d", - step, Htop, Hmax ); - exit(INSUFFICIENT_SPACE); + if ( Htop > Hmax ) + { + fprintf( stderr, + "step%d - ran out of space on H matrix: Htop=%d, max = %d", + step, Htop, Hmax ); + exit( INSUFFICIENT_MEMORY ); } } /* bond list */ flag = -1; workspace->realloc.num_bonds = num_bonds; - for( i = 
0; i < n-1; ++i ) - if( End_Index(i, bonds) >= Start_Index(i+1, bonds)-2 ) { + for ( i = 0; i < n - 1; ++i ) + if ( End_Index(i, bonds) >= Start_Index(i + 1, bonds) - 2 ) + { workspace->realloc.bonds = 1; - if( End_Index(i, bonds) > Start_Index(i+1, bonds) ) + if ( End_Index(i, bonds) > Start_Index(i + 1, bonds) ) flag = i; } - if( flag > -1 ) { + if ( flag > -1 ) + { fprintf( stderr, "step%d-bondchk failed: i=%d end(i)=%d str(i+1)=%d\n", - step, flag, End_Index(flag,bonds), Start_Index(flag+1,bonds) ); - exit(INSUFFICIENT_SPACE); - } + step, flag, End_Index(flag, bonds), Start_Index(flag + 1, bonds) ); + exit( INSUFFICIENT_MEMORY ); + } - if( End_Index(i, bonds) >= bonds->num_intrs-2 ) { + if ( End_Index(i, bonds) >= bonds->num_intrs - 2 ) + { workspace->realloc.bonds = 1; - if( End_Index(i, bonds) > bonds->num_intrs ) { + if ( End_Index(i, bonds) > bonds->num_intrs ) + { fprintf( stderr, "step%d-bondchk failed: i=%d end(i)=%d bond_end=%d\n", - step, flag, End_Index(i,bonds), bonds->num_intrs ); - exit(INSUFFICIENT_SPACE); + step, flag, End_Index(i, bonds), bonds->num_intrs ); + exit( INSUFFICIENT_MEMORY ); } } /* hbonds list */ - if( workspace->num_H > 0 ) { + if ( workspace->num_H > 0 ) + { flag = -1; workspace->realloc.num_hbonds = num_hbonds; - for( i = 0; i < workspace->num_H-1; ++i ) - if( Num_Entries(i, hbonds) >= - (Start_Index(i+1, hbonds) - Start_Index(i, hbonds)) * DANGER_ZONE ) { + for ( i = 0; i < workspace->num_H - 1; ++i ) + if ( Num_Entries(i, hbonds) >= + (Start_Index(i + 1, hbonds) - Start_Index(i, hbonds)) * DANGER_ZONE ) + { workspace->realloc.hbonds = 1; - if( End_Index(i, hbonds) > Start_Index(i+1, hbonds) ) + if ( End_Index(i, hbonds) > Start_Index(i + 1, hbonds) ) flag = i; } - if( flag > -1 ) { + if ( flag > -1 ) + { fprintf( stderr, "step%d-hbondchk failed: i=%d end(i)=%d str(i+1)=%d\n", - step, flag, End_Index(flag,hbonds), Start_Index(flag+1,hbonds) ); - exit(INSUFFICIENT_SPACE); + step, flag, End_Index(flag, hbonds), Start_Index(flag + 
1, hbonds) ); + exit( INSUFFICIENT_MEMORY ); } - if( Num_Entries(i,hbonds) >= - (hbonds->num_intrs - Start_Index(i,hbonds)) * DANGER_ZONE ) { + if ( Num_Entries(i, hbonds) >= + (hbonds->num_intrs - Start_Index(i, hbonds)) * DANGER_ZONE ) + { workspace->realloc.hbonds = 1; - if( End_Index(i, hbonds) > hbonds->num_intrs ) { + if ( End_Index(i, hbonds) > hbonds->num_intrs ) + { fprintf( stderr, "step%d-hbondchk failed: i=%d end(i)=%d hbondend=%d\n", - step, flag, End_Index(i,hbonds), hbonds->num_intrs ); - exit(INSUFFICIENT_SPACE); + step, flag, End_Index(i, hbonds), hbonds->num_intrs ); + exit( INSUFFICIENT_MEMORY ); } } } } -void Init_Forces( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - list **lists, output_controls *out_control ) { +void Init_Forces( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, + list **lists, output_controls *out_control ) +{ int i, j, pj; int start_i, end_i; int type_i, type_j; - int Htop, btop_i, btop_j, num_bonds, num_hbonds; + int Htop, H_sp_top, btop_i, btop_j, num_bonds, num_hbonds; int ihb, jhb, ihb_top, jhb_top; - int flag; + int flag, flag_sp; real r_ij, r2, self_coef; real dr3gamij_1, dr3gamij_3, Tap; //real val, dif, base; real C12, C34, C56; real Cln_BOp_s, Cln_BOp_pi, Cln_BOp_pi2; real BO, BO_s, BO_pi, BO_pi2; - real p_boc1, p_boc2; - sparse_matrix *H; + real p_boc1, p_boc2; + sparse_matrix *H, *H_sp; list *far_nbrs, *bonds, *hbonds; single_body_parameters *sbp_i, *sbp_j; two_body_parameters *twbp; @@ -287,44 +297,67 @@ void Init_Forces( reax_system *system, control_params *control, bonds = *lists + BONDS; hbonds = *lists + HBONDS; - H = &workspace->H; + H = workspace->H; + H_sp = workspace->H_sp; Htop = 0; + H_sp_top = 0; num_bonds = 0; num_hbonds = 0; btop_i = btop_j = 0; p_boc1 = system->reaxprm.gp.l[0]; p_boc2 = system->reaxprm.gp.l[1]; - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { atom_i = 
&(system->atoms[i]); type_i = atom_i->type; start_i = Start_Index(i, far_nbrs); end_i = End_Index(i, far_nbrs); H->start[i] = Htop; + H_sp->start[i] = H_sp_top; btop_i = End_Index( i, bonds ); sbp_i = &(system->reaxprm.sbp[type_i]); ihb = ihb_top = -1; - if( control->hb_cut > 0 && (ihb=sbp_i->p_hbond) == 1 ) + if ( control->hb_cut > 0 && (ihb = sbp_i->p_hbond) == 1 ) ihb_top = End_Index( workspace->hbond_index[i], hbonds ); - for( pj = start_i; pj < end_i; ++pj ) { + for ( pj = start_i; pj < end_i; ++pj ) + { nbr_pj = &( far_nbrs->select.far_nbr_list[pj] ); j = nbr_pj->nbr; atom_j = &(system->atoms[j]); flag = 0; - if((data->step-data->prev_steps) % control->reneighbor == 0) { - if( nbr_pj->d <= control->r_cut) + flag_sp = 0; + if ((data->step - data->prev_steps) % control->reneighbor == 0) + { + if ( nbr_pj->d <= control->r_cut ) + { flag = 1; - else flag = 0; + if ( nbr_pj->d <= control->r_sp_cut ) + { + flag_sp = 1; + } + } + else + { + flag = 0; + flag_sp = 0; + } } - else if((nbr_pj->d=Sq_Distance_on_T3(atom_i->x,atom_j->x,&(system->box), - nbr_pj->dvec))<=SQR(control->r_cut)){ - nbr_pj->d = sqrt(nbr_pj->d); + else if ((nbr_pj->d = Sq_Distance_on_T3(atom_i->x, atom_j->x, &(system->box), + nbr_pj->dvec)) <= SQR(control->r_cut)) + { + if ( nbr_pj->d <= SQR(control->r_sp_cut)) + { + flag_sp = 1; + } + nbr_pj->d = SQRT( nbr_pj->d ); flag = 1; } - if( flag ){ + if ( flag ) + { type_j = system->atoms[j].type; r_ij = nbr_pj->d; sbp_j = &(system->reaxprm.sbp[type_j]); @@ -338,63 +371,79 @@ void Init_Forces( reax_system *system, control_params *control, Tap = Tap * r_ij + control->Tap3; Tap = Tap * r_ij + control->Tap2; Tap = Tap * r_ij + control->Tap1; - Tap = Tap * r_ij + control->Tap0; + Tap = Tap * r_ij + control->Tap0; dr3gamij_1 = ( r_ij * r_ij * r_ij + twbp->gamma ); dr3gamij_3 = POW( dr3gamij_1 , 0.33333333333333 ); - H->entries[Htop].j = j; - H->entries[Htop].val = self_coef * Tap * EV_to_KCALpMOL / dr3gamij_3; + H->j[Htop] = j; + H->val[Htop] = self_coef * 
Tap * EV_to_KCALpMOL / dr3gamij_3; ++Htop; - /* hydrogen bond lists */ - if( control->hb_cut > 0 && (ihb==1 || ihb==2) && - nbr_pj->d <= control->hb_cut ) { + /* H_sp matrix entry */ + if ( flag_sp ) + { + H_sp->j[H_sp_top] = j; + H_sp->val[H_sp_top] = H->val[Htop - 1]; + ++H_sp_top; + } + + /* hydrogen bond lists */ + if ( control->hb_cut > 0 && (ihb == 1 || ihb == 2) && + nbr_pj->d <= control->hb_cut ) + { // fprintf( stderr, "%d %d\n", atom1, atom2 ); jhb = sbp_j->p_hbond; - if( ihb == 1 && jhb == 2 ) { + if ( ihb == 1 && jhb == 2 ) + { hbonds->select.hbond_list[ihb_top].nbr = j; hbonds->select.hbond_list[ihb_top].scl = 1; hbonds->select.hbond_list[ihb_top].ptr = nbr_pj; ++ihb_top; ++num_hbonds; } - else if( ihb == 2 && jhb == 1 ) { + else if ( ihb == 2 && jhb == 1 ) + { jhb_top = End_Index( workspace->hbond_index[j], hbonds ); hbonds->select.hbond_list[jhb_top].nbr = i; hbonds->select.hbond_list[jhb_top].scl = -1; hbonds->select.hbond_list[jhb_top].ptr = nbr_pj; - Set_End_Index( workspace->hbond_index[j], jhb_top+1, hbonds ); + Set_End_Index( workspace->hbond_index[j], jhb_top + 1, hbonds ); ++num_hbonds; } } /* uncorrected bond orders */ - if( far_nbrs->select.far_nbr_list[pj].d <= control->nbr_cut ) { + if ( far_nbrs->select.far_nbr_list[pj].d <= control->nbr_cut ) + { r2 = SQR(r_ij); - if( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0) { + if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0) + { C12 = twbp->p_bo1 * POW( r_ij / twbp->r_s, twbp->p_bo2 ); BO_s = (1.0 + control->bo_cut) * EXP( C12 ); } else BO_s = C12 = 0.0; - if( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0) { + if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0) + { C34 = twbp->p_bo3 * POW( r_ij / twbp->r_p, twbp->p_bo4 ); BO_pi = EXP( C34 ); } else BO_pi = C34 = 0.0; - if( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0) { - C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 ); - BO_pi2= EXP( C56 ); + if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0) + { + C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 ); + 
BO_pi2 = EXP( C56 ); } else BO_pi2 = C56 = 0.0; /* Initially BO values are the uncorrected ones, page 1 */ BO = BO_s + BO_pi + BO_pi2; - if( BO >= control->bo_cut ) { + if ( BO >= control->bo_cut ) + { num_bonds += 2; /****** bonds i-j and j-i ******/ ibond = &( bonds->select.bond_list[btop_i] ); @@ -414,7 +463,7 @@ void Init_Forces( reax_system *system, control_params *control, ibond->sym_index = btop_j; jbond->sym_index = btop_i; ++btop_i; - Set_End_Index( j, btop_j+1, bonds ); + Set_End_Index( j, btop_j + 1, bonds ); bo_ij = &( ibond->bo_data ); bo_ji = &( jbond->bo_data ); @@ -428,22 +477,22 @@ void Init_Forces( reax_system *system, control_params *control, Cln_BOp_pi = twbp->p_bo4 * C34 / r2; Cln_BOp_pi2 = twbp->p_bo6 * C56 / r2; - /* Only dln_BOp_xx wrt. dr_i is stored here, note that + /* Only dln_BOp_xx wrt. dr_i is stored here, note that dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */ - rvec_Scale(bo_ij->dln_BOp_s,-bo_ij->BO_s*Cln_BOp_s,ibond->dvec); - rvec_Scale(bo_ij->dln_BOp_pi,-bo_ij->BO_pi*Cln_BOp_pi,ibond->dvec); + rvec_Scale(bo_ij->dln_BOp_s, -bo_ij->BO_s * Cln_BOp_s, ibond->dvec); + rvec_Scale(bo_ij->dln_BOp_pi, -bo_ij->BO_pi * Cln_BOp_pi, ibond->dvec); rvec_Scale(bo_ij->dln_BOp_pi2, - -bo_ij->BO_pi2*Cln_BOp_pi2,ibond->dvec); + -bo_ij->BO_pi2 * Cln_BOp_pi2, ibond->dvec); rvec_Scale(bo_ji->dln_BOp_s, -1., bo_ij->dln_BOp_s); rvec_Scale(bo_ji->dln_BOp_pi, -1., bo_ij->dln_BOp_pi ); rvec_Scale(bo_ji->dln_BOp_pi2, -1., bo_ij->dln_BOp_pi2 ); - /* Only dBOp wrt. dr_i is stored here, note that + /* Only dBOp wrt. 
dr_i is stored here, note that dBOp/dr_i = -dBOp/dr_j and all others are 0 */ - rvec_Scale( bo_ij->dBOp, - -(bo_ij->BO_s * Cln_BOp_s + - bo_ij->BO_pi * Cln_BOp_pi + - bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec ); + rvec_Scale( bo_ij->dBOp, + -(bo_ij->BO_s * Cln_BOp_s + + bo_ij->BO_pi * Cln_BOp_pi + + bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec ); rvec_Scale( bo_ji->dBOp, -1., bo_ij->dBOp ); rvec_Add( workspace->dDeltap_self[i], bo_ij->dBOp ); @@ -461,79 +510,91 @@ void Init_Forces( reax_system *system, control_params *control, /*fprintf( stderr, "%d %d %g %g %g\n", i+1, j+1, bo_ij->BO, bo_ij->BO_pi, bo_ij->BO_pi2 );*/ - /*fprintf( stderr, "Cln_BOp_s: %f, pbo2: %f, C12:%f\n", + /*fprintf( stderr, "Cln_BOp_s: %f, pbo2: %f, C12:%f\n", Cln_BOp_s, twbp->p_bo2, C12 ); - fprintf( stderr, "Cln_BOp_pi: %f, pbo4: %f, C34:%f\n", + fprintf( stderr, "Cln_BOp_pi: %f, pbo4: %f, C34:%f\n", Cln_BOp_pi, twbp->p_bo4, C34 ); fprintf( stderr, "Cln_BOp_pi2: %f, pbo6: %f, C56:%f\n", Cln_BOp_pi2, twbp->p_bo6, C56 );*/ /*fprintf(stderr, "pbo1: %f, pbo2:%f\n", twbp->p_bo1, twbp->p_bo2); fprintf(stderr, "pbo3: %f, pbo4:%f\n", twbp->p_bo3, twbp->p_bo4); fprintf(stderr, "pbo5: %f, pbo6:%f\n", twbp->p_bo5, twbp->p_bo6); - fprintf( stderr, "r_s: %f, r_p: %f, r_pp: %f\n", + fprintf( stderr, "r_s: %f, r_p: %f, r_pp: %f\n", twbp->r_s, twbp->r_p, twbp->r_pp ); fprintf( stderr, "C12: %g, C34:%g, C56:%g\n", C12, C34, C56 );*/ /*fprintf( stderr, "\tfactors: %g %g %g\n", - -(bo_ij->BO_s * Cln_BOp_s + bo_ij->BO_pi * Cln_BOp_pi + + -(bo_ij->BO_s * Cln_BOp_s + bo_ij->BO_pi * Cln_BOp_pi + bo_ij->BO_pi2 * Cln_BOp_pp), -bo_ij->BO_pi * Cln_BOp_pi, -bo_ij->BO_pi2 * Cln_BOp_pi2 );*/ - /*fprintf( stderr, "dBOpi:\t[%g, %g, %g]\n", + /*fprintf( stderr, "dBOpi:\t[%g, %g, %g]\n", bo_ij->dBOp[0], bo_ij->dBOp[1], bo_ij->dBOp[2] ); - fprintf( stderr, "dBOpi:\t[%g, %g, %g]\n", - bo_ij->dln_BOp_pi[0], bo_ij->dln_BOp_pi[1], + fprintf( stderr, "dBOpi:\t[%g, %g, %g]\n", + bo_ij->dln_BOp_pi[0], bo_ij->dln_BOp_pi[1], 
bo_ij->dln_BOp_pi[2] ); fprintf( stderr, "dBOpi2:\t[%g, %g, %g]\n\n", - bo_ij->dln_BOp_pi2[0], bo_ij->dln_BOp_pi2[1], + bo_ij->dln_BOp_pi2[0], bo_ij->dln_BOp_pi2[1], bo_ij->dln_BOp_pi2[2] );*/ - Set_End_Index( j, btop_j+1, bonds ); + Set_End_Index( j, btop_j + 1, bonds ); } } } } - H->entries[Htop].j = i; - H->entries[Htop].val = system->reaxprm.sbp[type_i].eta; + /* diagonal entry */ + H->j[Htop] = i; + H->val[Htop] = system->reaxprm.sbp[type_i].eta; ++Htop; + /* diagonal entry */ + H_sp->j[H_sp_top] = i; + H_sp->val[H_sp_top] = H->val[Htop - 1]; + ++H_sp_top; + Set_End_Index( i, btop_i, bonds ); - if( ihb == 1 ) + if ( ihb == 1 ) Set_End_Index( workspace->hbond_index[i], ihb_top, hbonds ); - //fprintf( stderr, "%d bonds start: %d, end: %d\n", + //fprintf( stderr, "%d bonds start: %d, end: %d\n", // i, Start_Index( i, bonds ), End_Index( i, bonds ) ); } +// printf("Htop = %d\n", Htop); +// printf("H_sp_top = %d\n", H_sp_top); + // mark the end of j list - H->start[i] = Htop; + H->start[i] = Htop; + H_sp->start[i] = H_sp_top; /* validate lists - decide if reallocation is required! 
*/ - Validate_Lists( workspace, lists, - data->step, system->N, H->m, Htop, num_bonds, num_hbonds ); + Validate_Lists( workspace, lists, + data->step, system->N, H->m, Htop, num_bonds, num_hbonds ); #if defined(DEBUG_FOCUS) - fprintf( stderr, "step%d: Htop = %d, num_bonds = %d, num_hbonds = %d\n", - data->step, Htop, num_bonds, num_hbonds ); + fprintf( stderr, "step%d: Htop = %d, num_bonds = %d, num_hbonds = %d\n", + data->step, Htop, num_bonds, num_hbonds ); + #endif } -void Init_Forces_Tab( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - list **lists, output_controls *out_control ) { +void Init_Forces_Tab( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, + list **lists, output_controls *out_control ) +{ int i, j, pj; int start_i, end_i; int type_i, type_j; - int Htop, btop_i, btop_j, num_bonds, num_hbonds; + int Htop, H_sp_top, btop_i, btop_j, num_bonds, num_hbonds; int tmin, tmax, r; int ihb, jhb, ihb_top, jhb_top; - int flag; + int flag, flag_sp; real r_ij, r2, self_coef; real val, dif, base; real C12, C34, C56; real Cln_BOp_s, Cln_BOp_pi, Cln_BOp_pi2; real BO, BO_s, BO_pi, BO_pi2; - real p_boc1, p_boc2; - sparse_matrix *H; + real p_boc1, p_boc2; + sparse_matrix *H, *H_sp; list *far_nbrs, *bonds, *hbonds; single_body_parameters *sbp_i, *sbp_j; two_body_parameters *twbp; @@ -547,44 +608,67 @@ void Init_Forces_Tab( reax_system *system, control_params *control, bonds = *lists + BONDS; hbonds = *lists + HBONDS; - H = &workspace->H; + H = workspace->H; + H_sp = workspace->H_sp; Htop = 0; + H_sp_top = 0; num_bonds = 0; num_hbonds = 0; btop_i = btop_j = 0; p_boc1 = system->reaxprm.gp.l[0]; p_boc2 = system->reaxprm.gp.l[1]; - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { atom_i = &(system->atoms[i]); type_i = atom_i->type; start_i = Start_Index(i, far_nbrs); end_i = End_Index(i, far_nbrs); H->start[i] = Htop; + H_sp->start[i] = H_sp_top; 
btop_i = End_Index( i, bonds ); sbp_i = &(system->reaxprm.sbp[type_i]); ihb = ihb_top = -1; - if( control->hb_cut > 0 && (ihb=sbp_i->p_hbond) == 1 ) + if ( control->hb_cut > 0 && (ihb = sbp_i->p_hbond) == 1 ) ihb_top = End_Index( workspace->hbond_index[i], hbonds ); - for( pj = start_i; pj < end_i; ++pj ) { + for ( pj = start_i; pj < end_i; ++pj ) + { nbr_pj = &( far_nbrs->select.far_nbr_list[pj] ); j = nbr_pj->nbr; atom_j = &(system->atoms[j]); flag = 0; - if((data->step-data->prev_steps) % control->reneighbor == 0) { - if(nbr_pj->d <= control->r_cut) + flag_sp = 0; + if ((data->step - data->prev_steps) % control->reneighbor == 0) + { + if (nbr_pj->d <= control->r_cut) + { flag = 1; - else flag = 0; + if ( nbr_pj->d <= control->r_sp_cut ) + { + flag_sp = 1; + } + } + else + { + flag = 0; + flag_sp = 0; + } } - else if((nbr_pj->d=Sq_Distance_on_T3(atom_i->x,atom_j->x,&(system->box), - nbr_pj->dvec))<=SQR(control->r_cut)){ + else if ((nbr_pj->d = Sq_Distance_on_T3(atom_i->x, atom_j->x, &(system->box), + nbr_pj->dvec)) <= SQR(control->r_cut)) + { + if ( nbr_pj->d <= SQR(control->r_sp_cut)) + { + flag_sp = 1; + } nbr_pj->d = sqrt(nbr_pj->d); flag = 1; } - if( flag ){ + if ( flag ) + { type_j = system->atoms[j].type; r_ij = nbr_pj->d; sbp_j = &(system->reaxprm.sbp[type_j]); @@ -596,65 +680,81 @@ void Init_Forces_Tab( reax_system *system, control_params *control, /* cubic spline interpolation */ r = (int)(r_ij * t->inv_dx); - if( r == 0 ) ++r; - base = (real)(r+1) * t->dx; + if ( r == 0 ) ++r; + base = (real)(r + 1) * t->dx; dif = r_ij - base; - val = ((t->ele[r].d*dif + t->ele[r].c)*dif + t->ele[r].b)*dif + - t->ele[r].a; + val = ((t->ele[r].d * dif + t->ele[r].c) * dif + t->ele[r].b) * dif + + t->ele[r].a; val *= EV_to_KCALpMOL / C_ele; - H->entries[Htop].j = j; - H->entries[Htop].val = self_coef * val; + H->j[Htop] = j; + H->val[Htop] = self_coef * val; ++Htop; - /* hydrogen bond lists */ - if( control->hb_cut > 0 && (ihb==1 || ihb==2) && - nbr_pj->d <= 
control->hb_cut ) { + /* H_sp matrix entry */ + if ( flag_sp ) + { + H_sp->j[H_sp_top] = j; + H_sp->val[H_sp_top] = H->val[Htop - 1]; + ++H_sp_top; + } + + /* hydrogen bond lists */ + if ( control->hb_cut > 0 && (ihb == 1 || ihb == 2) && + nbr_pj->d <= control->hb_cut ) + { // fprintf( stderr, "%d %d\n", atom1, atom2 ); jhb = sbp_j->p_hbond; - if( ihb == 1 && jhb == 2 ) { + if ( ihb == 1 && jhb == 2 ) + { hbonds->select.hbond_list[ihb_top].nbr = j; hbonds->select.hbond_list[ihb_top].scl = 1; hbonds->select.hbond_list[ihb_top].ptr = nbr_pj; ++ihb_top; ++num_hbonds; } - else if( ihb == 2 && jhb == 1 ) { + else if ( ihb == 2 && jhb == 1 ) + { jhb_top = End_Index( workspace->hbond_index[j], hbonds ); hbonds->select.hbond_list[jhb_top].nbr = i; hbonds->select.hbond_list[jhb_top].scl = -1; hbonds->select.hbond_list[jhb_top].ptr = nbr_pj; - Set_End_Index( workspace->hbond_index[j], jhb_top+1, hbonds ); + Set_End_Index( workspace->hbond_index[j], jhb_top + 1, hbonds ); ++num_hbonds; } } /* uncorrected bond orders */ - if( far_nbrs->select.far_nbr_list[pj].d <= control->nbr_cut ) { + if ( far_nbrs->select.far_nbr_list[pj].d <= control->nbr_cut ) + { r2 = SQR(r_ij); - if( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0) { + if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0) + { C12 = twbp->p_bo1 * POW( r_ij / twbp->r_s, twbp->p_bo2 ); BO_s = (1.0 + control->bo_cut) * EXP( C12 ); } else BO_s = C12 = 0.0; - if( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0) { + if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0) + { C34 = twbp->p_bo3 * POW( r_ij / twbp->r_p, twbp->p_bo4 ); BO_pi = EXP( C34 ); } else BO_pi = C34 = 0.0; - if( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0) { - C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 ); - BO_pi2= EXP( C56 ); + if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0) + { + C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 ); + BO_pi2 = EXP( C56 ); } else BO_pi2 = C56 = 0.0; /* Initially BO values are the uncorrected ones, page 1 */ BO = BO_s + BO_pi + BO_pi2; - if( 
BO >= control->bo_cut ) { + if ( BO >= control->bo_cut ) + { num_bonds += 2; /****** bonds i-j and j-i ******/ ibond = &( bonds->select.bond_list[btop_i] ); @@ -666,6 +766,7 @@ void Init_Forces_Tab( reax_system *system, control_params *control, ibond->d = r_ij; jbond->d = r_ij; rvec_Copy( ibond->dvec, nbr_pj->dvec ); + //fprintf (stderr, " %f - %f - %f \n", nbr_pj->dvec[0], nbr_pj->dvec[1], nbr_pj->dvec[2]); rvec_Scale( jbond->dvec, -1, nbr_pj->dvec ); ivec_Copy( ibond->rel_box, nbr_pj->rel_box ); ivec_Scale( jbond->rel_box, -1, nbr_pj->rel_box ); @@ -674,7 +775,7 @@ void Init_Forces_Tab( reax_system *system, control_params *control, ibond->sym_index = btop_j; jbond->sym_index = btop_i; ++btop_i; - Set_End_Index( j, btop_j+1, bonds ); + Set_End_Index( j, btop_j + 1, bonds ); bo_ij = &( ibond->bo_data ); bo_ji = &( jbond->bo_data ); @@ -688,22 +789,22 @@ void Init_Forces_Tab( reax_system *system, control_params *control, Cln_BOp_pi = twbp->p_bo4 * C34 / r2; Cln_BOp_pi2 = twbp->p_bo6 * C56 / r2; - /* Only dln_BOp_xx wrt. dr_i is stored here, note that + /* Only dln_BOp_xx wrt. dr_i is stored here, note that dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */ - rvec_Scale(bo_ij->dln_BOp_s,-bo_ij->BO_s*Cln_BOp_s,ibond->dvec); - rvec_Scale(bo_ij->dln_BOp_pi,-bo_ij->BO_pi*Cln_BOp_pi,ibond->dvec); + rvec_Scale(bo_ij->dln_BOp_s, -bo_ij->BO_s * Cln_BOp_s, ibond->dvec); + rvec_Scale(bo_ij->dln_BOp_pi, -bo_ij->BO_pi * Cln_BOp_pi, ibond->dvec); rvec_Scale(bo_ij->dln_BOp_pi2, - -bo_ij->BO_pi2*Cln_BOp_pi2,ibond->dvec); + -bo_ij->BO_pi2 * Cln_BOp_pi2, ibond->dvec); rvec_Scale(bo_ji->dln_BOp_s, -1., bo_ij->dln_BOp_s); rvec_Scale(bo_ji->dln_BOp_pi, -1., bo_ij->dln_BOp_pi ); rvec_Scale(bo_ji->dln_BOp_pi2, -1., bo_ij->dln_BOp_pi2 ); - /* Only dBOp wrt. dr_i is stored here, note that + /* Only dBOp wrt. 
dr_i is stored here, note that dBOp/dr_i = -dBOp/dr_j and all others are 0 */ - rvec_Scale( bo_ij->dBOp, - -(bo_ij->BO_s * Cln_BOp_s + - bo_ij->BO_pi * Cln_BOp_pi + - bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec ); + rvec_Scale( bo_ij->dBOp, + -(bo_ij->BO_s * Cln_BOp_s + + bo_ij->BO_pi * Cln_BOp_pi + + bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec ); rvec_Scale( bo_ji->dBOp, -1., bo_ij->dBOp ); rvec_Add( workspace->dDeltap_self[i], bo_ij->dBOp ); @@ -718,30 +819,37 @@ void Init_Forces_Tab( reax_system *system, control_params *control, bo_ij->Cdbo = bo_ij->Cdbopi = bo_ij->Cdbopi2 = 0.0; bo_ji->Cdbo = bo_ji->Cdbopi = bo_ji->Cdbopi2 = 0.0; - Set_End_Index( j, btop_j+1, bonds ); + Set_End_Index( j, btop_j + 1, bonds ); } } } } - H->entries[Htop].j = i; - H->entries[Htop].val = system->reaxprm.sbp[type_i].eta; + /* diagonal entry */ + H->j[Htop] = i; + H->val[Htop] = system->reaxprm.sbp[type_i].eta; ++Htop; + /* diagonal entry */ + H_sp->j[H_sp_top] = i; + H_sp->val[H_sp_top] = H->val[Htop - 1]; + ++H_sp_top; + Set_End_Index( i, btop_i, bonds ); - if( ihb == 1 ) + if ( ihb == 1 ) Set_End_Index( workspace->hbond_index[i], ihb_top, hbonds ); } // mark the end of j list - H->start[i] = Htop; + H->start[i] = Htop; + H_sp->start[i] = H_sp_top; /* validate lists - decide if reallocation is required! 
*/ - Validate_Lists( workspace, lists, - data->step, system->N, H->m, Htop, num_bonds, num_hbonds ); + Validate_Lists( workspace, lists, + data->step, system->N, H->m, Htop, num_bonds, num_hbonds ); #if defined(DEBUG_FOCUS) - fprintf( stderr, "step%d: Htop = %d, num_bonds = %d, num_hbonds = %d\n", - data->step, Htop, num_bonds, num_hbonds ); + fprintf( stderr, "step%d: Htop = %d, num_bonds = %d, num_hbonds = %d\n", + data->step, Htop, num_bonds, num_hbonds ); //Print_Bonds( system, bonds, "sbonds.out" ); //Print_Bond_List2( system, bonds, "sbonds.out" ); //Print_Sparse_Matrix2( H, "H.out" ); @@ -749,9 +857,10 @@ void Init_Forces_Tab( reax_system *system, control_params *control, } -void Estimate_Storage_Sizes( reax_system *system, control_params *control, - list **lists, int *Htop, int *hb_top, - int *bond_top, int *num_3body ) { +void Estimate_Storage_Sizes( reax_system *system, control_params *control, + list **lists, int *Htop, int *hb_top, + int *bond_top, int *num_3body ) +{ int i, j, pj; int start_i, end_i; int type_i, type_j; @@ -759,7 +868,7 @@ void Estimate_Storage_Sizes( reax_system *system, control_params *control, real r_ij, r2; real C12, C34, C56; real BO, BO_s, BO_pi, BO_pi2; - real p_boc1, p_boc2; + real p_boc1, p_boc2; list *far_nbrs; single_body_parameters *sbp_i, *sbp_j; two_body_parameters *twbp; @@ -770,7 +879,8 @@ void Estimate_Storage_Sizes( reax_system *system, control_params *control, p_boc1 = system->reaxprm.gp.l[0]; p_boc2 = system->reaxprm.gp.l[1]; - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { atom_i = &(system->atoms[i]); type_i = atom_i->type; start_i = Start_Index(i, far_nbrs); @@ -778,7 +888,8 @@ void Estimate_Storage_Sizes( reax_system *system, control_params *control, sbp_i = &(system->reaxprm.sbp[type_i]); ihb = sbp_i->p_hbond; - for( pj = start_i; pj < end_i; ++pj ) { + for ( pj = start_i; pj < end_i; ++pj ) + { nbr_pj = &( far_nbrs->select.far_nbr_list[pj] ); j = nbr_pj->nbr; atom_j = 
&(system->atoms[j]); @@ -786,46 +897,53 @@ void Estimate_Storage_Sizes( reax_system *system, control_params *control, sbp_j = &(system->reaxprm.sbp[type_j]); twbp = &(system->reaxprm.tbp[ index_tbp(type_i,type_j,system->reaxprm.num_atom_types) ]); - if( nbr_pj->d <= control->r_cut ) { + if ( nbr_pj->d <= control->r_cut ) + { ++(*Htop); - /* hydrogen bond lists */ - if( control->hb_cut > 0.1 && (ihb==1 || ihb==2) && - nbr_pj->d <= control->hb_cut ) { + /* hydrogen bond lists */ + if ( control->hb_cut > 0.1 && (ihb == 1 || ihb == 2) && + nbr_pj->d <= control->hb_cut ) + { jhb = sbp_j->p_hbond; - if( ihb == 1 && jhb == 2 ) + if ( ihb == 1 && jhb == 2 ) ++hb_top[i]; - else if( ihb == 2 && jhb == 1 ) + else if ( ihb == 2 && jhb == 1 ) ++hb_top[j]; } /* uncorrected bond orders */ - if( nbr_pj->d <= control->nbr_cut ) { + if ( nbr_pj->d <= control->nbr_cut ) + { r_ij = nbr_pj->d; r2 = SQR(r_ij); - if( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0) { + if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0) + { C12 = twbp->p_bo1 * POW( r_ij / twbp->r_s, twbp->p_bo2 ); BO_s = (1.0 + control->bo_cut) * EXP( C12 ); } else BO_s = C12 = 0.0; - if( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0) { + if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0) + { C34 = twbp->p_bo3 * POW( r_ij / twbp->r_p, twbp->p_bo4 ); BO_pi = EXP( C34 ); } else BO_pi = C34 = 0.0; - if( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0) { - C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 ); - BO_pi2= EXP( C56 ); + if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0) + { + C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 ); + BO_pi2 = EXP( C56 ); } else BO_pi2 = C56 = 0.0; /* Initially BO values are the uncorrected ones, page 1 */ BO = BO_s + BO_pi + BO_pi2; - if( BO >= control->bo_cut ) { + if ( BO >= control->bo_cut ) + { ++bond_top[i]; ++bond_top[j]; } @@ -836,8 +954,8 @@ void Estimate_Storage_Sizes( reax_system *system, control_params *control, *Htop += system->N; *Htop *= SAFE_ZONE; - - for( i = 0; i < system->N; ++i ) { + for 
( i = 0; i < system->N; ++i ) + { hb_top[i] = MAX( hb_top[i] * SAFE_HBONDS, MIN_HBONDS ); *num_3body += SQR(bond_top[i]); bond_top[i] = MAX( bond_top[i] * 2, MIN_BONDS ); @@ -846,49 +964,40 @@ void Estimate_Storage_Sizes( reax_system *system, control_params *control, } -void Compute_Forces( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - list** lists, output_controls *out_control ) +void Compute_Forces( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, + list** lists, output_controls *out_control ) { real t_start, t_elapsed; t_start = Get_Time( ); - if( !control->tabulate ) + if ( !control->tabulate ) + { Init_Forces( system, control, data, workspace, lists, out_control ); - else Init_Forces_Tab( system, control, data, workspace, lists, out_control ); + } + else + { + Init_Forces_Tab( system, control, data, workspace, lists, out_control ); + } t_elapsed = Get_Timing_Info( t_start ); data->timing.init_forces += t_elapsed; - #if defined(DEBUG_FOCUS) - print_sparse_matrix (system, workspace); fprintf( stderr, "init_forces - "); #endif - - //analyze_hbonds (system, workspace, lists); - t_start = Get_Time( ); Compute_Bonded_Forces( system, control, data, workspace, lists, out_control ); t_elapsed = Get_Timing_Info( t_start ); data->timing.bonded += t_elapsed; - - //print_bond_list (system, workspace, lists); - //exit (0); - -#if defined(DEBUG_FOCUS) +#if defined(DEBUG_FOCUS) fprintf( stderr, "bonded_forces - "); #endif t_start = Get_Time( ); - Compute_NonBonded_Forces( system, control, data, workspace, - lists, out_control ); + Compute_NonBonded_Forces( system, control, data, workspace, + lists, out_control ); t_elapsed = Get_Timing_Info( t_start ); data->timing.nonb += t_elapsed; - -#ifdef __DEBUG_CUDA__ - fprintf( stderr, "non_bonded_forces - %lf \n", t_elapsed); -#endif - #if defined(DEBUG_FOCUS) fprintf( stderr, "nonbondeds - "); #endif @@ -904,7 +1013,7 @@ void 
Compute_Forces( reax_system *system, control_params *control, Print_Total_Force( system, control, data, workspace, lists, out_control ); Compare_Total_Forces( system, control, data, workspace, lists, out_control ); #endif -#if defined(DEBUG_FOCUS) +#if defined(DEBUG_FOCUS) fprintf( stderr, "forces - "); #endif } diff --git a/PuReMD-GPU/src/forces.h b/PuReMD-GPU/src/forces.h index 73323f0419baf383d6bf671158ef85584a710728..0ef8b117c78de5e7b3cdc2311b4a492b4615a859 100644 --- a/PuReMD-GPU/src/forces.h +++ b/PuReMD-GPU/src/forces.h @@ -23,12 +23,14 @@ #include "mytypes.h" + void Init_Bonded_Force_Functions( control_params* ); void Compute_Forces( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); + static_storage*, list**, output_controls* ); void Estimate_Storage_Sizes( reax_system*, control_params*, list**, - int*, int*, int*, int* ); + int*, int*, int*, int* ); + #endif diff --git a/PuReMD-GPU/src/four_body_interactions.c b/PuReMD-GPU/src/four_body_interactions.c index c51601fa991203a77ec4840c10e74e15cfa42c87..25642871d23e389a96db943607f432aa68252a02 100644 --- a/PuReMD-GPU/src/four_body_interactions.c +++ b/PuReMD-GPU/src/four_body_interactions.c @@ -1,19 +1,20 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ @@ -22,20 +23,21 @@ #include "bond_orders.h" #include "box.h" +#include "index_utils.h" #include "list.h" #include "lookup.h" #include "vector.h" #include "math.h" -#include "index_utils.h" +#define MIN_SINE 1e-10 real Calculate_Omega( rvec dvec_ij, real r_ij, rvec dvec_jk, real r_jk, - rvec dvec_kl, real r_kl, rvec dvec_li, real r_li, - three_body_interaction_data *p_ijk, - three_body_interaction_data *p_jkl, - rvec dcos_omega_di, rvec dcos_omega_dj, - rvec dcos_omega_dk, rvec dcos_omega_dl, - output_controls *out_control ) + rvec dvec_kl, real r_kl, rvec dvec_li, real r_li, + three_body_interaction_data *p_ijk, + three_body_interaction_data *p_jkl, + rvec dcos_omega_di, rvec dcos_omega_dj, + rvec dcos_omega_dk, rvec dcos_omega_dl, + output_controls *out_control ) { real unnorm_cos_omega, unnorm_sin_omega, omega; real sin_ijk, cos_ijk, sin_jkl, cos_jkl; @@ -49,11 +51,11 @@ real Calculate_Omega( rvec dvec_ij, real r_ij, rvec dvec_jk, real r_jk, cos_jkl = COS( p_jkl->theta ); /* omega */ - unnorm_cos_omega = -rvec_Dot( dvec_ij,dvec_jk )*rvec_Dot( dvec_jk,dvec_kl ) + - SQR( r_jk ) * rvec_Dot( dvec_ij,dvec_kl ); + unnorm_cos_omega = -rvec_Dot( dvec_ij, dvec_jk ) * rvec_Dot( dvec_jk, dvec_kl ) + + SQR( r_jk ) * rvec_Dot( dvec_ij, dvec_kl ); rvec_Cross( cross_jk_kl, dvec_jk, dvec_kl ); unnorm_sin_omega = -r_jk * rvec_Dot( dvec_ij, cross_jk_kl ); - omega = atan2( unnorm_sin_omega, unnorm_cos_omega ); + omega = atan2( unnorm_sin_omega, unnorm_cos_omega ); /* derivatives */ /* coef for adjusments to cos_theta's */ @@ -70,24 +72,25 @@ real Calculate_Omega( rvec dvec_ij, real r_ij, rvec 
dvec_jk, real r_jk, hnhd = r_ij * r_kl * cos_ijk * sin_jkl; hnhe = r_ij * r_kl * sin_ijk * cos_jkl; + poem = 2.0 * r_ij * r_kl * sin_ijk * sin_jkl; - if( poem < 1e-20 ) poem = 1e-20; + if ( poem < 1e-20 ) poem = 1e-20; - tel = (SQR(r_ij) + SQR(r_jk) + SQR(r_kl) - SQR(r_li)) - - 2.0 * ( r_ij * r_jk * cos_ijk - r_ij * r_kl * cos_ijk * cos_jkl + - r_jk * r_kl * cos_jkl ); + tel = (SQR(r_ij) + SQR(r_jk) + SQR(r_kl) - SQR(r_li)) - + 2.0 * ( r_ij * r_jk * cos_ijk - r_ij * r_kl * cos_ijk * cos_jkl + + r_jk * r_kl * cos_jkl ); arg = tel / poem; - if( arg > 1.0 ) + if ( arg > 1.0 ) { arg = 1.0; } - if( arg < -1.0 ) + if ( arg < -1.0 ) { arg = -1.0; } - /*fprintf( out_control->etor, + /*fprintf( out_control->etor, "%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e\n", htra, htrb, htrc, hthd, hthe, hnra, hnrc, hnhd, hnhe ); fprintf( out_control->etor, "%23.15e%23.15e%23.15e\n", @@ -99,69 +102,72 @@ real Calculate_Omega( rvec dvec_ij, real r_ij, rvec dvec_jk, real r_jk, fprintf( out_control->etor, "%23.15e%23.15e%23.15e%23.15e\n", r_li, dvec_li[0], dvec_li[1], dvec_li[2] ); fprintf( out_control->etor, "%23.15e%23.15e%23.15e%23.15e\n", - r_ij, r_jk, r_kl, r_li ); - fprintf( out_control->etor, "%23.15e%23.15e%23.15e%23.15e\n", - cos_ijk, cos_jkl, sin_ijk, sin_jkl ); + r_ij, r_jk, r_kl, r_li ); + fprintf( out_control->etor, "%23.15e%23.15e%23.15e%23.15e\n", + cos_ijk, cos_jkl, sin_ijk, sin_jkl ); fprintf( out_control->etor, "%23.15e%23.15e%23.15e\n", poem, tel, arg );*/ /* fprintf( out_control->etor, "%23.15e%23.15e%23.15e\n", - -p_ijk->dcos_dk[0]/sin_ijk, - -p_ijk->dcos_dk[1]/sin_ijk, + -p_ijk->dcos_dk[0]/sin_ijk, + -p_ijk->dcos_dk[1]/sin_ijk, -p_ijk->dcos_dk[2]/sin_ijk ); fprintf( out_control->etor, "%23.15e%23.15e%23.15e\n", - -p_jkl->dcos_dk[0]/sin_jkl, - -p_jkl->dcos_dk[1]/sin_jkl, + -p_jkl->dcos_dk[0]/sin_jkl, + -p_jkl->dcos_dk[1]/sin_jkl, -p_jkl->dcos_dk[2]/sin_jkl );*/ - if( sin_ijk >= 0 && sin_ijk <= MIN_SINE ) + if ( sin_ijk >= 0 && sin_ijk <= MIN_SINE ) { 
sin_ijk = MIN_SINE; } - else if( sin_ijk <= 0 && sin_ijk >= -MIN_SINE ) + else if ( sin_ijk <= 0 && sin_ijk >= -MIN_SINE ) { sin_ijk = -MIN_SINE; } - if( sin_jkl >= 0 && sin_jkl <= MIN_SINE ) + if ( sin_jkl >= 0 && sin_jkl <= MIN_SINE ) { sin_jkl = MIN_SINE; } - else if( sin_jkl <= 0 && sin_jkl >= -MIN_SINE ) + else if ( sin_jkl <= 0 && sin_jkl >= -MIN_SINE ) { sin_jkl = -MIN_SINE; } // dcos_omega_di - rvec_ScaledSum( dcos_omega_di, (htra-arg*hnra)/r_ij, dvec_ij, -1., dvec_li ); - rvec_ScaledAdd( dcos_omega_di,-(hthd - arg*hnhd)/sin_ijk, p_ijk->dcos_dk ); + rvec_ScaledSum( dcos_omega_di, (htra - arg * hnra) / r_ij, dvec_ij, -1., dvec_li ); + rvec_ScaledAdd( dcos_omega_di, -(hthd - arg * hnhd) / sin_ijk, p_ijk->dcos_dk ); rvec_Scale( dcos_omega_di, 2.0 / poem, dcos_omega_di ); // dcos_omega_dj - rvec_ScaledSum( dcos_omega_dj,-(htra-arg*hnra)/r_ij, dvec_ij, - -htrb / r_jk, dvec_jk ); - rvec_ScaledAdd( dcos_omega_dj,-(hthd-arg*hnhd) / sin_ijk, p_ijk->dcos_dj ); - rvec_ScaledAdd( dcos_omega_dj,-(hthe-arg*hnhe) / sin_jkl, p_jkl->dcos_di ); + rvec_ScaledSum( dcos_omega_dj, -(htra - arg * hnra) / r_ij, dvec_ij, + -htrb / r_jk, dvec_jk ); + rvec_ScaledAdd( dcos_omega_dj, -(hthd - arg * hnhd) / sin_ijk, p_ijk->dcos_dj ); + rvec_ScaledAdd( dcos_omega_dj, -(hthe - arg * hnhe) / sin_jkl, p_jkl->dcos_di ); rvec_Scale( dcos_omega_dj, 2.0 / poem, dcos_omega_dj ); // dcos_omega_dk - rvec_ScaledSum( dcos_omega_dk,-(htrc-arg*hnrc) / r_kl, dvec_kl, - htrb / r_jk, dvec_jk ); - rvec_ScaledAdd( dcos_omega_dk,-(hthd-arg*hnhd) / sin_ijk, p_ijk->dcos_di ); - rvec_ScaledAdd( dcos_omega_dk,-(hthe-arg*hnhe) / sin_jkl, p_jkl->dcos_dj ); + rvec_ScaledSum( dcos_omega_dk, -(htrc - arg * hnrc) / r_kl, dvec_kl, + htrb / r_jk, dvec_jk ); + rvec_ScaledAdd( dcos_omega_dk, -(hthd - arg * hnhd) / sin_ijk, p_ijk->dcos_di ); + rvec_ScaledAdd( dcos_omega_dk, -(hthe - arg * hnhe) / sin_jkl, p_jkl->dcos_dj ); rvec_Scale( dcos_omega_dk, 2.0 / poem, dcos_omega_dk ); // dcos_omega_dl - rvec_ScaledSum( 
dcos_omega_dl, (htrc-arg*hnrc) / r_kl, dvec_kl, 1., dvec_li ); - rvec_ScaledAdd( dcos_omega_dl,-(hthe-arg*hnhe) / sin_jkl, p_jkl->dcos_dk ); + rvec_ScaledSum( dcos_omega_dl, (htrc - arg * hnrc) / r_kl, dvec_kl, 1., dvec_li ); + rvec_ScaledAdd( dcos_omega_dl, -(hthe - arg * hnhe) / sin_jkl, p_jkl->dcos_dk ); rvec_Scale( dcos_omega_dl, 2.0 / poem, dcos_omega_dl ); - return omega; + return omega; //return arg; } -void Four_Body_Interactions( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - list **lists, output_controls *out_control ) + + + +void Four_Body_Interactions( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, + list **lists, output_controls *out_control ) { int i, j, k, l, pi, pj, pk, pl, pij, plk; int type_i, type_j, type_k, type_l; @@ -212,31 +218,35 @@ void Four_Body_Interactions( reax_system *system, control_params *control, list *thb_intrs = (*lists) + THREE_BODIES; - for( j = 0; j < system->N; ++j ) { + for ( j = 0; j < system->N; ++j ) + { type_j = system->atoms[j].type; Delta_j = workspace->Delta_boc[j]; start_j = Start_Index(j, bonds); end_j = End_Index(j, bonds); - for( pk = start_j; pk < end_j; ++pk ) { + for ( pk = start_j; pk < end_j; ++pk ) + { pbond_jk = &( bonds->select.bond_list[pk] ); k = pbond_jk->nbr; bo_jk = &( pbond_jk->bo_data ); BOA_jk = bo_jk->BO - control->thb_cut; /* see if there are any 3-body interactions involving j&k - where j is the central atom. Otherwise there is no point in - trying to form a 4-body interaction out of this neighborhood */ - if( j < k && bo_jk->BO > control->thb_cut/*0*/ && - Num_Entries(pk, thb_intrs) ) { + where j is the central atom. 
Otherwise there is no point in + trying to form a 4-body interaction out of this neighborhood */ + if ( j < k && bo_jk->BO > control->thb_cut/*0*/ && + Num_Entries(pk, thb_intrs) ) + { start_k = Start_Index(k, bonds); - end_k = End_Index(k, bonds); + end_k = End_Index(k, bonds); pj = pbond_jk->sym_index; // pj points to j on k's list - /* do the same check as above: are there any 3-body interactions + /* do the same check as above: are there any 3-body interactions involving k&j where k is the central atom */ - if( Num_Entries(pj, thb_intrs) ) { + if ( Num_Entries(pj, thb_intrs) ) + { type_k = system->atoms[k].type; Delta_k = workspace->Delta_boc[k]; r_jk = pbond_jk->d; @@ -244,7 +254,7 @@ void Four_Body_Interactions( reax_system *system, control_params *control, start_pk = Start_Index(pk, thb_intrs ); end_pk = End_Index(pk, thb_intrs ); start_pj = Start_Index(pj, thb_intrs ); - end_pj = End_Index(pj, thb_intrs ); + end_pj = End_Index(pj, thb_intrs ); exp_tor2_jk = EXP( -p_tor2 * BOA_jk ); exp_cot2_jk = EXP( -p_cot2 * SQR(BOA_jk - 1.5) ); @@ -255,14 +265,16 @@ void Four_Body_Interactions( reax_system *system, control_params *control, /* pick i up from j-k interaction where j is the centre atom */ - for( pi = start_pk; pi < end_pk; ++pi ) { + for ( pi = start_pk; pi < end_pk; ++pi ) + { p_ijk = &( thb_intrs->select.three_body_list[pi] ); pij = p_ijk->pthb; // pij is pointer to i on j's bond_list pbond_ij = &( bonds->select.bond_list[pij] ); bo_ij = &( pbond_ij->bo_data ); - if( bo_ij->BO > control->thb_cut/*0*/ ) { + if ( bo_ij->BO > control->thb_cut/*0*/ ) + { i = p_ijk->thb; type_i = system->atoms[i].type; r_ij = pbond_ij->d; @@ -272,17 +284,18 @@ void Four_Body_Interactions( reax_system *system, control_params *control, sin_ijk = SIN( theta_ijk ); cos_ijk = COS( theta_ijk ); //tan_ijk_i = 1. 
/ TAN( theta_ijk ); - if( sin_ijk >= 0 && sin_ijk <= MIN_SINE ) + if ( sin_ijk >= 0 && sin_ijk <= MIN_SINE ) tan_ijk_i = cos_ijk / MIN_SINE; - else if( sin_ijk <= 0 && sin_ijk >= -MIN_SINE ) + else if ( sin_ijk <= 0 && sin_ijk >= -MIN_SINE ) tan_ijk_i = cos_ijk / -MIN_SINE; else tan_ijk_i = cos_ijk / sin_ijk; exp_tor2_ij = EXP( -p_tor2 * BOA_ij ); - exp_cot2_ij = EXP( -p_cot2 * SQR(BOA_ij -1.5) ); + exp_cot2_ij = EXP( -p_cot2 * SQR(BOA_ij - 1.5) ); /* pick l up from j-k intr. where k is the centre */ - for( pl = start_pj; pl < end_pj; ++pl ) { + for ( pl = start_pj; pl < end_pj; ++pl ) + { p_jkl = &( thb_intrs->select.three_body_list[pl] ); l = p_jkl->thb; plk = p_jkl->pthb; //pointer to l on k's bond_list! @@ -292,8 +305,9 @@ void Four_Body_Interactions( reax_system *system, control_params *control, fbh = &(system->reaxprm.fbp[ index_fbp(type_i,type_j,type_k,type_l,system->reaxprm.num_atom_types ) ]); fbp = &(system->reaxprm.fbp[ index_fbp(type_i,type_j,type_k,type_l,system->reaxprm.num_atom_types )].prm[0]); - if( i != l && fbh->cnt && bo_kl->BO > control->thb_cut/*0*/ && - bo_ij->BO * bo_jk->BO * bo_kl->BO > control->thb_cut/*0*/ ){ + if ( i != l && fbh->cnt && bo_kl->BO > control->thb_cut/*0*/ && + bo_ij->BO * bo_jk->BO * bo_kl->BO > control->thb_cut/*0*/ ) + { ++num_frb_intrs; r_kl = pbond_kl->d; BOA_kl = bo_kl->BO - control->thb_cut; @@ -302,77 +316,77 @@ void Four_Body_Interactions( reax_system *system, control_params *control, sin_jkl = SIN( theta_jkl ); cos_jkl = COS( theta_jkl ); //tan_jkl_i = 1. 
/ TAN( theta_jkl ); - if( sin_jkl >= 0 && sin_jkl <= MIN_SINE ) + if ( sin_jkl >= 0 && sin_jkl <= MIN_SINE ) tan_jkl_i = cos_jkl / MIN_SINE; - else if( sin_jkl <= 0 && sin_jkl >= -MIN_SINE ) + else if ( sin_jkl <= 0 && sin_jkl >= -MIN_SINE ) tan_jkl_i = cos_jkl / -MIN_SINE; - else tan_jkl_i = cos_jkl /sin_jkl; + else tan_jkl_i = cos_jkl / sin_jkl; - Sq_Distance_on_T3( system->atoms[l].x, system->atoms[i].x, - &(system->box), dvec_li ); + Sq_Distance_on_T3( system->atoms[l].x, system->atoms[i].x, + &(system->box), dvec_li ); r_li = rvec_Norm( dvec_li ); /* omega and its derivative */ - //cos_omega=Calculate_Omega(pbond_ij->dvec,r_ij,pbond_jk->dvec, - omega = Calculate_Omega(pbond_ij->dvec, r_ij, pbond_jk->dvec, - r_jk, pbond_kl->dvec, r_kl, - dvec_li, r_li, p_ijk, p_jkl, - dcos_omega_di, dcos_omega_dj, - dcos_omega_dk, dcos_omega_dl, - out_control); + //cos_omega=Calculate_Omega(pbond_ij->dvec,r_ij,pbond_jk->dvec, + omega = Calculate_Omega(pbond_ij->dvec, r_ij, pbond_jk->dvec, + r_jk, pbond_kl->dvec, r_kl, + dvec_li, r_li, p_ijk, p_jkl, + dcos_omega_di, dcos_omega_dj, + dcos_omega_dk, dcos_omega_dl, + out_control); cos_omega = COS( omega ); cos2omega = COS( 2. * omega ); cos3omega = COS( 3. * omega ); /* end omega calculations */ /* torsion energy */ - exp_tor1 = EXP(fbp->p_tor1 * SQR(2.-bo_jk->BO_pi-f11_DjDk)); + exp_tor1 = EXP(fbp->p_tor1 * SQR(2. 
- bo_jk->BO_pi - f11_DjDk)); exp_tor2_kl = EXP( -p_tor2 * BOA_kl ); - exp_cot2_kl = EXP( -p_cot2 * SQR(BOA_kl-1.5) ); - fn10 = (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jk) * - (1.0 - exp_tor2_kl); - - CV = 0.5 * ( fbp->V1 * (1.0 + cos_omega) + - fbp->V2 * exp_tor1 * (1.0 - cos2omega) + - fbp->V3 * (1.0 + cos3omega) ); - //CV = 0.5 * fbp->V1 * (1.0 + cos_omega) + + exp_cot2_kl = EXP( -p_cot2 * SQR(BOA_kl - 1.5) ); + fn10 = (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jk) * + (1.0 - exp_tor2_kl); + + CV = 0.5 * ( fbp->V1 * (1.0 + cos_omega) + + fbp->V2 * exp_tor1 * (1.0 - cos2omega) + + fbp->V3 * (1.0 + cos3omega) ); + //CV = 0.5 * fbp->V1 * (1.0 + cos_omega) + // fbp->V2 * exp_tor1 * (1.0 - SQR(cos_omega)) + // fbp->V3 * (0.5 + 2.0*CUBE(cos_omega) - 1.5 * cos_omega); data->E_Tor += e_tor = fn10 * sin_ijk * sin_jkl * CV; dfn11 = (-p_tor3 * exp_tor3_DjDk + - (p_tor3 * exp_tor3_DjDk - p_tor4 * exp_tor4_DjDk) * - (2.+exp_tor3_DjDk) * exp_tor34_inv) * exp_tor34_inv; + (p_tor3 * exp_tor3_DjDk - p_tor4 * exp_tor4_DjDk) * + (2. 
+ exp_tor3_DjDk) * exp_tor34_inv) * exp_tor34_inv; CEtors1 = sin_ijk * sin_jkl * CV; - CEtors2 = -fn10 * 2.0 * fbp->p_tor1 * fbp->V2 * exp_tor1 * - (2.0 - bo_jk->BO_pi - f11_DjDk) * (1.0 - SQR(cos_omega)) * - sin_ijk * sin_jkl; + CEtors2 = -fn10 * 2.0 * fbp->p_tor1 * fbp->V2 * exp_tor1 * + (2.0 - bo_jk->BO_pi - f11_DjDk) * (1.0 - SQR(cos_omega)) * + sin_ijk * sin_jkl; CEtors3 = CEtors2 * dfn11; - CEtors4 = CEtors1 * p_tor2 * exp_tor2_ij * - (1.0 - exp_tor2_jk) * (1.0 - exp_tor2_kl); + CEtors4 = CEtors1 * p_tor2 * exp_tor2_ij * + (1.0 - exp_tor2_jk) * (1.0 - exp_tor2_kl); - CEtors5 = CEtors1 * p_tor2 * exp_tor2_jk * - (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_kl); + CEtors5 = CEtors1 * p_tor2 * exp_tor2_jk * + (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_kl); CEtors6 = CEtors1 * p_tor2 * exp_tor2_kl * - (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jk); + (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jk); cmn = -fn10 * CV; CEtors7 = cmn * sin_jkl * tan_ijk_i; CEtors8 = cmn * sin_ijk * tan_jkl_i; - CEtors9 = fn10 * sin_ijk * sin_jkl * - (0.5 * fbp->V1 - 2.0 * fbp->V2 * exp_tor1 * cos_omega + - 1.5 * fbp->V3 * (cos2omega + 2. * SQR(cos_omega))); + CEtors9 = fn10 * sin_ijk * sin_jkl * + (0.5 * fbp->V1 - 2.0 * fbp->V2 * exp_tor1 * cos_omega + + 1.5 * fbp->V3 * (cos2omega + 2. * SQR(cos_omega))); //cmn = -fn10 * CV; //CEtors7 = cmn * sin_jkl * cos_ijk; //CEtors8 = cmn * sin_ijk * cos_jkl; - //CEtors9 = fn10 * sin_ijk * sin_jkl * + //CEtors9 = fn10 * sin_ijk * sin_jkl * // (0.5 * fbp->V1 - 2.0 * fbp->V2 * exp_tor1 * cos_omega + // fbp->V3 * (6*SQR(cos_omega) - 1.50)); /* end of torsion energy */ @@ -380,38 +394,38 @@ void Four_Body_Interactions( reax_system *system, control_params *control, /* 4-body conjugation energy */ fn12 = exp_cot2_ij * exp_cot2_jk * exp_cot2_kl; - data->E_Con += e_con = fbp->p_cot1 * fn12 * - (1. + (SQR(cos_omega)-1.) * sin_ijk*sin_jkl); + data->E_Con += e_con = fbp->p_cot1 * fn12 * + (1. + (SQR(cos_omega) - 1.) 
* sin_ijk * sin_jkl); - Cconj = -2.0 * fn12 * fbp->p_cot1 * p_cot2 * - (1. + (SQR(cos_omega)-1.) * sin_ijk*sin_jkl); + Cconj = -2.0 * fn12 * fbp->p_cot1 * p_cot2 * + (1. + (SQR(cos_omega) - 1.) * sin_ijk * sin_jkl); CEconj1 = Cconj * (BOA_ij - 1.5e0); CEconj2 = Cconj * (BOA_jk - 1.5e0); CEconj3 = Cconj * (BOA_kl - 1.5e0); - CEconj4 = -fbp->p_cot1 * fn12 * - (SQR(cos_omega) - 1.0) * sin_jkl * tan_ijk_i; - CEconj5 = -fbp->p_cot1 * fn12 * - (SQR(cos_omega) - 1.0) * sin_ijk * tan_jkl_i; - //CEconj4 = -fbp->p_cot1 * fn12 * + CEconj4 = -fbp->p_cot1 * fn12 * + (SQR(cos_omega) - 1.0) * sin_jkl * tan_ijk_i; + CEconj5 = -fbp->p_cot1 * fn12 * + (SQR(cos_omega) - 1.0) * sin_ijk * tan_jkl_i; + //CEconj4 = -fbp->p_cot1 * fn12 * // (SQR(cos_omega) - 1.0) * sin_jkl * cos_ijk; - //CEconj5 = -fbp->p_cot1 * fn12 * + //CEconj5 = -fbp->p_cot1 * fn12 * // (SQR(cos_omega) - 1.0) * sin_ijk * cos_jkl; - CEconj6 = 2.0 * fbp->p_cot1 * fn12 * - cos_omega * sin_ijk * sin_jkl; + CEconj6 = 2.0 * fbp->p_cot1 * fn12 * + cos_omega * sin_ijk * sin_jkl; /* end 4-body conjugation energy */ //fprintf(stdout, "%6d %6d %6d %6d %7.3f %7.3f %7.3f %7.3f ", // workspace->orig_id[i], workspace->orig_id[j], - // workspace->orig_id[k], workspace->orig_id[l], + // workspace->orig_id[k], workspace->orig_id[l], // omega, cos_omega, cos2omega, cos3omega ); - //fprintf(stdout, + //fprintf(stdout, // "%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f\n", - // CEtors2, CEtors3, CEtors4, CEtors5, + // CEtors2, CEtors3, CEtors4, CEtors5, // CEtors6, CEtors7, CEtors8, CEtors9 ); //fprintf(stdout, "%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f\n", - // theta_ijk, theta_jkl, sin_ijk, + // theta_ijk, theta_jkl, sin_ijk, // sin_jkl, cos_jkl, tan_jkl_i ); /* forces */ @@ -420,37 +434,38 @@ void Four_Body_Interactions( reax_system *system, control_params *control, workspace->CdDelta[k] += CEtors3; bo_ij->Cdbo += (CEtors4 + CEconj1); bo_jk->Cdbo += (CEtors5 + CEconj2); - bo_kl->Cdbo += (CEtors6 + CEconj3); - if( control->ensemble == NVE 
|| control->ensemble == NVT || control->ensemble == bNVT) { + if ( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT ) + { /* dcos_theta_ijk */ - rvec_ScaledAdd( system->atoms[i].f, - CEtors7 + CEconj4, p_ijk->dcos_dk ); - rvec_ScaledAdd( system->atoms[j].f, - CEtors7 + CEconj4, p_ijk->dcos_dj ); - rvec_ScaledAdd( system->atoms[k].f, - CEtors7 + CEconj4, p_ijk->dcos_di ); + rvec_ScaledAdd( system->atoms[i].f, + CEtors7 + CEconj4, p_ijk->dcos_dk ); + rvec_ScaledAdd( system->atoms[j].f, + CEtors7 + CEconj4, p_ijk->dcos_dj ); + rvec_ScaledAdd( system->atoms[k].f, + CEtors7 + CEconj4, p_ijk->dcos_di ); /* dcos_theta_jkl */ - rvec_ScaledAdd( system->atoms[j].f, - CEtors8 + CEconj5, p_jkl->dcos_di ); - rvec_ScaledAdd( system->atoms[k].f, - CEtors8 + CEconj5, p_jkl->dcos_dj ); - rvec_ScaledAdd( system->atoms[l].f, - CEtors8 + CEconj5, p_jkl->dcos_dk ); + rvec_ScaledAdd( system->atoms[j].f, + CEtors8 + CEconj5, p_jkl->dcos_di ); + rvec_ScaledAdd( system->atoms[k].f, + CEtors8 + CEconj5, p_jkl->dcos_dj ); + rvec_ScaledAdd( system->atoms[l].f, + CEtors8 + CEconj5, p_jkl->dcos_dk ); /* dcos_omega */ - rvec_ScaledAdd( system->atoms[i].f, - CEtors9 + CEconj6, dcos_omega_di ); - rvec_ScaledAdd( system->atoms[j].f, - CEtors9 + CEconj6, dcos_omega_dj ); - rvec_ScaledAdd( system->atoms[k].f, - CEtors9 + CEconj6, dcos_omega_dk ); - rvec_ScaledAdd( system->atoms[l].f, - CEtors9 + CEconj6, dcos_omega_dl ); + rvec_ScaledAdd( system->atoms[i].f, + CEtors9 + CEconj6, dcos_omega_di ); + rvec_ScaledAdd( system->atoms[j].f, + CEtors9 + CEconj6, dcos_omega_dj ); + rvec_ScaledAdd( system->atoms[k].f, + CEtors9 + CEconj6, dcos_omega_dk ); + rvec_ScaledAdd( system->atoms[l].f, + CEtors9 + CEconj6, dcos_omega_dl ); } - else { + else + { ivec_Sum(rel_box_jl, pbond_jk->rel_box, pbond_kl->rel_box); /* dcos_theta_ijk */ @@ -459,8 +474,8 @@ void Four_Body_Interactions( reax_system *system, control_params *control, rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); 
rvec_Add( data->ext_press, ext_press ); - rvec_ScaledAdd( system->atoms[j].f, - CEtors7 + CEconj4, p_ijk->dcos_dj ); + rvec_ScaledAdd( system->atoms[j].f, + CEtors7 + CEconj4, p_ijk->dcos_dj ); rvec_Scale( force, CEtors7 + CEconj4, p_ijk->dcos_di ); rvec_Add( system->atoms[k].f, force ); @@ -469,8 +484,8 @@ void Four_Body_Interactions( reax_system *system, control_params *control, /* dcos_theta_jkl */ - rvec_ScaledAdd( system->atoms[j].f, - CEtors8 + CEconj5, p_jkl->dcos_di ); + rvec_ScaledAdd( system->atoms[j].f, + CEtors8 + CEconj5, p_jkl->dcos_di ); rvec_Scale( force, CEtors8 + CEconj5, p_jkl->dcos_dj ); rvec_Add( system->atoms[k].f, force ); @@ -483,14 +498,14 @@ void Four_Body_Interactions( reax_system *system, control_params *control, rvec_Add( data->ext_press, ext_press ); - /* dcos_omega */ + /* dcos_omega */ rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_di ); rvec_Add( system->atoms[i].f, force ); rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); rvec_Add( data->ext_press, ext_press ); - rvec_ScaledAdd( system->atoms[j].f, - CEtors9 + CEconj6, dcos_omega_dj ); + rvec_ScaledAdd( system->atoms[j].f, + CEtors9 + CEconj6, dcos_omega_dj ); rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_dk ); rvec_Add( system->atoms[k].f, force ); @@ -504,39 +519,39 @@ void Four_Body_Interactions( reax_system *system, control_params *control, /* This part is intended for a fully-flexible box */ - /* rvec_ScaledSum( temp_rvec, - CEtors7 + CEconj4, p_ijk->dcos_dk, // i + /* rvec_ScaledSum( temp_rvec, + CEtors7 + CEconj4, p_ijk->dcos_dk, // i CEtors9 + CEconj6, dcos_omega_di ); - rvec_OuterProduct( temp_rtensor, + rvec_OuterProduct( temp_rtensor, temp_rvec, system->atoms[i].x ); rtensor_Copy( total_rtensor, temp_rtensor ); - rvec_ScaledSum( temp_rvec, + rvec_ScaledSum( temp_rvec, CEtors7 + CEconj4, p_ijk->dcos_dj, // j CEtors8 + CEconj5, p_jkl->dcos_di ); - rvec_ScaledAdd( temp_rvec, + rvec_ScaledAdd( temp_rvec, CEtors9 + CEconj6, dcos_omega_dj ); - rvec_OuterProduct( 
temp_rtensor, + rvec_OuterProduct( temp_rtensor, temp_rvec, system->atoms[j].x ); rtensor_Add( total_rtensor, temp_rtensor ); - rvec_ScaledSum( temp_rvec, + rvec_ScaledSum( temp_rvec, CEtors7 + CEconj4, p_ijk->dcos_di, // k CEtors8 + CEconj5, p_jkl->dcos_dj ); - rvec_ScaledAdd( temp_rvec, + rvec_ScaledAdd( temp_rvec, CEtors9 + CEconj6, dcos_omega_dk ); - rvec_OuterProduct( temp_rtensor, + rvec_OuterProduct( temp_rtensor, temp_rvec, system->atoms[k].x ); rtensor_Add( total_rtensor, temp_rtensor ); - rvec_ScaledSum( temp_rvec, + rvec_ScaledSum( temp_rvec, CEtors8 + CEconj5, p_jkl->dcos_dk, // l CEtors9 + CEconj6, dcos_omega_dl ); - rvec_OuterProduct( temp_rtensor, + rvec_OuterProduct( temp_rtensor, temp_rvec, system->atoms[l].x ); rtensor_Copy( total_rtensor, temp_rtensor ); - if( pbond_ij->imaginary || pbond_jk->imaginary || + if( pbond_ij->imaginary || pbond_jk->imaginary || pbond_kl->imaginary ) rtensor_ScaledAdd( data->flex_bar.P, -1., total_rtensor ); else @@ -544,82 +559,82 @@ void Four_Body_Interactions( reax_system *system, control_params *control, } #ifdef TEST_ENERGY - /*fprintf( out_control->etor, - //"%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f\n", - //r_ij, r_jk, r_kl, - "%12.8f%12.8f%12.8f%12.8f\n", - cos_ijk, cos_jkl, sin_ijk, sin_jkl );*/ + /*fprintf( out_control->etor, + //"%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f\n", + //r_ij, r_jk, r_kl, + "%12.8f%12.8f%12.8f%12.8f\n", + cos_ijk, cos_jkl, sin_ijk, sin_jkl );*/ // fprintf( out_control->etor, "%12.8f\n", dfn11 ); - fprintf( out_control->etor, "%12.8f%12.8f%12.8f\n", - fn10, cos_omega, CV ); + fprintf( out_control->etor, "%12.8f%12.8f%12.8f\n", + fn10, cos_omega, CV ); - fprintf( out_control->etor, - "%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f\n", - CEtors2, CEtors3, CEtors4, CEtors5, - CEtors6, CEtors7, CEtors8, CEtors9 ); + fprintf( out_control->etor, + "%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f\n", + CEtors2, CEtors3, CEtors4, CEtors5, + CEtors6, CEtors7, CEtors8, CEtors9 ); - /* 
fprintf( out_control->etor, + /* fprintf( out_control->etor, "%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f\n", htra, htrb, htrc, hthd, hthe, hnra, hnrc, hnhd, hnhe ); */ - fprintf( out_control->etor, - "%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f\n", - CEconj1, CEconj2, CEconj3, CEconj4, CEconj5, CEconj6 ); + fprintf( out_control->etor, + "%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f\n", + CEconj1, CEconj2, CEconj3, CEconj4, CEconj5, CEconj6 ); /* fprintf(out_control->etor,"%23.15e%23.15e%23.15e%23.15e\n", fbp->V1, fbp->V2, fbp->V3, fbp->p_tor1 );*/ - fprintf( out_control->etor, - //"%6d%6d%6d%6d%23.15e%23.15e%23.15e%23.15e\n", - "%6d%6d%6d%6d%12.8f%12.8f\n", - workspace->orig_id[i], workspace->orig_id[j], - workspace->orig_id[k], workspace->orig_id[l], - e_tor, e_con ); + fprintf( out_control->etor, + //"%6d%6d%6d%6d%23.15e%23.15e%23.15e%23.15e\n", + "%6d%6d%6d%6d%12.8f%12.8f\n", + workspace->orig_id[i], workspace->orig_id[j], + workspace->orig_id[k], workspace->orig_id[l], + e_tor, e_con ); //RAD2DEG(omega), BOA_jk, e_tor, data->E_Tor ); - fprintf( out_control->econ, - "%6d%6d%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e\n", - workspace->orig_id[i], workspace->orig_id[j], - workspace->orig_id[k], workspace->orig_id[l], - RAD2DEG(omega), BOA_ij, BOA_jk, BOA_kl, - e_con,data->E_Con ); - - /* fprintf( out_control->etor, - "%12.8f%12.8f%12.8f\n%12.8f%12.8f%12.8f\n%12.8f%12.8f%12.8f\n", - (CEtors7 + CEconj4)*p_ijk->dcos_dk[0], - (CEtors7 + CEconj4)*p_ijk->dcos_dk[1], + fprintf( out_control->econ, + "%6d%6d%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e\n", + workspace->orig_id[i], workspace->orig_id[j], + workspace->orig_id[k], workspace->orig_id[l], + RAD2DEG(omega), BOA_ij, BOA_jk, BOA_kl, + e_con, data->E_Con ); + + /* fprintf( out_control->etor, + "%12.8f%12.8f%12.8f\n%12.8f%12.8f%12.8f\n%12.8f%12.8f%12.8f\n", + (CEtors7 + CEconj4)*p_ijk->dcos_dk[0], + (CEtors7 + CEconj4)*p_ijk->dcos_dk[1], (CEtors7 + CEconj4)*p_ijk->dcos_dk[2], - (CEtors7 + CEconj4)*p_ijk->dcos_dj[0], - 
(CEtors7 + CEconj4)*p_ijk->dcos_dj[1], + (CEtors7 + CEconj4)*p_ijk->dcos_dj[0], + (CEtors7 + CEconj4)*p_ijk->dcos_dj[1], (CEtors7 + CEconj4)*p_ijk->dcos_dj[2], - (CEtors7 + CEconj4)*p_ijk->dcos_di[0], - (CEtors7 + CEconj4)*p_ijk->dcos_di[1], + (CEtors7 + CEconj4)*p_ijk->dcos_di[0], + (CEtors7 + CEconj4)*p_ijk->dcos_di[1], (CEtors7 + CEconj4)*p_ijk->dcos_di[2] ); */ - /* fprintf( out_control->etor, + /* fprintf( out_control->etor, "%12.8f%12.8f%12.8f\n%12.8f%12.8f%12.8f\n%12.8f%12.8f%12.8f\n", - (CEtors8 + CEconj5)*p_jkl->dcos_di[0], - (CEtors8 + CEconj5)*p_jkl->dcos_di[1], - (CEtors8 + CEconj5)*p_jkl->dcos_di[2], - (CEtors8 + CEconj5)*p_jkl->dcos_dj[0], - (CEtors8 + CEconj5)*p_jkl->dcos_dj[1], - (CEtors8 + CEconj5)*p_jkl->dcos_dj[2], - (CEtors8 + CEconj5)*p_jkl->dcos_dk[0], - (CEtors8 + CEconj5)*p_jkl->dcos_dk[1], + (CEtors8 + CEconj5)*p_jkl->dcos_di[0], + (CEtors8 + CEconj5)*p_jkl->dcos_di[1], + (CEtors8 + CEconj5)*p_jkl->dcos_di[2], + (CEtors8 + CEconj5)*p_jkl->dcos_dj[0], + (CEtors8 + CEconj5)*p_jkl->dcos_dj[1], + (CEtors8 + CEconj5)*p_jkl->dcos_dj[2], + (CEtors8 + CEconj5)*p_jkl->dcos_dk[0], + (CEtors8 + CEconj5)*p_jkl->dcos_dk[1], (CEtors8 + CEconj5)*p_jkl->dcos_dk[2] ); */ - fprintf( out_control->etor, - "%12.8f%12.8f%12.8f\n%12.8f%12.8f%12.8f\n%12.8f%12.8f%12.8f\n%12.8f%12.8f%12.8f\n", - dcos_omega_di[0], dcos_omega_di[1], dcos_omega_di[2], - dcos_omega_dj[0], dcos_omega_dj[1], dcos_omega_dj[2], - dcos_omega_dk[0], dcos_omega_dk[1], dcos_omega_dk[2], - dcos_omega_dl[0], dcos_omega_dl[1], dcos_omega_dl[2] ); + fprintf( out_control->etor, + "%12.8f%12.8f%12.8f\n%12.8f%12.8f%12.8f\n%12.8f%12.8f%12.8f\n%12.8f%12.8f%12.8f\n", + dcos_omega_di[0], dcos_omega_di[1], dcos_omega_di[2], + dcos_omega_dj[0], dcos_omega_dj[1], dcos_omega_dj[2], + dcos_omega_dk[0], dcos_omega_dk[1], dcos_omega_dk[2], + dcos_omega_dl[0], dcos_omega_dl[1], dcos_omega_dl[2] ); #endif #ifdef TEST_FORCES - // Torsion Forces - Add_dBOpinpi2(system, lists, j, pk, CEtors2, 0., - workspace->f_tor, 
workspace->f_tor); + /* Torsion Forces */ + Add_dBOpinpi2(system, lists, j, pk, CEtors2, 0., + workspace->f_tor, workspace->f_tor); Add_dDelta( system, lists, j, CEtors3, workspace->f_tor ); Add_dDelta( system, lists, k, CEtors3, workspace->f_tor ); Add_dBO( system, lists, j, pij, CEtors4, workspace->f_tor ); @@ -639,7 +654,7 @@ void Four_Body_Interactions( reax_system *system, control_params *control, rvec_ScaledAdd( workspace->f_tor[k], CEtors9, dcos_omega_dk ); rvec_ScaledAdd( workspace->f_tor[l], CEtors9, dcos_omega_dl ); - // Conjugation Forces + /* Conjugation Forces */ Add_dBO( system, lists, j, pij, CEconj1, workspace->f_con ); Add_dBO( system, lists, j, pk, CEconj2, workspace->f_con ); Add_dBO( system, lists, k, plk, CEconj3, workspace->f_con ); @@ -666,12 +681,12 @@ void Four_Body_Interactions( reax_system *system, control_params *control, } // pk loop ends } // j loop - /* fprintf( stderr, "4body: ext_press (%23.15e %23.15e %23.15e)\n", + /* fprintf( stderr, "4body: ext_press (%23.15e %23.15e %23.15e)\n", data->ext_press[0], data->ext_press[1], data->ext_press[2] );*/ #ifdef TEST_FORCES fprintf( stderr, "Number of torsion angles: %d\n", num_frb_intrs ); - fprintf( stderr, "Torsion Energy: %g\t Conjugation Energy: %g\n", - data->E_Tor, data->E_Con ); + fprintf( stderr, "Torsion Energy: %g\t Conjugation Energy: %g\n", + data->E_Tor, data->E_Con ); #endif } diff --git a/PuReMD-GPU/src/four_body_interactions.h b/PuReMD-GPU/src/four_body_interactions.h index 8e8dd7c0991a747000e77b2d460711e433db52ef..65e315a94f95239ad8c7081a00c32cc0d3264cd3 100644 --- a/PuReMD-GPU/src/four_body_interactions.h +++ b/PuReMD-GPU/src/four_body_interactions.h @@ -23,10 +23,9 @@ #include "mytypes.h" -#define MIN_SINE 1e-10 - void Four_Body_Interactions( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); + static_storage*, list**, output_controls* ); + #endif diff --git a/PuReMD-GPU/src/geo_tools.c b/PuReMD-GPU/src/geo_tools.c new file 
mode 100644 index 0000000000000000000000000000000000000000..f3c3bd48140f9fd46ac3e460a0ab4bd33ade85b9 --- /dev/null +++ b/PuReMD-GPU/src/geo_tools.c @@ -0,0 +1,797 @@ +/*---------------------------------------------------------------------- + SerialReax - Reax Force Field Simulator + + Copyright (2010) Purdue University + Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#include <ctype.h> + +#include "geo_tools.h" +#include "allocate.h" +#include "box.h" +#include "list.h" +#include "restart.h" +#include "tool_box.h" +#include "vector.h" + + +/********************* geo format routines ******************/ +void Count_Geo_Atoms( FILE *geo, reax_system *system ) +{ + int i, serial; + rvec x; + char element[3], name[9], line[MAX_LINE + 1]; + + /* total number of atoms */ + fscanf( geo, " %d", &(system->N) ); + + /* count atoms */ + for ( i = 0; i < system->N; ++i ) + { + fscanf( geo, CUSTOM_ATOM_FORMAT, + &serial, element, name, &x[0], &x[1], &x[2] ); + Fit_to_Periodic_Box( &(system->box), &x ); + } + + fseek( geo, 0, SEEK_SET ); // set the pointer to the beginning of the file + fgets( line, MAX_LINE, geo ); + fgets( line, MAX_LINE, geo ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "N = %d\n\n", system->N ); +#endif +} + + +char Read_Geo( char* geo_file, reax_system* 
system, control_params *control, + simulation_data *data, static_storage *workspace ) +{ + + FILE *geo; + char descriptor[9]; + int i, serial, top; + real box_x, box_y, box_z, alpha, beta, gamma; + rvec x; + char element[3], name[9]; + reax_atom *atom; + + /* open the geometry file */ + if ( (geo = fopen(geo_file, "r")) == NULL ) + { + fprintf( stderr, "Error opening the geo file! terminating...\n" ); + exit( FILE_NOT_FOUND ); + } + + /* read box information */ + fscanf( geo, CUSTOM_BOXGEO_FORMAT, + descriptor, &box_x, &box_y, &box_z, &alpha, &beta, &gamma ); + /* initialize the box */ + Setup_Box( box_x, box_y, box_z, alpha, beta, gamma, &(system->box) ); + + /* count my atoms & allocate storage */ + Count_Geo_Atoms( geo, system ); + if ( PreAllocate_Space( system, control, workspace ) == FAILURE ) + { + fprintf( stderr, "PreAllocate_Space: not enough memory!" ); + fprintf( stderr, "terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + /* read in my atom info */ + top = 0; + for ( i = 0; i < system->N; ++i ) + { + fscanf( geo, CUSTOM_ATOM_FORMAT, + &serial, element, name, &x[0], &x[1], &x[2] ); + Fit_to_Periodic_Box( &(system->box), &x ); +#if defined(DEBUG) + fprintf( stderr, "atom%d: %s %s %f %f %f\n", + serial, element, name, x[0], x[1], x[2] ); +#endif + + atom = &(system->atoms[top]); + workspace->orig_id[i] = serial; + atom->type = Get_Atom_Type( &(system->reaxprm), element ); + strcpy( atom->name, name ); + rvec_Copy( atom->x, x ); + rvec_MakeZero( atom->v ); + rvec_MakeZero( atom->f ); + atom->q = 0.; + + top++; + } + + fclose( geo ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "finished reading the geo file\n" ); +#endif + + return SUCCESS; +} + + +int Read_Box_Info( reax_system *system, FILE *geo, int geo_format ) +{ + char *cryst; + char line[MAX_LINE + 1]; + char descriptor[9]; + char s_a[12], s_b[12], s_c[12], s_alpha[12], s_beta[12], s_gamma[12]; + char s_group[12], s_zValue[12]; + + /* initialize variables */ + fseek( geo, 0, SEEK_SET ); // 
set the pointer to the beginning of the file + + switch ( geo_format ) + { + case PDB: + cryst = "CRYST1"; + break; + default: + cryst = "BOX"; + } + + /* locate the cryst line in the geo file, read it and + initialize the big box */ + while ( fgets( line, MAX_LINE, geo ) ) + { + if ( strncmp( line, cryst, 6 ) == 0 ) + { + if ( geo_format == PDB ) + sscanf( line, PDB_CRYST1_FORMAT, + &descriptor[0], + &s_a[0], &s_b[0], &s_c[0], + &s_alpha[0], &s_beta[0], &s_gamma[0], + &s_group[0], &s_zValue[0] ); + + /* compute full volume tensor from the angles */ + Setup_Box( atof(s_a), atof(s_b), atof(s_c), + atof(s_alpha), atof(s_beta), atof(s_gamma), + &(system->box) ); + return SUCCESS; + } + } + if ( ferror( geo ) ) + { + return FAILURE; + } + + return FAILURE; +} + + +void Count_PDB_Atoms( FILE *geo, reax_system *system ) +{ + char *endptr = NULL; + char line[MAX_LINE + 1]; + char s_x[9], s_y[9], s_z[9]; + rvec x; + + /* initialize variables */ + fseek( geo, 0, SEEK_SET ); /* set the pointer to the beginning of the file */ + system->N = 0; + + /* increment number of atoms for each line denoting an atom desc */ + while ( fgets( line, MAX_LINE, geo ) ) + { + if ( strncmp( line, "ATOM", 4 ) == 0 || + strncmp( line, "HETATM", 6 ) == 0 ) + { + system->N++; + + strncpy( s_x, line + 30, 8 ); + s_x[8] = 0; + strncpy( s_y, line + 38, 8 ); + s_y[8] = 0; + strncpy( s_z, line + 46, 8 ); + s_z[8] = 0; + Make_Point( strtod( s_x, &endptr ), strtod( s_y, &endptr ), + strtod( s_z, &endptr ), &x ); + Fit_to_Periodic_Box( &(system->box), &x ); + } + } + +#if defined(DEBUG) + fprintf( stderr, "count atoms:\n" ); + fprintf( stderr, "N = %d\n\n", system->N ); +#endif +} + + +char Read_PDB( char* pdb_file, reax_system* system, control_params *control, + simulation_data *data, static_storage *workspace ) +{ + + FILE *pdb; + char **tmp; + char *s, *s1; + char descriptor[9], serial[9]; + char atom_name[9], res_name[9], res_seq[9]; + char s_x[9], s_y[9], s_z[9]; + char occupancy[9], temp_factor[9]; 
+ char seg_id[9], element[9], charge[9]; + char alt_loc, chain_id, icode; + char *endptr = NULL; + int i, c, c1, pdb_serial, top; + rvec x; + reax_atom *atom; + + /* open pdb file */ + if ( (pdb = fopen(pdb_file, "r")) == NULL ) + { + fprintf( stderr, "fopen: error opening the pdb file! terminating...\n" ); + exit( FILE_NOT_FOUND ); + } + + /* allocate memory for tokenizing pdb lines */ + if ( Allocate_Tokenizer_Space( &s, &s1, &tmp ) == FAILURE ) + { + fprintf( stderr, "Allocate_Tokenizer_Space: not enough memory!" ); + fprintf( stderr, "terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + /* read box information */ + if ( Read_Box_Info( system, pdb, PDB ) == FAILURE ) + { + fprintf( stderr, "Read_Box_Info: no CRYST line in the pdb file!" ); + fprintf( stderr, "terminating...\n" ); + exit( INVALID_GEO ); + } + + Count_PDB_Atoms( pdb, system ); + if ( PreAllocate_Space( system, control, workspace ) == FAILURE ) + { + fprintf( stderr, "PreAllocate_Space: not enough memory!" ); + fprintf( stderr, "terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + /* start reading and processing the pdb file */ +#if defined(DEBUG_FOCUS) + fprintf( stderr, "starting to read the pdb file\n" ); +#endif + fseek( pdb, 0, SEEK_SET ); + c = 0; + c1 = 0; + top = 0; + s[0] = 0; + + while ( fgets( s, MAX_LINE, pdb ) ) + { + /* read new line and tokenize it */ + strncpy( s1, s, MAX_LINE - 1 ); + c1 = Tokenize( s, &tmp ); + + /* process new line */ + if ( strncmp(tmp[0], "ATOM", 4) == 0 || strncmp(tmp[0], "HETATM", 6) == 0 ) + { + if ( strncmp(tmp[0], "ATOM", 4) == 0 ) + { + strncpy( &descriptor[0], s1, 6 ); + descriptor[6] = 0; + strncpy( &serial[0], s1 + 6, 5 ); + serial[5] = 0; + strncpy( &atom_name[0], s1 + 12, 4 ); + atom_name[4] = 0; + //strncpy( &serial[0], s1+6, 7 ); serial[7] = 0; + //strncpy( &atom_name[0], s1+13, 3 ); atom_name[3] = 0; + alt_loc = s1[16]; + strncpy( &res_name[0], s1 + 17, 3 ); + res_name[3] = 0; + chain_id = s1[21]; + strncpy( &res_seq[0], s1 + 22, 4 
); + res_seq[4] = 0; + icode = s1[26]; + strncpy( &s_x[0], s1 + 30, 8 ); + s_x[8] = 0; + strncpy( &s_y[0], s1 + 38, 8 ); + s_y[8] = 0; + strncpy( &s_z[0], s1 + 46, 8 ); + s_z[8] = 0; + strncpy( &occupancy[0], s1 + 54, 6 ); + occupancy[6] = 0; + strncpy( &temp_factor[0], s1 + 60, 6 ); + temp_factor[6] = 0; + strncpy( &seg_id[0], s1 + 72, 4 ); + seg_id[4] = 0; + strncpy( &element[0], s1 + 76, 2 ); + element[2] = 0; + strncpy( &charge[0], s1 + 78, 2 ); + charge[2] = 0; + } + else if (strncmp(tmp[0], "HETATM", 6) == 0) + { + strncpy( &descriptor[0], s1, 6 ); + descriptor[6] = 0; + strncpy( &serial[0], s1 + 6, 5 ); + serial[5] = 0; + strncpy( &atom_name[0], s1 + 12, 4 ); + atom_name[4] = 0; + //strncpy( &serial[0], s1+6, 7 ); serial[7] = 0; + //strncpy( &atom_name[0], s1+13, 3 ); atom_name[3] = 0; + alt_loc = s1[16]; + strncpy( &res_name[0], s1 + 17, 3 ); + res_name[3] = 0; + chain_id = s1[21]; + strncpy( &res_seq[0], s1 + 22, 4 ); + res_seq[4] = 0; + icode = s1[26]; + strncpy( &s_x[0], s1 + 30, 8 ); + s_x[8] = 0; + strncpy( &s_y[0], s1 + 38, 8 ); + s_y[8] = 0; + strncpy( &s_z[0], s1 + 46, 8 ); + s_z[8] = 0; + strncpy( &occupancy[0], s1 + 54, 6 ); + occupancy[6] = 0; + strncpy( &temp_factor[0], s1 + 60, 6 ); + temp_factor[6] = 0; + //strncpy( &seg_id[0], s1+72, 4 ); seg_id[4] = 0; + strncpy( &element[0], s1 + 76, 2 ); + element[2] = 0; + strncpy( &charge[0], s1 + 78, 2 ); + charge[2] = 0; + } + + /* if the point is inside my_box, add it to my lists */ + Make_Point( strtod( &s_x[0], &endptr ), + strtod( &s_y[0], &endptr ), + strtod( &s_z[0], &endptr ), &x ); + + Fit_to_Periodic_Box( &(system->box), &x ); + + /* store orig_id, type, name and coord info of the new atom */ + atom = &(system->atoms[top]); + pdb_serial = (int) strtod( &serial[0], &endptr ); + workspace->orig_id[top] = pdb_serial; + + Trim_Spaces( element ); + atom->type = Get_Atom_Type( &(system->reaxprm), element ); + strcpy( atom->name, atom_name ); + + rvec_Copy( atom->x, x ); + rvec_MakeZero( atom->v ); + 
rvec_MakeZero( atom->f ); + atom->q = 0; + + top++; + // fprintf( stderr, "p%d: %6d%2d x:%8.3f%8.3f%8.3f" + // "q:%8.3f occ:%s temp:%s seg:%s elmnt:%s\n", + // system->my_rank, + // c, system->my_atoms[top].type, + // system->my_atoms[top].x[0], + // system->my_atoms[top].x[1], + // system->my_atoms[top].x[2], + // system->my_atoms[top].q, occupancy, temp_factor, + // seg_id, element ); + + //fprintf( stderr, "atom( %8.3f %8.3f %8.3f ) --> p%d\n", + // system->my_atoms[top].x[0], system->my_atoms[top].x[1], + // system->my_atoms[top].x[2], system->my_rank ); + + c++; + } + + /* IMPORTANT: We do not check for the soundness of restrictions here. + When atom2 is on atom1's restricted list, and there is a restriction + on atom2, then atom1 has to be on atom2's restricted list, too. + However, we do not check if this is the case in the input file, + this is upto the user. */ + else if (!strncmp( tmp[0], "CONECT", 6 )) + { + if ( control->restrict_bonds ) + { + /* error check */ + // Check_Input_Range( c1 - 2, 0, MAX_RESTRICT, + // "CONECT line exceeds max num restrictions allowed.\n" ); + + /* read bond restrictions */ + // if( is_Valid_Serial( workspace, pdb_serial = atoi(tmp[1]) ) ) + // ratom = workspace->map_serials[ pdb_serial ]; + + // workspace->restricted[ ratom ] = c1 - 2; + // for( i = 2; i < c1; ++i ) + // { + // if( is_Valid_Serial(workspace, pdb_serial = atoi(tmp[i])) ) + // workspace->restricted_list[ ratom ][ i-2 ] = + // workspace->map_serials[ pdb_serial ]; + // } + + // fprintf( stderr, "restriction on %d:", ratom ); + // for( i = 0; i < workspace->restricted[ ratom ]; ++i ) + // fprintf( stderr, " %d", + // workspace->restricted_list[ratom][i] ); + // fprintf( stderr, "\n" ); + } + } + + /* clear previous input line */ + s[0] = 0; + for ( i = 0; i < c1; ++i ) + tmp[i][0] = 0; + } + if ( ferror( pdb ) ) + { + return FAILURE; + } + + fclose( pdb ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "finished reading the pdb file\n" ); +#endif + + return 
SUCCESS; +} + + +/* PDB serials are written without regard to the order, we'll see if this + cause trouble, if so we'll have to rethink this approach + Also, we do not write connect lines yet. +*/ +char Write_PDB( reax_system* system, list* bonds, simulation_data *data, + control_params *control, static_storage *workspace, output_controls *out_control ) +{ + int i, buffer_req, buffer_len; + //int j, connect[4]; + char name[8]; + //real bo; + real alpha, beta, gamma; + reax_atom *p_atom; + char fname[MAX_STR]; + char *line; + char *buffer; + FILE *pdb; + + /* Allocation */ + line = (char*) smalloc( sizeof(char) * PDB_ATOM_FORMAT_O_LENGTH, "geo:line" ); + buffer_req = system->N * PDB_ATOM_FORMAT_O_LENGTH; + + buffer = (char*) smalloc( sizeof(char) * buffer_req, "geo:buffer" ); + + pdb = NULL; + line[0] = 0; + buffer[0] = 0; + /* Writing Box information */ + gamma = ACOS( (system->box.box[0][0] * system->box.box[1][0] + + system->box.box[0][1] * system->box.box[1][1] + + system->box.box[0][2] * system->box.box[1][2]) / + (system->box.box_norms[0] * system->box.box_norms[1]) ); + beta = ACOS( (system->box.box[0][0] * system->box.box[2][0] + + system->box.box[0][1] * system->box.box[2][1] + + system->box.box[0][2] * system->box.box[2][2]) / + (system->box.box_norms[0] * system->box.box_norms[2]) ); + alpha = ACOS( (system->box.box[2][0] * system->box.box[1][0] + + system->box.box[2][1] * system->box.box[1][1] + + system->box.box[2][2] * system->box.box[1][2]) / + (system->box.box_norms[2] * system->box.box_norms[1]) ); + + /*open pdb and write header*/ + sprintf(fname, "%s-%d.pdb", control->sim_name, data->step); + pdb = fopen(fname, "w"); + fprintf( pdb, PDB_CRYST1_FORMAT_O, + "CRYST1", + system->box.box_norms[0], system->box.box_norms[1], + system->box.box_norms[2], + RAD2DEG(alpha), RAD2DEG(beta), RAD2DEG(gamma), " ", 0 ); + fprintf( out_control->log, "Box written\n" ); + fflush( out_control->log ); + + /*write atom lines to buffer*/ + for ( i = 0; i < system->N; 
i++) + { + p_atom = &(system->atoms[i]); + strncpy(name, p_atom->name, 8); + Trim_Spaces(name); + sprintf( line, PDB_ATOM_FORMAT_O, + "ATOM ", workspace->orig_id[i], p_atom->name, ' ', "REX", ' ', 1, ' ', + p_atom->x[0], p_atom->x[1], p_atom->x[2], + 1.0, 0.0, "0", name, " " ); + fprintf( stderr, "PDB NAME <%s>\n", p_atom->name ); + strncpy( buffer + i * PDB_ATOM_FORMAT_O_LENGTH, line, + PDB_ATOM_FORMAT_O_LENGTH ); + } + + buffer_len = system->N * PDB_ATOM_FORMAT_O_LENGTH; + buffer[buffer_len] = 0; + + fprintf( pdb, "%s", buffer ); + fclose( pdb ); + + /* Writing connect information */ + /* + for(i=0; i < system->N; i++) { + count = 0; + for(j = Start_Index(i, bonds); j < End_Index(i, bonds); ++j) { + bo = bonds->bond_list[j].bo_data.BO; + if (bo > 0.3) { + connect[count] = bonds->bond_list[j].nbr+1; + count++; + } + } + + fprintf( out_control->pdb, "%6s%5d", "CONECT", i+1 ); + for( k=0; k < count; k++ ) + fprintf( out_control->pdb, "%5d", connect[k] ); + fprintf( out_control->pdb, "\n" ); + } + */ + + free(buffer); + free(line); + + return SUCCESS; +} + + +char Read_BGF( char* bgf_file, reax_system* system, control_params *control, + simulation_data *data, static_storage *workspace ) +{ + FILE *bgf; + char **tokens; + char *line, *backup; + char descriptor[10], serial[10]; + char atom_name[10], res_name[10], res_seq[10]; + char s_x[12], s_y[12], s_z[12]; + char occupancy[10], temp_factor[10]; + char element[10], charge[10]; + char chain_id; + char s_a[12], s_b[12], s_c[12], s_alpha[12], s_beta[12], s_gamma[12]; + char *endptr = NULL; + int i, atom_cnt, token_cnt, bgf_serial, ratom = 0; + + /* open biograf file */ + if ( (bgf = fopen( bgf_file, "r" )) == NULL ) + { + fprintf( stderr, "Error opening the bgf file!\n" ); + exit( FILE_NOT_FOUND ); + } + + /* allocate memory for tokenizing biograf file lines */ + line = (char*) malloc( sizeof(char) * MAX_LINE ); + backup = (char*) malloc( sizeof(char) * MAX_LINE ); + tokens = (char**) malloc( sizeof(char*) * MAX_TOKENS 
); + for ( i = 0; i < MAX_TOKENS; i++ ) + { + tokens[i] = (char*) malloc( sizeof(char) * MAX_TOKEN_LEN ); + } + + /* count number of atoms in the pdb file */ + system->N = 0; + line[0] = 0; + + while ( fgets( line, MAX_LINE, bgf ) ) + { + tokens[0][0] = 0; + token_cnt = Tokenize( line, &tokens ); + + if ( !strcmp( tokens[0], "ATOM" ) || !strcmp( tokens[0], "HETATM" ) ) + { + (system->N)++; + } + + line[0] = 0; + } + if ( ferror ( bgf ) ) + { + return FAILURE; + } + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "system->N: %d\n", system->N ); +#endif + + fclose( bgf ); + + /* memory allocations for atoms, atom maps, bond restrictions */ +// system->atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) ); +// +// workspace->map_serials = (int*) calloc( MAX_ATOM_ID, sizeof(int) ); +// for ( i = 0; i < MAX_ATOM_ID; ++i ) +// { +// workspace->map_serials[i] = -1; +// } +// +// workspace->orig_id = (int*) calloc( system->N, sizeof(int) ); +// workspace->restricted = (int*) calloc( system->N, sizeof(int) ); +// workspace->restricted_list = (int**) calloc( system->N, sizeof(int*) ); +// for ( i = 0; i < system->N; ++i ) +// { +// workspace->restricted_list[i] = (int*) calloc( MAX_RESTRICT, sizeof(int) ); +// } + + //TODO: setup similar for BGF +// Count_PDB_Atoms( pdb, system ); + if ( PreAllocate_Space( system, control, workspace ) == FAILURE ) + { + fprintf( stderr, "PreAllocate_Space: not enough memory!" 
); + fprintf( stderr, "terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + /* start reading and processing bgf file */ + if ( (bgf = fopen( bgf_file, "r" )) == NULL ) + { + fprintf( stderr, "Error opening the bgf file!\n" ); + exit( FILE_NOT_FOUND ); + } + atom_cnt = 0; + token_cnt = 0; + + while ( fgets( line, MAX_LINE, bgf ) ) + { + /* read new line and tokenize it */ + strncpy( backup, line, MAX_LINE - 1 ); + token_cnt = Tokenize( line, &tokens ); + + /* process new line */ + if ( !strncmp(tokens[0], "ATOM", 4) || !strncmp(tokens[0], "HETATM", 6) ) + { + if ( !strncmp(tokens[0], "ATOM", 4) ) + { + strncpy( &descriptor[0], backup, 6 ); + descriptor[6] = 0; + strncpy( &serial[0], backup + 7, 5 ); + serial[5] = 0; + strncpy( &atom_name[0], backup + 13, 5 ); + atom_name[5] = 0; + strncpy( &res_name[0], backup + 19, 3 ); + res_name[3] = 0; + chain_id = backup[23]; + strncpy( &res_seq[0], backup + 25, 5 ); + res_seq[5] = 0; + strncpy( &s_x[0], backup + 30, 10 ); + s_x[10] = 0; + strncpy( &s_y[0], backup + 40, 10 ); + s_y[10] = 0; + strncpy( &s_z[0], backup + 50, 10 ); + s_z[10] = 0; + strncpy( &element[0], backup + 61, 5 ); + element[5] = 0; + strncpy( &occupancy[0], backup + 66, 3 ); + occupancy[3] = 0; + strncpy( &temp_factor[0], backup + 69, 2 ); + temp_factor[2] = 0; + strncpy( &charge[0], backup + 72, 8 ); + charge[8] = 0; + } + else if ( !strncmp(tokens[0], "HETATM", 6) ) + { + /* bgf hetatm: + (7x,i5,1x,a5,1x,a3,1x,a1,1x,a5,3f10.5,1x,a5,i3,i2,1x,f8.5) */ + strncpy( &descriptor[0], backup, 6 ); + descriptor[6] = 0; + strncpy( &serial[0], backup + 7, 5 ); + serial[5] = 0; + strncpy( &atom_name[0], backup + 13, 5 ); + atom_name[5] = 0; + strncpy( &res_name[0], backup + 19, 3 ); + res_name[3] = 0; + chain_id = backup[23]; + strncpy( &res_seq[0], backup + 25, 5 ); + res_seq[5] = 0; + strncpy( &s_x[0], backup + 30, 10 ); + s_x[10] = 0; + strncpy( &s_y[0], backup + 40, 10 ); + s_y[10] = 0; + strncpy( &s_z[0], backup + 50, 10 ); + s_z[10] = 0; + strncpy( 
&element[0], backup + 61, 5 ); + element[5] = 0; + strncpy( &occupancy[0], backup + 66, 3 ); + occupancy[3] = 0; + strncpy( &temp_factor[0], backup + 69, 2 ); + temp_factor[2] = 0; + strncpy( &charge[0], backup + 72, 8 ); + charge[8] = 0; + } + + /* add to mapping */ + bgf_serial = strtod( &serial[0], &endptr ); + Check_Input_Range( bgf_serial, 0, MAX_ATOM_ID, "Invalid bgf serial" ); + workspace->map_serials[ bgf_serial ] = atom_cnt; + workspace->orig_id[ atom_cnt ] = bgf_serial; + // fprintf( stderr, "map %d --> %d\n", bgf_serial, atom_cnt ); + + /* copy atomic positions */ + system->atoms[atom_cnt].x[0] = strtod( &s_x[0], &endptr ); + system->atoms[atom_cnt].x[1] = strtod( &s_y[0], &endptr ); + system->atoms[atom_cnt].x[2] = strtod( &s_z[0], &endptr ); + + /* atom name and type */ + strcpy( system->atoms[atom_cnt].name, atom_name ); + Trim_Spaces( element ); + system->atoms[atom_cnt].type = + Get_Atom_Type( &(system->reaxprm), element ); + + /* fprintf( stderr, + "a:%3d(%1d) c:%10.5f%10.5f%10.5f q:%10.5f occ:%s temp:%s seg_id:%s element:%s\n", + atom_cnt, system->atoms[ atom_cnt ].type, + system->atoms[ atom_cnt ].x[0], + system->atoms[ atom_cnt ].x[1], system->atoms[ atom_cnt ].x[2], + system->atoms[ atom_cnt ].q, occupancy, temp_factor, + seg_id, element ); */ + + atom_cnt++; + } + else if (!strncmp( tokens[0], "CRYSTX", 6 )) + { + sscanf( backup, BGF_CRYSTX_FORMAT, + &descriptor[0], + &s_a[0], + &s_b[0], + &s_c[0], + &s_alpha[0], + &s_beta[0], + &s_gamma[0] ); + + /* Compute full volume tensor from the angles */ + Setup_Box( atof(s_a), atof(s_b), atof(s_c), + atof(s_alpha), atof(s_beta), atof(s_gamma), + &(system->box) ); + } + else if (!strncmp( tokens[0], "CONECT", 6 )) + { + /* check number of restrictions */ + Check_Input_Range( token_cnt - 2, 0, MAX_RESTRICT, + "CONECT line exceeds max restrictions allowed.\n" ); + + /* read bond restrictions */ + if ( is_Valid_Serial( workspace, bgf_serial = atoi(tokens[1]) ) ) + { + ratom = workspace->map_serials[ 
bgf_serial ]; + } + + workspace->restricted[ ratom ] = token_cnt - 2; + for ( i = 2; i < token_cnt; ++i ) + { + if ( is_Valid_Serial( workspace, bgf_serial = atoi(tokens[i]) ) ) + { + workspace->restricted_list[ ratom * system->N + (i - 2) ] = + workspace->map_serials[ bgf_serial ]; + } + } + + /* fprintf( stderr, "restriction on %d:", ratom ); + for( i = 0; i < workspace->restricted[ ratom ]; ++i ) + fprintf( stderr, " %d", workspace->restricted_list[ratom][i] ); + fprintf( stderr, "\n" ); */ + } + + /* clear previous input line */ + line[0] = 0; + + for ( i = 0; i < token_cnt; ++i ) + { + tokens[i][0] = 0; + } + } + if ( ferror ( bgf ) ) + { + return FAILURE; + } + + fclose( bgf ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "bgf file read\n" ); +#endif + + return SUCCESS; +} diff --git a/PuReMD-GPU/src/pdb_tools.h b/PuReMD-GPU/src/geo_tools.h similarity index 84% rename from PuReMD-GPU/src/pdb_tools.h rename to PuReMD-GPU/src/geo_tools.h index 12518fc2daaecd735f5cb0f781a0f1c72e504aa7..4c44e3081e6105947b610916e95210e123b4a7d9 100644 --- a/PuReMD-GPU/src/pdb_tools.h +++ b/PuReMD-GPU/src/geo_tools.h @@ -1,9 +1,10 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or @@ -18,13 +19,20 @@ <http://www.gnu.org/licenses/>. 
----------------------------------------------------------------------*/ -#ifndef __PDB_TOOLS_H_ -#define __PDB_TOOLS_H_ +#ifndef __GEO_TOOLS_H_ +#define __GEO_TOOLS_H_ #include "mytypes.h" -/* -PDB format : +// CUSTOM_BOXGEO: BOXGEO box_x box_y box_z angle1 angle2 angle3 +#define CUSTOM_BOXGEO_FORMAT " %s %lf %lf %lf %lf %lf %lf" +// CUSTOM ATOM: serial element name x y z +#define CUSTOM_ATOM_FORMAT " %d %s %s %lf %lf %lf" + +char Read_Geo( char*, reax_system*, control_params*, + simulation_data*, static_storage* ); + +/* PDB format : http://www.rcsb.org/pdb/file_formats/pdb/pdbguide2.2/guide2.2_frame.html #define PDB_ATOM_FORMAT "%6s%5d%4s%c%4s%c%4d%c%8s%8s%8s%6s%6s%4s%2s%2s\n" @@ -94,24 +102,28 @@ COLUMNS DATA TYPE FIELD DEFINITION 67 - 70 Integer z Z value */ -//#define PDB_ATOM_FORMAT "ATOM %4d%4s%c%3s%c%4d%c%8.3f%8.3f%8.3f%6.2f%6.2f%-4s%2s%2s\n" +//#define PDB_ATOM_FORMAT +//"ATOM %4d%4s%c%3s%c%4d%c%8.3f%8.3f%8.3f%6.2f%6.2f%-4s%2s%2s\n" #define PDB_ATOM_FORMAT "%6s%5d%4s%c%4s%c%4d%c%8s%8s%8s%6s%6s%4s%2s%2s\n" +#define PDB_ATOM_FORMAT_LENGTH 71 #define PDB_HETATM_FORMAT "%6s%5d%4s%c%4s%c%4d%c%8s%8s%8s%6s%6s%2s%2s\n" #define PDB_CONECT_FORMAT "%6s%5d%5d%5d%5d%5d\n" #define PDB_CRYST1_FORMAT "%6s%9s%9s%9s%7s%7s%7s%11s%4s\n" #define PDB_ATOM_FORMAT_O "%6s%5d %4s%c%3s %c%4d%c %8.3f%8.3f%8.3f%6.2f%6.2f %-4s%2s%2s\n" +#define PDB_ATOM_FORMAT_O_LENGTH 81 #define PDB_CRYST1_FORMAT_O "%6s%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f%11s%4d\n" #define BGF_CRYSTX_FORMAT "%8s%11s%11s%11s%11s%11s%11s" char Read_PDB( char*, reax_system*, control_params*, - simulation_data*, static_storage* ); + simulation_data*, static_storage* ); + char Read_BGF( char*, reax_system*, control_params*, - simulation_data*, static_storage* ); + simulation_data*, static_storage* ); -char Write_PDB( reax_system*, control_params*, simulation_data*, - static_storage*, list*, output_controls* ); +char Write_PDB( reax_system*, list*, simulation_data*, + control_params*, static_storage*, output_controls* ); #endif 
diff --git a/PuReMD-GPU/src/grid.c b/PuReMD-GPU/src/grid.c index fb09b409194a84b1646da3b779aad8b547ff9db3..2077d56080799f3674eecb3038a0f30bd318bd67 100644 --- a/PuReMD-GPU/src/grid.c +++ b/PuReMD-GPU/src/grid.c @@ -1,28 +1,29 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. 
----------------------------------------------------------------------*/ #include "grid.h" +#include "index_utils.h" #include "reset_utils.h" #include "vector.h" -#include "index_utils.h" int Estimate_GCell_Population( reax_system* system ) @@ -34,36 +35,45 @@ int Estimate_GCell_Population( reax_system* system ) g = &( system->g ); Reset_Grid( g ); - for( l = 0; l < system->N; l++ ) { + for ( l = 0; l < system->N; l++ ) + { i = (int)(system->atoms[l].x[0] * g->inv_len[0]); j = (int)(system->atoms[l].x[1] * g->inv_len[1]); k = (int)(system->atoms[l].x[2] * g->inv_len[2]); g->top[index_grid_3d (i, j, k, g)]++; - // fprintf( stderr, "\tatom%-6d (%8.3f%8.3f%8.3f) --> (%3d%3d%3d)\n", + // fprintf( stderr, "\tatom%-6d (%8.3f%8.3f%8.3f) --> (%3d%3d%3d)\n", // l, system->atoms[l].x[0], system->atoms[l].x[1], system->atoms[l].x[2], // i, j, k ); } max_atoms = 0; - for( i = 0; i < g->ncell[0]; i++ ) - for( j = 0; j < g->ncell[1]; j++ ) - for( k = 0; k < g->ncell[2]; k++ ) + for ( i = 0; i < g->ncell[0]; i++ ) + { + for ( j = 0; j < g->ncell[1]; j++ ) + { + for ( k = 0; k < g->ncell[2]; k++ ) + { if( max_atoms < g->top[index_grid_3d (i, j, k, g)] ) + { max_atoms = g->top[index_grid_3d (i, j, k, g)]; + } + } + } + } - return MAX(max_atoms*SAFE_ZONE, MIN_GCELL_POPL); + return MAX(max_atoms * SAFE_ZONE, MIN_GCELL_POPL); } void Allocate_Space_for_Grid( reax_system *system ) { int i, j, k, l; - grid *g = &(system->g); - - int total = g->ncell[0] * g->ncell[1] * g->ncell[2]; + grid *g; + int total; g = &(system->g); - g->max_nbrs = (2*g->spread[0]+1) * (2*g->spread[1]+1) * (2*g->spread[2]+1)+3; + g->max_nbrs = (2 * g->spread[0] + 1) * (2 * g->spread[1] + 1) * (2 * g->spread[2] + 1) + 3; + total = g->ncell[0] * g->ncell[1] * g->ncell[2]; /* allocate space for the new grid */ g->top = (int*) calloc( total, sizeof( int )); @@ -73,10 +83,14 @@ void Allocate_Space_for_Grid( reax_system *system ) g->nbrs = (ivec*) calloc( total * g->max_nbrs, sizeof( ivec )); g->nbrs_cp = (rvec*) calloc( 
total * g->max_nbrs, sizeof( rvec )); - for( i = 0; i < g->ncell[0]; i++ ) { - for( j = 0; j < g->ncell[1]; j++ ) { - for( k = 0; k < g->ncell[2]; k++ ) { - for( l = 0; l < g->max_nbrs; ++l ){ + for( i = 0; i < g->ncell[0]; i++ ) + { + for( j = 0; j < g->ncell[1]; j++ ) + { + for( k = 0; k < g->ncell[2]; k++ ) + { + for( l = 0; l < g->max_nbrs; ++l ) + { g->nbrs[ index_grid_nbrs (i, j, k, l, g) ][0] = -1; g->nbrs[ index_grid_nbrs (i, j, k, l, g) ][1] = -1; g->nbrs[ index_grid_nbrs (i, j, k, l, g) ][2] = -1; @@ -110,49 +124,74 @@ int Shift(int p, int dp, int dim, grid *g ) int dim_len = 0; int newp = p + dp; - switch( dim ) { - case 0: dim_len = g->ncell[0]; - break; - case 1: dim_len = g->ncell[1]; - break; - case 2: dim_len = g->ncell[2]; + switch ( dim ) + { + case 0: + dim_len = g->ncell[0]; + break; + case 1: + dim_len = g->ncell[1]; + break; + case 2: + dim_len = g->ncell[2]; + } + + while ( newp < 0 ) + { + newp = newp + dim_len; + } + while ( newp >= dim_len ) + { + newp = newp - dim_len; } - while( newp < 0 ) newp = newp + dim_len; - while( newp >= dim_len ) newp = newp - dim_len; return newp; } /* finds the closest point between two grid cells denoted by c1 and c2. periodic boundary conditions are taken into consideration as well. 
*/ -void Find_Closest_Point( grid *g, int c1x, int c1y, int c1z, - int c2x, int c2y, int c2z, rvec closest_point ) +void Find_Closest_Point( grid *g, int c1x, int c1y, int c1z, + int c2x, int c2y, int c2z, rvec closest_point ) { int i, d; ivec c1 = { c1x, c1y, c1z }; ivec c2 = { c2x, c2y, c2z }; - for( i = 0; i < 3; i++ ) { - if( g->ncell[i] < 5 ) { + for ( i = 0; i < 3; i++ ) + { + if ( g->ncell[i] < 5 ) + { closest_point[i] = NEG_INF - 1.; continue; } d = c2[i] - c1[i]; - if( abs(d) <= g->ncell[i] / 2 ) { - if( d > 0 ) + if ( abs(d) <= g->ncell[i] / 2 ) + { + if ( d > 0 ) + { closest_point[i] = c2[i] * g->len[i]; + } else if ( d == 0 ) + { closest_point[i] = NEG_INF - 1.; + } else + { closest_point[i] = ( c2[i] + 1 ) * g->len[i]; + } } - else { - if( d > 0 ) + else + { + if ( d > 0 ) + { closest_point[i] = ( c2[i] - g->ncell[i] + 1 ) * g->len[i]; - else + } + else + { closest_point[i] = ( c2[i] + g->ncell[i] ) * g->len[i]; + } } } } @@ -168,29 +207,36 @@ void Find_Neighbor_GridCells( grid *g ) rvec *cp_stack; /* pick up a cell in the grid */ - for( i = 0; i < g->ncell[0]; i++ ) - for( j = 0; j < g->ncell[1]; j++ ) - for( k = 0; k < g->ncell[2]; k++ ) { + for ( i = 0; i < g->ncell[0]; i++ ) + { + for ( j = 0; j < g->ncell[1]; j++ ) + { + for ( k = 0; k < g->ncell[2]; k++ ) + { nbrs_stack = &( g->nbrs[ index_grid_nbrs (i, j, k, 0, g) ] ); cp_stack = &( g->nbrs_cp[ index_grid_nbrs (i, j, k, 0, g) ] ); stack_top = 0; //fprintf( stderr, "grid1: %d %d %d\n", i, j, k ); /* choose an unmarked neighbor cell*/ - for( di = -g->spread[0]; di <= g->spread[0]; di++ ) { + for ( di = -g->spread[0]; di <= g->spread[0]; di++ ) + { x = Shift( i, di, 0, g ); - for( dj = -g->spread[1]; dj <= g->spread[1]; dj++ ) { + for ( dj = -g->spread[1]; dj <= g->spread[1]; dj++ ) + { y = Shift( j, dj, 1, g ); - for( dk = -g->spread[2]; dk <= g->spread[2]; dk++ ) { + for ( dk = -g->spread[2]; dk <= g->spread[2]; dk++ ) + { z = Shift( k, dk, 2, g ); //fprintf( stderr, "\tgrid2: %d %d %d\n", x, y, 
z ); - if( !g->mark[ index_grid_3d (x, y, z, g) ] ) { + if( !g->mark[ index_grid_3d (x, y, z, g) ] ) + { /*(di < 0 || // 9 combinations - (di == 0 && dj < 0) || // 3 combinations - (di == 0 && dj == 0 && dk < 0) ) )*/ + (di == 0 && dj < 0) || // 3 combinations + (di == 0 && dj == 0 && dk < 0) ) )*/ /* put the neighbor cell into the stack and mark it */ nbrs_stack[stack_top][0] = x; nbrs_stack[stack_top][1] = y; @@ -198,8 +244,8 @@ void Find_Neighbor_GridCells( grid *g ) g->mark[ index_grid_3d(x,y,z,g) ] = 1; Find_Closest_Point( g, i, j, k, x, y, z, cp_stack[stack_top] ); - //fprintf( stderr, "\tcp: %lf %lf %lf\n", - // cp_stack[stack_top][0], cp_stack[stack_top][1], + //fprintf( stderr, "\tcp: %lf %lf %lf\n", + // cp_stack[stack_top][0], cp_stack[stack_top][1], // cp_stack[stack_top][2]); stack_top++; } @@ -220,6 +266,8 @@ void Find_Neighbor_GridCells( grid *g ) nbrs_stack[stack_top][2] = -1; Reset_Marks( g, nbrs_stack, stack_top ); } + } + } } @@ -234,9 +282,13 @@ void Setup_Grid( reax_system* system ) /* determine number of grid cells in each direction */ ivec_rScale( ncell, 1. / g->cell_size, my_box->box_norms ); - for( d = 0; d < 3; ++d ) - if( ncell[d] <= 0 ) + for ( d = 0; d < 3; ++d ) + { + if ( ncell[d] <= 0 ) + { ncell[d] = 1; + } + } /* find the number of grid cells */ g->total = ncell[0] * ncell[1] * ncell[2]; @@ -270,25 +322,34 @@ void Update_Grid( reax_system* system ) /* determine number of grid cells in each direction */ ivec_rScale( ncell, 1. 
/ g->cell_size, my_box->box_norms ); - for( d = 0; d < 3; ++d ) - if( ncell[d] == 0 ) + for ( d = 0; d < 3; ++d ) + { + if ( ncell[d] == 0 ) + { ncell[d] = 1; + } + } - if( ivec_isEqual( ncell, g->ncell ) ) {/* ncell are unchanged */ + if ( ivec_isEqual( ncell, g->ncell ) ) /* ncell are unchanged */ + { /* update cell lengths */ rvec_iDivide( g->len, my_box->box_norms, g->ncell ); rvec_Invert( g->inv_len, g->len ); /* update closest point distances between gcells */ - for( i = 0; i < g->ncell[0]; i++ ) - for( j = 0; j < g->ncell[1]; j++ ) - for( k = 0; k < g->ncell[2]; k++ ) { + for ( i = 0; i < g->ncell[0]; i++ ) + { + for ( j = 0; j < g->ncell[1]; j++ ) + { + for ( k = 0; k < g->ncell[2]; k++ ) + { nbrs = &( g->nbrs[ index_grid_nbrs (i, j, k, 0, g) ] ); nbrs_cp = &( g->nbrs_cp[ index_grid_nbrs (i, j, k, 0, g) ] ); //fprintf( stderr, "gridcell %d %d %d\n", i, j, k ); itr = 0; - while( nbrs[itr][0] >= 0 ){ + while ( nbrs[itr][0] >= 0 ) + { x = nbrs[itr][0]; y = nbrs[itr][1]; z = nbrs[itr][2]; @@ -297,9 +358,12 @@ void Update_Grid( reax_system* system ) ++itr; } } + } + } } - else{ /* at least one of ncell has changed */ - Deallocate_Grid_Space( g ); + else /* at least one of ncell has changed */ + { + Deallocate_Grid_Space( g ); /* update number of grid cells */ g->total = ncell[0] * ncell[1] * ncell[2]; ivec_Copy( g->ncell, ncell ); @@ -311,10 +375,10 @@ void Update_Grid( reax_system* system ) Find_Neighbor_GridCells( g ); #if defined(DEBUG_FOCUS) fprintf( stderr, "updated grid: " ); - fprintf( stderr, "ncell[%d %d %d] ", - g->ncell[0], g->ncell[1], g->ncell[2] ); - fprintf( stderr, "len[%5.2f %5.2f %5.2f] ", - g->len[0], g->len[1], g->len[2] ); + fprintf( stderr, "ncell[%d %d %d] ", + g->ncell[0], g->ncell[1], g->ncell[2] ); + fprintf( stderr, "len[%5.2f %5.2f %5.2f] ", + g->len[0], g->len[1], g->len[2] ); fprintf( stderr, "g->max_atoms = %d\n", g->max_atoms ); #endif } @@ -328,40 +392,59 @@ void Bin_Atoms( reax_system* system, static_storage *workspace ) grid *g 
= &( system->g ); Reset_Grid( g ); - - for( l = 0; l < system->N; l++ ) { + for ( l = 0; l < system->N; l++ ) + { i = (int)(system->atoms[l].x[0] * g->inv_len[0]); j = (int)(system->atoms[l].x[1] * g->inv_len[1]); k = (int)(system->atoms[l].x[2] * g->inv_len[2]); #ifdef __BNVT_FIX__ - if (i >= g->ncell[0]) i = g->ncell[0]-1; - if (j >= g->ncell[1]) j = g->ncell[1]-1; - if (k >= g->ncell[2]) k = g->ncell[2]-1; + if (i >= g->ncell[0]) + { + i = g->ncell[0]-1; + } + if (j >= g->ncell[1]) + { + j = g->ncell[1]-1; + } + if (k >= g->ncell[2]) + { + k = g->ncell[2]-1; + } #endif g->atoms[ index_grid_atoms (i,j,k,g->top[ index_grid_3d (i,j,k,g) ], g) ] = l; g->top[index_grid_3d (i,j,k,g) ]++; - //fprintf( stderr, "\tatom%-6d (%8.3f%8.3f%8.3f) --> (%3d%3d%3d)\n", - //l, system->atoms[l].x[0], system->atoms[l].x[1], system->atoms[l].x[2], - //i, j, k ); + // fprintf( stderr, "\tatom%-6d (%8.3f%8.3f%8.3f) --> (%3d%3d%3d)\n", + // l, system->atoms[l].x[0], system->atoms[l].x[1], system->atoms[l].x[2], + // i, j, k ); } max_atoms = 0; - for( i = 0; i < g->ncell[0]; i++ ) - for( j = 0; j < g->ncell[1]; j++ ) - for( k = 0; k < g->ncell[2]; k++ ) + for ( i = 0; i < g->ncell[0]; i++ ) + { + for ( j = 0; j < g->ncell[1]; j++ ) + { + for ( k = 0; k < g->ncell[2]; k++ ) + { if( max_atoms < g->top[ index_grid_3d (i, j, k, g) ] ) + { max_atoms = g->top[ index_grid_3d (i, j, k, g) ]; + } + } + } + } /* check if current gcell->max_atoms is safe */ - if( max_atoms >= g->max_atoms * SAFE_ZONE ) - workspace->realloc.gcell_atoms = MAX(max_atoms*SAFE_ZONE,MIN_GCELL_POPL); + if ( max_atoms >= g->max_atoms * SAFE_ZONE ) + { + workspace->realloc.gcell_atoms = MAX(max_atoms * SAFE_ZONE, MIN_GCELL_POPL); + } } -inline void reax_atom_Copy( reax_atom *dest, reax_atom *src ) +static inline void reax_atom_Copy( reax_atom *dest, reax_atom *src ) { dest->type = src->type; rvec_Copy( dest->x, src->x ); @@ -370,30 +453,37 @@ inline void reax_atom_Copy( reax_atom *dest, reax_atom *src ) } -void 
Copy_Storage( reax_system *system, static_storage *workspace, - int top, int old_id, int old_type, - int *num_H, real *v, real *s, real *t, - int *orig_id, rvec *f_old ) +void Copy_Storage( reax_system *system, static_storage *workspace, + int top, int old_id, int old_type, + int *num_H, real *v, real *s, real *t, + int *orig_id, rvec *f_old ) { int i; - for( i = 0; i < RESTART+1; ++i ) + for ( i = 0; i < RESTART + 1; ++i ) + { v[ index_wkspace_sys (i,top, system->N) ] = workspace->v[ index_wkspace_sys (i,old_id, system->N) ]; + } - for( i = 0; i < 3; ++i ) { - s[ index_wkspace_sys (i,top, system->N) ] = workspace->s[ index_wkspace_sys (i,old_id, system->N) ]; - t[ index_wkspace_sys (i,top, system->N) ] = workspace->t[ index_wkspace_sys (i,old_id, system->N) ]; + for ( i = 0; i < 3; ++i ) + { + s[ index_wkspace_sys(i,top, system->N) ] = workspace->s[ index_wkspace_sys(i,old_id, system->N) ]; + t[ index_wkspace_sys(i,top, system->N) ] = workspace->t[ index_wkspace_sys(i,old_id, system->N) ]; } orig_id[top] = workspace->orig_id[old_id]; - workspace->Hdia_inv[top] = 1. 
/ system->reaxprm.sbp[ old_type ].eta; workspace->b_s[top] = -system->reaxprm.sbp[ old_type ].chi; - workspace->b_t[top] = -1.0; + workspace->b_t[top] = -1.0; - if( system->reaxprm.sbp[ old_type ].p_hbond == 1 ) // H atom + if ( system->reaxprm.sbp[ old_type ].p_hbond == 1 ) // H atom + { workspace->hbond_index[top] = (*num_H)++; - else workspace->hbond_index[top] = -1; + } + else + { + workspace->hbond_index[top] = -1; + } rvec_Copy( f_old[top], workspace->f_old[old_id] ); } @@ -404,12 +494,12 @@ void Free_Storage( static_storage *workspace ) free( workspace->v ); free( workspace->s ); free( workspace->t ); - free( workspace->orig_id ); + free( workspace->orig_id ); } -void Assign_New_Storage( static_storage *workspace, - real *v, real *s, real *t, +void Assign_New_Storage( static_storage *workspace, + real *v, real *s, real *t, int *orig_id, rvec *f_old ) { workspace->v = v; @@ -425,14 +515,20 @@ void Assign_New_Storage( static_storage *workspace, void Cluster_Atoms( reax_system *system, static_storage *workspace ) { - int i, j, k, l, top, old_id, num_H = 0; + int i, j, k, l, top, old_id, num_H; reax_atom *old_atom; - grid *g = &( system->g ); - reax_atom *new_atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) ); - int *orig_id = (int *) calloc( system->N, sizeof( int ) ); + grid *g; + reax_atom *new_atoms; + int *orig_id ; real *v; real *s, *t; - rvec *f_old = (rvec*) calloc( system->N, sizeof(rvec) ); + rvec *f_old; + + num_H = 0; + g = &( system->g ); + new_atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) ); + orig_id = (int *) calloc( system->N, sizeof( int ) ); + f_old = (rvec*) calloc( system->N, sizeof(rvec) ); s = (real*) calloc( 3, sizeof( real ) * system->N ); t = (real*) calloc( 3, sizeof( real ) * system->N ); @@ -440,24 +536,30 @@ void Cluster_Atoms( reax_system *system, static_storage *workspace ) top = 0; - for( i = 0; i < g->ncell[0]; i++ ) - for( j = 0; j < g->ncell[1]; j++ ) - for( k = 0; k < g->ncell[2]; k++ ) { + for ( i = 0; i 
< g->ncell[0]; i++ ) + { + for ( j = 0; j < g->ncell[1]; j++ ) + { + for ( k = 0; k < g->ncell[2]; k++ ) + { g->start[ index_grid_3d (i, j, k, g) ] = top; - for( l = 0; l < g->top[ index_grid_3d (i, j, k, g) ]; ++l ) { + for( l = 0; l < g->top[ index_grid_3d (i, j, k, g) ]; ++l ) + { old_id = g->atoms[ index_grid_atoms (i, j, k, l, g) ]; old_atom = &( system->atoms[old_id] ); // fprintf( stderr, "%d <-- %d\n", top, old_id ); reax_atom_Copy( &(new_atoms[top]), old_atom ); - Copy_Storage( system, workspace, top, old_id, old_atom->type, - &num_H, v, s, t, orig_id, f_old ); + Copy_Storage( system, workspace, top, old_id, old_atom->type, + &num_H, v, s, t, orig_id, f_old ); ++top; } g->end[ index_grid_3d (i, j, k, g) ] = top; } + } + } free( system->atoms ); diff --git a/PuReMD-GPU/src/init_md.c b/PuReMD-GPU/src/init_md.c index 2a2ce1270e2c694722e489b9a3f38f8dd48177a1..d1b40c6a224208ba3b24e00682b6872be8c16752 100644 --- a/PuReMD-GPU/src/init_md.c +++ b/PuReMD-GPU/src/init_md.c @@ -1,19 +1,20 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ @@ -25,8 +26,8 @@ #include "forces.h" #include "grid.h" #include "index_utils.h" -#include "lin_alg.h" #include "integrate.h" +#include "lin_alg.h" #include "neighbors.h" #include "list.h" #include "lookup.h" @@ -34,21 +35,20 @@ #include "reset_utils.h" #include "system_props.h" #include "traj.h" +#include "tool_box.h" #include "vector.h" -void Generate_Initial_Velocities(reax_system *system, real T ) +void Generate_Initial_Velocities( reax_system *system, real T ) { int i; real scale, norm; - if( T <= 0.1 ) + if ( T <= 0.1 ) { - for ( i = 0; i < system->N; i++ ) - { + for (i = 0; i < system->N; i++) rvec_MakeZero( system->atoms[i].v ); - } #if defined(DEBUG) fprintf( stderr, "no random velocities...\n" ); @@ -56,73 +56,74 @@ void Generate_Initial_Velocities(reax_system *system, real T ) } else { - for( i = 0; i < system->N; i++ ) + for ( i = 0; i < system->N; i++ ) { rvec_Random( system->atoms[i].v ); norm = rvec_Norm_Sqr( system->atoms[i].v ); - scale = SQRT( system->reaxprm.sbp[ system->atoms[i].type ].mass * - norm / (3.0 * K_B * T) ); + scale = SQRT( system->reaxprm.sbp[ system->atoms[i].type ].mass * + norm / (3.0 * K_B * T) ); - rvec_Scale( system->atoms[i].v, 1.0/scale, system->atoms[i].v ); + rvec_Scale( system->atoms[i].v, 1.0 / scale, system->atoms[i].v ); - /* - fprintf( stderr, "v = %f %f %f\n", - system->atoms[i].v[0],system->atoms[i].v[1],system->atoms[i].v[2]); - fprintf( stderr, "scale = %f\n", scale ); - fprintf( stderr, "v = %f %f %f\n", - system->atoms[i].v[0],system->atoms[i].v[1],system->atoms[i].v[2]); - */ + /*fprintf( stderr, "v = %f %f %f\n", + system->atoms[i].v[0],system->atoms[i].v[1],system->atoms[i].v[2]); + fprintf( stderr, "scale = %f\n", scale ); + fprintf( stderr, "v = %f %f %f\n", + system->atoms[i].v[0],system->atoms[i].v[1],system->atoms[i].v[2]);*/ } } } -void 
Init_System( reax_system *system, control_params *control, +void Init_System( reax_system *system, control_params *control, simulation_data *data ) { int i; rvec dx; - if( !control->restart ) + if ( !control->restart ) { Reset_Atoms( system ); } Compute_Total_Mass( system, data ); - Compute_Center_of_Mass( system, data, stderr ); /* reposition atoms */ // just fit the atoms to the periodic box - if( control->reposition_atoms == 0 ) + if ( control->reposition_atoms == 0 ) { rvec_MakeZero( dx ); } // put the center of mass to the center of the box - else if( control->reposition_atoms == 1 ) + else if ( control->reposition_atoms == 1 ) { rvec_Scale( dx, 0.5, system->box.box_norms ); rvec_ScaledAdd( dx, -1., data->xcm ); } // put the center of mass to the origin - else if( control->reposition_atoms == 2 ) { + else if ( control->reposition_atoms == 2 ) + { rvec_Scale( dx, -1., data->xcm ); } - else { + else + { fprintf( stderr, "UNKNOWN OPTION: reposition_atoms. Terminating...\n" ); exit( UNKNOWN_OPTION ); } - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { Inc_on_T3( system->atoms[i].x, dx, &(system->box) ); - /*fprintf( stderr, "%6d%2d%8.3f%8.3f%8.3f\n", - i, system->atoms[i].type, + /*fprintf( stderr, "%6d%2d%8.3f%8.3f%8.3f\n", + i, system->atoms[i].type, system->atoms[i].x[0], system->atoms[i].x[1], system->atoms[i].x[2] );*/ } /* Initialize velocities so that desired init T can be attained */ - if( !control->restart || (control->restart && control->random_vel) ) { + if ( !control->restart || (control->restart && control->random_vel) ) + { Generate_Initial_Velocities( system, control->T_init ); } @@ -130,96 +131,153 @@ void Init_System( reax_system *system, control_params *control, } -void Init_Simulation_Data( reax_system *system, control_params *control, - simulation_data *data, output_controls *out_control, - evolve_function *Evolve ) +void Init_Simulation_Data( reax_system *system, control_params *control, + simulation_data *data, 
output_controls *out_control, evolve_function *Evolve ) { Reset_Simulation_Data( data ); - if( !control->restart ) + if ( !control->restart ) + { data->step = data->prev_steps = 0; + } - switch( control->ensemble ) { - case NVE: - data->N_f = 3 * system->N; - *Evolve = Velocity_Verlet_NVE; - break; + switch ( control->ensemble ) + { + case NVE: + data->N_f = 3 * system->N; + *Evolve = Velocity_Verlet_NVE; + break; - case NVT: - data->N_f = 3 * system->N + 1; - //control->Tau_T = 100 * data->N_f * K_B * control->T_final; - if( !control->restart || (control->restart && control->random_vel) ) { - data->therm.G_xi = control->Tau_T * (2.0 * data->E_Kin - - data->N_f * K_B * control->T ); - data->therm.v_xi = data->therm.G_xi * control->dt; - data->therm.v_xi_old = 0; - data->therm.xi = 0; + case NVT: + data->N_f = 3 * system->N + 1; + //control->Tau_T = 100 * data->N_f * K_B * control->T_final; + if ( !control->restart || (control->restart && control->random_vel) ) + { + data->therm.G_xi = control->Tau_T * (2.0 * data->E_Kin - + data->N_f * K_B * control->T ); + data->therm.v_xi = data->therm.G_xi * control->dt; + data->therm.v_xi_old = 0; + data->therm.xi = 0; #if defined(DEBUG_FOCUS) - fprintf( stderr, "init_md: G_xi=%f Tau_T=%f E_kin=%f N_f=%f v_xi=%f\n", - data->therm.G_xi, control->Tau_T, data->E_Kin, - data->N_f, data->therm.v_xi ); + fprintf( stderr, "init_md: G_xi=%f Tau_T=%f E_kin=%f N_f=%f v_xi=%f\n", + data->therm.G_xi, control->Tau_T, data->E_Kin, + data->N_f, data->therm.v_xi ); #endif - } + } - *Evolve = Velocity_Verlet_Nose_Hoover_NVT_Klein; - break; + *Evolve = Velocity_Verlet_Nose_Hoover_NVT_Klein; + break; - case NPT: // Anisotropic NPT - fprintf( stderr, "THIS OPTION IS NOT YET IMPLEMENTED! 
TERMINATING...\n" ); - exit( UNKNOWN_OPTION ); - data->N_f = 3 * system->N + 9; - if( !control->restart ) { - data->therm.G_xi = control->Tau_T * (2.0 * data->E_Kin - - data->N_f * K_B * control->T ); - data->therm.v_xi = data->therm.G_xi * control->dt; - data->iso_bar.eps = 0.33333 * log(system->box.volume); - //data->inv_W = 1. / (data->N_f*K_B*control->T*SQR(control->Tau_P)); - //Compute_Pressure( system, data, workspace ); - } - *Evolve = Velocity_Verlet_Berendsen_Isotropic_NPT; - break; + case NPT: // Anisotropic NPT + fprintf( stderr, "THIS OPTION IS NOT YET IMPLEMENTED! TERMINATING...\n" ); + exit( UNKNOWN_OPTION ); + data->N_f = 3 * system->N + 9; + if ( !control->restart ) + { + data->therm.G_xi = control->Tau_T * (2.0 * data->E_Kin - + data->N_f * K_B * control->T ); + data->therm.v_xi = data->therm.G_xi * control->dt; + data->iso_bar.eps = 0.33333 * log(system->box.volume); + //data->inv_W = 1. / (data->N_f*K_B*control->T*SQR(control->Tau_P)); + //Compute_Pressure( system, data, workspace ); + } + *Evolve = Velocity_Verlet_Berendsen_Isotropic_NPT; + break; - case sNPT: // Semi-Isotropic NPT - data->N_f = 3 * system->N + 4; - *Evolve = Velocity_Verlet_Berendsen_SemiIsotropic_NPT; - break; + case sNPT: // Semi-Isotropic NPT + data->N_f = 3 * system->N + 4; + *Evolve = Velocity_Verlet_Berendsen_SemiIsotropic_NPT; + break; - case iNPT: // Isotropic NPT - data->N_f = 3 * system->N + 2; - *Evolve = Velocity_Verlet_Berendsen_Isotropic_NPT; - break; + case iNPT: // Isotropic NPT + data->N_f = 3 * system->N + 2; + *Evolve = Velocity_Verlet_Berendsen_Isotropic_NPT; + break; - case bNVT: //berendensen NVT - data->N_f = 3 * system->N + 1; - *Evolve = Velocity_Verlet_Berendsen_NVT; - break; + case bNVT: + data->N_f = 3 * system->N + 1; + *Evolve = Velocity_Verlet_Berendsen_NVT; + fprintf (stderr, " Initializing Velocity_Verlet_Berendsen_NVT .... 
\n"); + break; - default: - break; + default: + break; } Compute_Kinetic_Energy( system, data ); - /* init timing info for the host*/ + /* init timing info */ data->timing.start = Get_Time( ); data->timing.total = data->timing.start; data->timing.nbrs = 0; data->timing.init_forces = 0; data->timing.bonded = 0; data->timing.nonb = 0; - data->timing.QEq = 0; - data->timing.matvecs = 0; + data->timing.QEq = ZERO; + data->timing.QEq_sort_mat_rows = ZERO; + data->timing.pre_comp = ZERO; + data->timing.pre_app = ZERO; + data->timing.solver_iters = 0; + data->timing.solver_spmv = ZERO; + data->timing.solver_vector_ops = ZERO; + data->timing.solver_orthog = ZERO; + data->timing.solver_tri_solve = ZERO; } -void Init_Workspace( reax_system *system, control_params *control, +/* Initialize Taper params */ +void Init_Taper( control_params *control ) +{ + real d1, d7; + real swa, swa2, swa3; + real swb, swb2, swb3; + + swa = control->r_low; + swb = control->r_cut; + + if ( fabs( swa ) > 0.01 ) + { + fprintf( stderr, "Warning: non-zero value for lower Taper-radius cutoff\n" ); + } + + if ( swb < 0 ) + { + fprintf( stderr, "Negative value for upper Taper-radius cutoff\n" ); + exit( INVALID_INPUT ); + } + else if ( swb < 5 ) + { + fprintf( stderr, "Warning: low value for upper Taper-radius cutoff:%f\n", + swb ); + } + + d1 = swb - swa; + d7 = POW( d1, 7.0 ); + swa2 = SQR( swa ); + swa3 = CUBE( swa ); + swb2 = SQR( swb ); + swb3 = CUBE( swb ); + + control->Tap7 = 20.0 / d7; + control->Tap6 = -70.0 * (swa + swb) / d7; + control->Tap5 = 84.0 * (swa2 + 3.0 * swa * swb + swb2) / d7; + control->Tap4 = -35.0 * (swa3 + 9.0 * swa2 * swb + 9.0 * swa * swb2 + swb3 ) / d7; + control->Tap3 = 140.0 * (swa3 * swb + 3.0 * swa2 * swb2 + swa * swb3 ) / d7; + control->Tap2 = -210.0 * (swa3 * swb2 + swa2 * swb3) / d7; + control->Tap1 = 140.0 * swa3 * swb3 / d7; + control->Tap0 = (-35.0 * swa3 * swb2 * swb2 + 21.0 * swa2 * swb3 * swb2 + + 7.0 * swa * swb3 * swb3 + swb3 * swb3 * swb ) / d7; +} + + +void 
Init_Workspace( reax_system *system, control_params *control, static_storage *workspace ) -{ +{ int i; /* Allocate space for hydrogen bond list */ @@ -231,35 +289,27 @@ void Init_Workspace( reax_system *system, control_params *control, workspace->Deltap_boc = (real *) malloc( system->N * sizeof( real ) ); workspace->dDeltap_self = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->Delta = (real *) malloc( system->N * sizeof( real ) ); - workspace->Delta_lp = (real *) malloc( system->N * sizeof( real ) ); + workspace->Delta = (real *) malloc( system->N * sizeof( real ) ); + workspace->Delta_lp = (real *) malloc( system->N * sizeof( real ) ); workspace->Delta_lp_temp = (real *) malloc( system->N * sizeof( real ) ); - workspace->dDelta_lp = (real *) malloc( system->N * sizeof( real ) ); + workspace->dDelta_lp = (real *) malloc( system->N * sizeof( real ) ); workspace->dDelta_lp_temp = (real *) malloc( system->N * sizeof( real ) ); workspace->Delta_e = (real *) malloc( system->N * sizeof( real ) ); workspace->Delta_boc = (real *) malloc( system->N * sizeof( real ) ); - workspace->nlp = (real *) malloc( system->N * sizeof( real ) ); - workspace->nlp_temp = (real *) malloc( system->N * sizeof( real ) ); - workspace->Clp = (real *) malloc( system->N * sizeof( real ) ); + workspace->nlp = (real *) malloc( system->N * sizeof( real ) ); + workspace->nlp_temp = (real *) malloc( system->N * sizeof( real ) ); + workspace->Clp = (real *) malloc( system->N * sizeof( real ) ); workspace->CdDelta = (real *) malloc( system->N * sizeof( real ) ); - workspace->vlpex = (real *) malloc( system->N * sizeof( real ) ); + workspace->vlpex = (real *) malloc( system->N * sizeof( real ) ); /* QEq storage */ - //workspace->H = NULL; - //workspace->L = NULL; - //workspace->U = NULL; - // - workspace->H.start = NULL; - workspace->L.start = NULL; - workspace->U.start = NULL; - - workspace->H.entries = NULL; - workspace->L.entries = NULL; - workspace->U.entries = NULL; - + workspace->H = 
NULL; + workspace->H_sp = NULL; + workspace->L = NULL; + workspace->U = NULL; + workspace->Hdia_inv = NULL; workspace->droptol = (real *) calloc( system->N, sizeof( real ) ); workspace->w = (real *) calloc( system->N, sizeof( real ) ); - workspace->Hdia_inv = (real *) calloc( system->N, sizeof( real ) ); workspace->b = (real *) calloc( system->N * 2, sizeof( real ) ); workspace->b_s = (real *) calloc( system->N, sizeof( real ) ); workspace->b_t = (real *) calloc( system->N, sizeof( real ) ); @@ -273,25 +323,27 @@ void Init_Workspace( reax_system *system, control_params *control, // workspace->s_oldest = (real *) calloc( system->N, sizeof( real ) ); // workspace->t_oldest = (real *) calloc( system->N, sizeof( real ) ); - for( i = 0; i < system->N; ++i ) { - workspace->Hdia_inv[i] = 1./system->reaxprm.sbp[system->atoms[i].type].eta; + for ( i = 0; i < system->N; ++i ) + { workspace->b_s[i] = -system->reaxprm.sbp[ system->atoms[i].type ].chi; workspace->b_t[i] = -1.0; workspace->b[i] = -system->reaxprm.sbp[ system->atoms[i].type ].chi; - workspace->b[i+system->N] = -1.0; + workspace->b[i + system->N] = -1.0; } + //TODO: conditionally allocate based on solver selection /* GMRES storage */ - workspace->y = (real *) calloc( RESTART+1, sizeof( real ) ); - workspace->z = (real *) calloc( RESTART+1, sizeof( real ) ); - workspace->g = (real *) calloc( RESTART+1, sizeof( real ) ); - workspace->hs = (real *) calloc( RESTART+1, sizeof( real ) ); - workspace->hc = (real *) calloc( RESTART+1, sizeof( real ) ); - - workspace->rn = (real *) calloc( (RESTART+1)*system->N*2, sizeof( real) ); - workspace->v = (real *) calloc( (RESTART+1)*system->N, sizeof( real) ); - workspace->h = (real *) calloc( (RESTART+1)*(RESTART+1), sizeof( real) ); + workspace->y = (real *) calloc( RESTART + 1, sizeof( real ) ); + //TODO: unused? 
+ workspace->z = (real *) calloc( RESTART + 1, sizeof( real ) ); + workspace->g = (real *) calloc( RESTART + 1, sizeof( real ) ); + workspace->h = (real *) calloc( (RESTART + 1) * (RESTART + 1), sizeof( real ) ); + workspace->hs = (real *) calloc( RESTART + 1, sizeof( real ) ); + workspace->hc = (real *) calloc( RESTART + 1, sizeof( real ) ); + //TODO: unused? + workspace->rn = (real *) calloc( (RESTART + 1) * system->N * 2, sizeof( real ) ); + workspace->v = (real *) calloc( (RESTART + 1) * system->N, sizeof( real ) ); /* CG storage */ workspace->r = (real *) calloc( system->N, sizeof( real ) ); @@ -304,20 +356,25 @@ void Init_Workspace( reax_system *system, control_params *control, workspace->f_old = (rvec *) malloc( system->N * sizeof( rvec ) ); workspace->v_const = (rvec *) malloc( system->N * sizeof( rvec ) ); - /* storage for analysis */ - if( control->molec_anal || control->diffusion_coef ) + if ( control->molec_anal || control->diffusion_coef ) { workspace->mark = (int *) calloc( system->N, sizeof(int) ); workspace->old_mark = (int *) calloc( system->N, sizeof(int) ); } - else + else + { workspace->mark = workspace->old_mark = NULL; + } - if( control->diffusion_coef ) + if ( control->diffusion_coef ) + { workspace->x_old = (rvec *) calloc( system->N, sizeof( rvec ) ); - else workspace->x_old = NULL; - + } + else + { + workspace->x_old = NULL; + } #ifdef TEST_FORCES workspace->dDelta = (rvec *) malloc( system->N * sizeof( rvec ) ); @@ -344,9 +401,14 @@ void Init_Workspace( reax_system *system, control_params *control, workspace->realloc.gcell_atoms = -1; Reset_Workspace( system, workspace ); + + /* Initialize Taper function */ + Init_Taper( control ); } -void compare_far_neighbors (int *test, int *start, int *end, far_neighbor_data *data, list *slist, int N) + +void compare_far_neighbors( int *test, int *start, int *end, + far_neighbor_data *data, list *slist, int N ) { int index = 0; int count = 0; @@ -369,16 +431,19 @@ void compare_far_neighbors (int 
*test, int *start, int *end, far_neighbor_data * } */ - - for (i = 0; i < N; i++){ - index = Start_Index (i, slist); + for (i = 0; i < N; i++) + { + index = Start_Index( i, slist ); //fprintf (stderr, "GPU : Neighbors of atom --> %d (start: %d , end: %d )\n", i, start[i], end[i]); - - for (j = start[i]; j < end[i]; j++){ + for (j = start[i]; j < end[i]; j++) + { gpu = data[j]; - if (i < data[j].nbr) continue; + if (i < data[j].nbr) + { + continue; + } /* if (i < data[j].nbr) { //fprintf (stderr, " atom %d and neighbor %d @ index %d\n", i, data[j].nbr, j); @@ -386,7 +451,6 @@ void compare_far_neighbors (int *test, int *start, int *end, far_neighbor_data * int dest = i; int x; - for (x = start[src]; x < end[src]; x++) { if (data[x].nbr != dest) continue; @@ -431,9 +495,11 @@ void compare_far_neighbors (int *test, int *start, int *end, far_neighbor_data * cpu = slist->select.far_nbr_list[index]; //if ( (gpu.nbr != cpu.nbr) || (gpu.d != cpu.d) ){ //if ( (gpu->d != cpu->d) ){ - if ( (gpu.nbr != cpu.nbr) || (gpu.d != cpu.d) || - (cpu.dvec[0] != gpu.dvec[0]) || (cpu.dvec[1] != gpu.dvec[1]) || (cpu.dvec[2] != gpu.dvec[2]) || - (cpu.rel_box[0] != gpu.rel_box[0]) || (cpu.rel_box[1] != gpu.rel_box[1]) || (cpu.rel_box[2] != gpu.rel_box[2])) { + if ( (gpu.nbr != cpu.nbr) || (gpu.d != cpu.d) + ||(cpu.dvec[0] != gpu.dvec[0]) || (cpu.dvec[1] != gpu.dvec[1]) + || (cpu.dvec[2] != gpu.dvec[2]) || (cpu.rel_box[0] != gpu.rel_box[0]) + || (cpu.rel_box[1] != gpu.rel_box[1]) || (cpu.rel_box[2] != gpu.rel_box[2])) + { //if ( (gpu.dvec[0] != i) || (gpu.dvec[1] != i) ||(gpu.dvec[2] != i) || // (gpu.rel_box[0] != i) || (gpu.rel_box[1] != i) ||(gpu.rel_box[2] != i) ) { //if (memcmp (&gpu, &cpu, FAR_NEIGHBOR_SIZE - RVEC_SIZE - INT_SIZE )){ @@ -457,16 +523,16 @@ void compare_far_neighbors (int *test, int *start, int *end, far_neighbor_data * count ++; } - //fprintf (stderr, "GPU (neighbor %d , d %d )\n", gpu->nbr, gpu->d); - index ++; + //fprintf (stderr, "GPU (neighbor %d , d %d )\n", gpu->nbr, 
gpu->d); + index ++; } - if (index != End_Index (i, slist)) + if (index != End_Index( i, slist )) { fprintf( stderr, "End index does not match for atom --> %d end index (%d) Cpu (%d, %d ) gpu (%d, %d)\n", i, index, Start_Index (i, slist), End_Index(i, slist), - start[i], end[i]); + start[i], end[i] ); exit( 10 ); } } @@ -518,112 +584,118 @@ void compare_far_neighbors (int *test, int *start, int *end, far_neighbor_data * } -void Init_Lists( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - list **lists, output_controls *out_control ) +void Init_Lists( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, + list **lists, output_controls *out_control ) { int i, num_nbrs, num_hbonds, num_bonds, num_3body, Htop; int *hb_top, *bond_top; - real t_start, t_elapsed; - num_nbrs = Estimate_NumNeighbors( system, control, workspace, lists ); - -#ifdef __DEBUG_CUDA__ - fprintf (stderr, "Serial NumNeighbors ---> %d \n", num_nbrs); -#endif - - if( !Make_List(system->N, num_nbrs, TYP_FAR_NEIGHBOR, (*lists)+FAR_NBRS ) ) { + if ( !Make_List(system->N, num_nbrs, TYP_FAR_NEIGHBOR, (*lists) + FAR_NBRS) ) + { fprintf(stderr, "Problem in initializing far nbrs list. 
Terminating!\n"); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } #if defined(DEBUG_FOCUS) - fprintf( stderr, "memory allocated: far_nbrs = %ldMB\n", - num_nbrs * sizeof(far_neighbor_data) / (1024*1024) ); -#endif - - t_start = Get_Time (); - Generate_Neighbor_Lists(system,control,data,workspace,lists,out_control); - t_elapsed = Get_Timing_Info ( t_start ); - -#ifdef __DEBUG_CUDA__ - fprintf (stderr, " Timing Generate Neighbors %lf \n", t_elapsed ); + fprintf( stderr, "memory allocated: far_nbrs = %ldMB\n", + num_nbrs * sizeof(far_neighbor_data) / (1024 * 1024) ); #endif + Generate_Neighbor_Lists(system, control, data, workspace, lists, out_control); Htop = 0; hb_top = (int*) calloc( system->N, sizeof(int) ); bond_top = (int*) calloc( system->N, sizeof(int) ); num_3body = 0; - Estimate_Storage_Sizes( system, control, lists, + Estimate_Storage_Sizes( system, control, lists, &Htop, hb_top, bond_top, &num_3body ); - Allocate_Matrix( &(workspace->H), system->N, Htop ); - + if ( Allocate_Matrix( workspace->H, system->N, Htop ) == FAILURE ) + { + fprintf( stderr, "Not enough space for init matrices. Terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + /* TODO: better estimate for H_sp? + * If so, need to refactor Estimate_Storage_Sizes + * to use various cut-off distances as parameters + * (non-bonded, hydrogen, 3body, etc.) */ + if ( Allocate_Matrix( workspace->H_sp, system->N, Htop ) == FAILURE ) + { + fprintf( stderr, "Not enough space for init matrices. 
Terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } #if defined(DEBUG_FOCUS) fprintf( stderr, "estimated storage - Htop: %d\n", Htop ); - fprintf( stderr, "memory allocated: H = %ldMB\n", - Htop * sizeof(sparse_matrix_entry) / (1024*1024) ); + fprintf( stderr, "memory allocated: H = %ldMB\n", + Htop * sizeof(sparse_matrix_entry) / (1024 * 1024) ); #endif workspace->num_H = 0; - if( control->hb_cut > 0 ) { + if ( control->hb_cut > 0 ) + { /* init H indexes */ - for( i = 0; i < system->N; ++i ) - if( system->reaxprm.sbp[ system->atoms[i].type ].p_hbond == 1 ) // H atom + for ( i = 0; i < system->N; ++i ) + { + // H atom + if ( system->reaxprm.sbp[ system->atoms[i].type ].p_hbond == 1 ) + { workspace->hbond_index[i] = workspace->num_H++; - else workspace->hbond_index[i] = -1; - - Allocate_HBond_List( system->N, workspace->num_H, workspace->hbond_index, - hb_top, (*lists)+HBONDS ); - num_hbonds = hb_top[system->N-1]; + } + else + { + workspace->hbond_index[i] = -1; + } + } -#ifdef __DEBUG_CUDA__ - fprintf( stderr, "Serial num_hbonds: %d\n", num_hbonds ); -#endif + Allocate_HBond_List( system->N, workspace->num_H, workspace->hbond_index, + hb_top, (*lists) + HBONDS ); + num_hbonds = hb_top[system->N - 1]; #if defined(DEBUG_FOCUS) fprintf( stderr, "estimated storage - num_hbonds: %d\n", num_hbonds ); - fprintf( stderr, "memory allocated: hbonds = %ldMB\n", - num_hbonds * sizeof(hbond_data) / (1024*1024) ); + fprintf( stderr, "memory allocated: hbonds = %ldMB\n", + num_hbonds * sizeof(hbond_data) / (1024 * 1024) ); #endif } /* bonds list */ - Allocate_Bond_List( system->N, bond_top, (*lists)+BONDS ); - num_bonds = bond_top[system->N-1]; + Allocate_Bond_List( system->N, bond_top, (*lists) + BONDS ); + num_bonds = bond_top[system->N - 1]; #if defined(DEBUG_FOCUS) fprintf( stderr, "estimated storage - num_bonds: %d\n", num_bonds ); - fprintf( stderr, "memory allocated: bonds = %ldMB\n", - num_bonds * sizeof(bond_data) / (1024*1024) ); + fprintf( stderr, "memory 
allocated: bonds = %ldMB\n", + num_bonds * sizeof(bond_data) / (1024 * 1024) ); #endif -#ifdef __DEBUG_CUDA__ - fprintf (stderr, " host num_3body : %d \n", num_3body); - fprintf (stderr, " host num_bonds : %d \n", num_bonds); -#endif +//fprintf (stderr, " **** sizeof 3 body : %d \n", sizeof (three_body_interaction_data)); +//fprintf (stderr, " **** num_3body : %d \n", num_3body); +//fprintf (stderr, " **** num_bonds : %d \n", num_bonds); /* 3bodies list */ - if(!Make_List(num_bonds, num_3body, TYP_THREE_BODY, (*lists)+THREE_BODIES )) { + if (!Make_List(num_bonds, num_3body, TYP_THREE_BODY, (*lists) + THREE_BODIES)) + { fprintf( stderr, "Problem in initializing angles list. Terminating!\n" ); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "estimated storage - num_3body: %d\n", num_3body ); - fprintf( stderr, "memory allocated: 3-body = %ldMB\n", - num_3body * sizeof(three_body_interaction_data) / (1024*1024) ); + fprintf( stderr, "memory allocated: 3-body = %ldMB\n", + num_3body * sizeof(three_body_interaction_data) / (1024 * 1024) ); #endif #ifdef TEST_FORCES - if(!Make_List( system->N, num_bonds * 8, TYP_DDELTA, (*lists) + DDELTA )) { + if (!Make_List( system->N, num_bonds * 8, TYP_DDELTA, (*lists) + DDELTA )) + { fprintf( stderr, "Problem in initializing dDelta list. Terminating!\n" ); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } - if( !Make_List( num_bonds, num_bonds*MAX_BONDS*3, TYP_DBO, (*lists)+DBO ) ) { + if ( !Make_List( num_bonds, num_bonds * MAX_BONDS * 3, TYP_DBO, (*lists) + DBO ) ) + { fprintf( stderr, "Problem in initializing dBO list. 
Terminating!\n" ); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } #endif @@ -632,83 +704,91 @@ void Init_Lists( reax_system *system, control_params *control, } -void Init_Out_Controls(reax_system *system, control_params *control, +void Init_Out_Controls(reax_system *system, control_params *control, static_storage *workspace, output_controls *out_control) { char temp[1000]; /* Init trajectory file */ - if( out_control->write_steps > 0 ) { + if ( out_control->write_steps > 0 ) + { strcpy( temp, control->sim_name ); strcat( temp, ".trj" ); out_control->trj = fopen( temp, "w" ); out_control->write_header( system, control, workspace, out_control ); } - if( out_control->energy_update_freq > 0 ) { + if ( out_control->energy_update_freq > 0 ) + { /* Init out file */ strcpy( temp, control->sim_name ); strcat( temp, ".out" ); out_control->out = fopen( temp, "w" ); fprintf( out_control->out, "%-6s%16s%16s%16s%11s%11s%13s%13s%13s\n", - "step", "total energy", "poten. energy", "kin. energy", - "temp.", "target", "volume", "press.", "target" ); + "step", "total energy", "poten. energy", "kin. 
energy", + "temp.", "target", "volume", "press.", "target" ); fflush( out_control->out ); /* Init potentials file */ strcpy( temp, control->sim_name ); strcat( temp, ".pot" ); out_control->pot = fopen( temp, "w" ); - fprintf( out_control->pot, - "%-6s%13s%13s%13s%13s%13s%13s%13s%13s%13s%13s%13s\n", - "step", "ebond", "eatom", "elp", "eang", "ecoa", "ehb", - "etor", "econj", "evdw","ecoul", "epol" ); + fprintf( out_control->pot, + "%-6s%13s%13s%13s%13s%13s%13s%13s%13s%13s%13s%13s\n", + "step", "ebond", "eatom", "elp", "eang", "ecoa", "ehb", + "etor", "econj", "evdw", "ecoul", "epol" ); fflush( out_control->pot ); /* Init log file */ strcpy( temp, control->sim_name ); strcat( temp, ".log" ); out_control->log = fopen( temp, "w" ); - fprintf( out_control->log, "%-6s%10s%10s%10s%10s%10s%10s%10s\n", - "step", "total", "neighbors", "init", "bonded", - "nonbonded", "QEq", "matvec" ); + fprintf( out_control->log, "%-6s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n", + "step", "total", "neighbors", "init", "bonded", + "nonbonded", "QEq", "QEq Sort", "S iters", "Pre Comp", "Pre App", + "S spmv", "S vec ops", "S orthog", "S tsolve" ); } /* Init pressure file */ - if( control->ensemble == NPT || - control->ensemble == iNPT || - control->ensemble == sNPT ) { + if ( control->ensemble == NPT || + control->ensemble == iNPT || + control->ensemble == sNPT ) + { strcpy( temp, control->sim_name ); strcat( temp, ".prs" ); out_control->prs = fopen( temp, "w" ); fprintf( out_control->prs, "%-6s%13s%13s%13s%13s%13s%13s%13s%13s\n", - "step", "norm_x", "norm_y", "norm_z", - "press_x", "press_y", "press_z", "target_p", "volume" ); + "step", "norm_x", "norm_y", "norm_z", + "press_x", "press_y", "press_z", "target_p", "volume" ); fflush( out_control->prs ); } /* Init molecular analysis file */ - if( control->molec_anal ) { + if ( control->molec_anal ) + { sprintf( temp, "%s.mol", control->sim_name ); out_control->mol = fopen( temp, "w" ); - if( control->num_ignored ) { 
+ if ( control->num_ignored ) + { sprintf( temp, "%s.ign", control->sim_name ); out_control->ign = fopen( temp, "w" ); - } + } } /* Init electric dipole moment analysis file */ - if( control->dipole_anal ) { + if ( control->dipole_anal ) + { strcpy( temp, control->sim_name ); strcat( temp, ".dpl" ); out_control->dpl = fopen( temp, "w" ); - fprintf( out_control->dpl, - "Step Molecule Count Avg. Dipole Moment Norm\n" ); + fprintf( out_control->dpl, + "Step Molecule Count Avg. Dipole Moment Norm\n" ); fflush( out_control->dpl ); } /* Init diffusion coef analysis file */ - if( control->diffusion_coef ) { + if ( control->diffusion_coef ) + { strcpy( temp, control->sim_name ); strcat( temp, ".drft" ); out_control->drft = fopen( temp, "w" ); @@ -836,21 +916,22 @@ void Init_Out_Controls(reax_system *system, control_params *control, #endif /* Error handling */ - /* if ( out_control->out == NULL || out_control->pot == NULL || - out_control->log == NULL || out_control->mol == NULL || - out_control->dpl == NULL || out_control->drft == NULL || + /* if ( out_control->out == NULL || out_control->pot == NULL || + out_control->log == NULL || out_control->mol == NULL || + out_control->dpl == NULL || out_control->drft == NULL || out_control->pdb == NULL ) { fprintf( stderr, "FILE OPEN ERROR. TERMINATING..." 
); - exit( CANNOT_OPEN_OUTFILE ); + exit( CANNOT_OPEN_FILE ); }*/ } -void Initialize(reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, list **lists, +void Initialize(reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, list **lists, output_controls *out_control, evolve_function *Evolve) { + real start, end; Randomize(); Init_System( system, control, data ); @@ -870,10 +951,16 @@ void Initialize(reax_system *system, control_params *control, Init_Force_Test_Functions( ); #endif - if( control->tabulate ) + if ( control->tabulate ) + { + start = Get_Time (); Make_LR_Lookup_Table( system, control ); + end = Get_Timing_Info (start); + + //fprintf (stderr, "Time for LR Lookup Table calculation is %f \n", end ); + } #if defined(DEBUG_FOCUS) - fprintf( stderr, "data structures have been initialized...\n" ); + fprintf( stderr, "data structures have been initialized...\n" ); #endif } diff --git a/PuReMD-GPU/src/init_md.h b/PuReMD-GPU/src/init_md.h index 8c23806594a8f2b107ddb884efbf68e7b5fe27ff..947d81e6e50e96f325742c6d024c6011a900152c 100644 --- a/PuReMD-GPU/src/init_md.h +++ b/PuReMD-GPU/src/init_md.h @@ -31,10 +31,10 @@ extern "C" { void Initialize( reax_system*, control_params*, simulation_data*, static_storage*, list**, output_controls*, evolve_function* ); -void Generate_Initial_Velocities(reax_system *, real ); +void Generate_Initial_Velocities( reax_system *, real ); -void Init_Out_Controls(reax_system *, control_params *, static_storage *, - output_controls *); +void Init_Out_Controls( reax_system *, control_params *, static_storage *, + output_controls * ); #ifdef __cplusplus } diff --git a/PuReMD-GPU/src/integrate.c b/PuReMD-GPU/src/integrate.c index 482a9c89a302c052e9ac44ae2de446c61b1c6a3e..d65406f8824697a396c65337bd197ad771320e81 100644 --- a/PuReMD-GPU/src/integrate.c +++ b/PuReMD-GPU/src/integrate.c @@ -1,32 +1,32 @@ 
/*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. 
----------------------------------------------------------------------*/ #include "integrate.h" - #include "allocate.h" #include "box.h" #include "forces.h" #include "grid.h" #include "neighbors.h" #include "print_utils.h" -#include "QEq.h" +#include "qeq.h" #include "reset_utils.h" #include "restart.h" #include "system_props.h" @@ -34,9 +34,10 @@ #include "list.h" -void Velocity_Verlet_NVE(reax_system* system, control_params* control, - simulation_data *data, static_storage *workspace, - list **lists, output_controls *out_control ) + +void Velocity_Verlet_NVE(reax_system* system, control_params* control, + simulation_data *data, static_storage *workspace, + list **lists, output_controls *out_control ) { int i, steps, renbr; real inv_m, dt, dt_sqr; @@ -46,53 +47,50 @@ void Velocity_Verlet_NVE(reax_system* system, control_params* control, dt_sqr = SQR(dt); steps = data->step - data->prev_steps; renbr = (steps % control->reneighbor == 0); - -#if defined(DEBUG_FOCUS) +#if defined(DEBUG_FOCUS) fprintf( stderr, "step%d: ", data->step ); #endif - for( i = 0; i < system->N; i++ ) { + for ( i = 0; i < system->N; i++ ) + { inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass; - rvec_ScaledSum( dx, dt, system->atoms[i].v, - 0.5 * dt_sqr * -F_CONV * inv_m, system->atoms[i].f ); + rvec_ScaledSum( dx, dt, system->atoms[i].v, + 0.5 * dt_sqr * -F_CONV * inv_m, system->atoms[i].f ); Inc_on_T3( system->atoms[i].x, dx, &( system->box ) ); - rvec_ScaledAdd( system->atoms[i].v, - 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); + rvec_ScaledAdd( system->atoms[i].v, + 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); } - -#if defined(DEBUG_FOCUS) +#if defined(DEBUG_FOCUS) fprintf( stderr, "verlet1 - "); #endif Reallocate( system, workspace, lists, renbr ); Reset( system, control, data, workspace, lists ); - if( renbr ) - { - Generate_Neighbor_Lists( system, control, data, workspace, - lists, out_control ); - } + if ( renbr ) + Generate_Neighbor_Lists( system, control, data, 
workspace, + lists, out_control ); Compute_Forces( system, control, data, workspace, lists, out_control ); - for( i = 0; i < system->N; i++ ) + for ( i = 0; i < system->N; i++ ) { inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass; - rvec_ScaledAdd( system->atoms[i].v, - 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); + rvec_ScaledAdd( system->atoms[i].v, + 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); } - -#if defined(DEBUG_FOCUS) +#if defined(DEBUG_FOCUS) fprintf( stderr, "verlet2\n"); #endif } -void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, - control_params* control, - simulation_data *data, - static_storage *workspace, - list **lists, + +void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, + control_params* control, + simulation_data *data, + static_storage *workspace, + list **lists, output_controls *out_control ) { int i, itr, steps, renbr; @@ -106,22 +104,17 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, therm = &( data->therm ); steps = data->step - data->prev_steps; renbr = (steps % control->reneighbor == 0); - #if defined(DEBUG_FOCUS) fprintf( stderr, "step%d: ", data->step ); #endif -#ifdef __DEBUG_CUDA__ - fprintf (stderr, " Entering Velocity_Verlet_Nose_Hoover_NVT_Klein: coef to update velocity --> %6.10f\n", therm->v_xi_old); -#endif - /* Compute x(t + dt) and copy old forces */ - for (i=0; i < system->N; i++) + for (i = 0; i < system->N; i++) { inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass; rvec_ScaledSum( dx, dt - 0.5 * dt_sqr * therm->v_xi, system->atoms[i].v, - 0.5 * dt_sqr * inv_m * -F_CONV, system->atoms[i].f ); + 0.5 * dt_sqr * inv_m * -F_CONV, system->atoms[i].f ); Inc_on_T3( system->atoms[i].x, dx, &(system->box) ); @@ -129,105 +122,88 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, } /* Compute xi(t + dt) */ therm->xi += ( therm->v_xi * dt + 0.5 * dt_sqr * therm->G_xi ); - #if defined(DEBUG_FOCUS) fprintf( stderr, "verlet1 - " ); #endif 
Reallocate( system, workspace, lists, renbr ); Reset( system, control, data, workspace, lists ); - - if( renbr ) - { - Generate_Neighbor_Lists( system, control, data, workspace, - lists, out_control ); - } - + if ( renbr ) + Generate_Neighbor_Lists( system, control, data, workspace, + lists, out_control ); /* Calculate Forces at time (t + dt) */ - Compute_Forces( system,control,data, workspace, lists, out_control ); + Compute_Forces( system, control, data, workspace, lists, out_control ); /* Compute iteration constants for each atom's velocity */ - for( i = 0; i < system->N; ++i ) + for ( i = 0; i < system->N; ++i ) { inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass; - rvec_Scale( workspace->v_const[i], - 1.0 - 0.5 * dt * therm->v_xi, system->atoms[i].v ); - rvec_ScaledAdd( workspace->v_const[i], - 0.5 * dt * inv_m * -F_CONV, workspace->f_old[i] ); - rvec_ScaledAdd( workspace->v_const[i], - 0.5 * dt * inv_m * -F_CONV, system->atoms[i].f ); + rvec_Scale( workspace->v_const[i], + 1.0 - 0.5 * dt * therm->v_xi, system->atoms[i].v ); + rvec_ScaledAdd( workspace->v_const[i], + 0.5 * dt * inv_m * -F_CONV, workspace->f_old[i] ); + rvec_ScaledAdd( workspace->v_const[i], + 0.5 * dt * inv_m * -F_CONV, system->atoms[i].f ); #if defined(DEBUG) - fprintf( stderr, "atom%d: inv_m=%f, C1=%f, C2=%f, v_const=%f %f %f\n", - i, inv_m, 1.0 - 0.5 * dt * therm->v_xi, - 0.5 * dt * inv_m * -F_CONV, workspace->v_const[i][0], - workspace->v_const[i][1], workspace->v_const[i][2] ); + fprintf( stderr, "atom%d: inv_m=%f, C1=%f, C2=%f, v_const=%f %f %f\n", + i, inv_m, 1.0 - 0.5 * dt * therm->v_xi, + 0.5 * dt * inv_m * -F_CONV, workspace->v_const[i][0], + workspace->v_const[i][1], workspace->v_const[i][2] ); #endif } v_xi_new = therm->v_xi_old + 2.0 * dt * therm->G_xi; E_kin_new = G_xi_new = v_xi_old = 0; itr = 0; - do { - itr++; + do + { + itr++; /* new values become old in this iteration */ v_xi_old = v_xi_new; coef_v = 1.0 / (1.0 + 0.5 * dt * v_xi_old); E_kin_new = 0; - -#ifdef 
__DEBUG_CUDA__ - fprintf (stderr, " *********** coef to update velocity --> %6.10f, %6.10f, %6.10f\n", coef_v, dt, therm->v_xi_old); - //print_sys_atoms (system); -#endif - - for( i = 0; i < system->N; ++i ) + for ( i = 0; i < system->N; ++i ) { rvec_Scale( system->atoms[i].v, coef_v, workspace->v_const[i] ); - E_kin_new += ( 0.5*system->reaxprm.sbp[system->atoms[i].type].mass * - rvec_Dot( system->atoms[i].v, system->atoms[i].v ) ); + E_kin_new += ( 0.5 * system->reaxprm.sbp[system->atoms[i].type].mass * + rvec_Dot( system->atoms[i].v, system->atoms[i].v ) ); #if defined(DEBUG) - fprintf( stderr, "itr%d-atom%d: coef_v = %f, v_xi_old = %f\n", - itr, i, coef_v, v_xi_old ); + fprintf( stderr, "itr%d-atom%d: coef_v = %f, v_xi_old = %f\n", + itr, i, coef_v, v_xi_old ); #endif } - G_xi_new = control->Tau_T * ( 2.0 * E_kin_new - - data->N_f * K_B * control->T ); + G_xi_new = control->Tau_T * ( 2.0 * E_kin_new - + data->N_f * K_B * control->T ); v_xi_new = therm->v_xi + 0.5 * dt * ( therm->G_xi + G_xi_new ); - #if defined(DEBUG) fprintf( stderr, "itr%d: G_xi_new = %f, v_xi_new = %f, v_xi_old = %f\n", - itr, G_xi_new, v_xi_new, v_xi_old ); + itr, G_xi_new, v_xi_new, v_xi_old ); #endif } - while( fabs(v_xi_new - v_xi_old ) > 1e-5 ); + while ( fabs(v_xi_new - v_xi_old ) > 1e-5 ); -#ifdef __DEBUG_CUDA__ - fprintf (stderr, " Iteration Count in NVE --> %d \n", itr ); -#endif - -#ifndef __BUILD_DEBUG__ therm->v_xi_old = therm->v_xi; therm->v_xi = v_xi_new; - therm->G_xi = G_xi_new; -#endif - -#if defined(DEBUG_FOCUS) - fprintf( stderr,"vel scale\n" ); -#endif + therm->G_xi = G_xi_new; +#if defined(DEBUG_FOCUS) + fprintf( stderr, "vel scale\n" ); +#endif } -/* uses Berendsen-type coupling for both T and P. - All box dimensions are scaled by the same amount, + +/* uses Berendsen-type coupling for both T and P. + All box dimensions are scaled by the same amount, there is no change in the angles between axes. 
*/ -void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system, - control_params* control, +void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system, + control_params* control, simulation_data *data, - static_storage *workspace, - list **lists, + static_storage *workspace, + list **lists, output_controls *out_control ) { int i, steps, renbr; @@ -237,94 +213,102 @@ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system, dt = control->dt; steps = data->step - data->prev_steps; renbr = (steps % control->reneighbor == 0); - #if defined(DEBUG_FOCUS) - //fprintf( out_control->prs, - // "tau_t: %g tau_p: %g dt/tau_t: %g dt/tau_p: %g\n", + //fprintf( out_control->prs, + // "tau_t: %g tau_p: %g dt/tau_t: %g dt/tau_p: %g\n", //control->Tau_T, control->Tau_P, dt / control->Tau_T, dt / control->Tau_P ); fprintf( stderr, "step %d: ", data->step ); #endif /* velocity verlet, 1st part */ - for( i = 0; i < system->N; i++ ) + for ( i = 0; i < system->N; i++ ) { inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass; /* Compute x(t + dt) */ - rvec_ScaledSum( dx, dt, system->atoms[i].v, - 0.5 * -F_CONV * inv_m * SQR(dt), system->atoms[i].f ); + rvec_ScaledSum( dx, dt, system->atoms[i].v, + 0.5 * -F_CONV * inv_m * SQR(dt), system->atoms[i].f ); Inc_on_T3( system->atoms[i].x, dx, &(system->box) ); /* Compute v(t + dt/2) */ - rvec_ScaledAdd( system->atoms[i].v, - 0.5 * -F_CONV * inv_m * dt, system->atoms[i].f ); - /*fprintf( stderr, "%6d %15.8f %15.8f %15.8f %15.8f %15.8f %15.8f\n", - workspace->orig_id[i], + rvec_ScaledAdd( system->atoms[i].v, + 0.5 * -F_CONV * inv_m * dt, system->atoms[i].f ); + /*fprintf( stderr, "%6d %15.8f %15.8f %15.8f %15.8f %15.8f %15.8f\n", + workspace->orig_id[i], system->atoms[i].x[0], system->atoms[i].x[1], system->atoms[i].x[2], - 0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[0], - 0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[1], + 0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[0], + 0.5 * SQR(dt) * 
-F_CONV * inv_m * system->atoms[i].f[1], 0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[2] ); */ } - -#if defined(DEBUG_FOCUS) +#if defined(DEBUG_FOCUS) fprintf( stderr, "verlet1 - " ); #endif - Reallocate( system, workspace, lists, renbr ); + Reallocate( system, workspace, lists, renbr ); Reset( system, control, data, workspace, lists ); - if( renbr ) { + if ( renbr ) + { Update_Grid( system ); Generate_Neighbor_Lists( system, control, data, workspace, - lists, out_control ); + lists, out_control ); } Compute_Forces( system, control, data, workspace, lists, out_control ); /* velocity verlet, 2nd part */ - for( i = 0; i < system->N; i++ ) { + for ( i = 0; i < system->N; i++ ) + { inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass; /* Compute v(t + dt) */ - rvec_ScaledAdd( system->atoms[i].v, - 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); - /* fprintf( stderr, "%6d %15f %15f %15f %15.8f %15.8f %15.8f\n", - workspace->orig_id[i], + rvec_ScaledAdd( system->atoms[i].v, + 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); + /* fprintf( stderr, "%6d %15f %15f %15f %15.8f %15.8f %15.8f\n", + workspace->orig_id[i], system->atoms[i].v[0], system->atoms[i].v[1], system->atoms[i].v[2], - 0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[0], - 0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[1], + 0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[0], + 0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[1], 0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[2] );*/ } - //Compute_Kinetic_Energy( system, data ); + //TODO: commented out for GPU version, why? 
+#ifndef HAVE_CUDA + Compute_Kinetic_Energy( system, data ); +#endif Compute_Pressure_Isotropic( system, control, data, out_control ); -#if defined(DEBUG_FOCUS) +#if defined(DEBUG_FOCUS) fprintf( stderr, "verlet2 - " ); #endif /* pressure scaler */ mu = POW( 1.0 + (dt / control->Tau_P[0]) * (data->iso_bar.P - control->P[0]), - 1.0 / 3 ); - if( mu < MIN_dV ) + 1.0 / 3 ); + if ( mu < MIN_dV ) mu = MIN_dV; - else if( mu > MAX_dV ) + else if ( mu > MAX_dV ) mu = MAX_dV; /* temperature scaler */ lambda = 1.0 + (dt / control->Tau_T) * (control->T / data->therm.T - 1.0); - if( lambda < MIN_dT ) + if ( lambda < MIN_dT ) lambda = MIN_dT; else if (lambda > MAX_dT ) lambda = MAX_dT; lambda = SQRT( lambda ); /* Scale velocities and positions at t+dt */ - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { rvec_Scale( system->atoms[i].v, lambda, system->atoms[i].v ); - /* IMPORTANT: What Adri does with scaling positions first to - unit coordinates and then back to cartesian coordinates essentially - is scaling the coordinates with mu^2. However, this causes unphysical + /* IMPORTANT: What Adri does with scaling positions first to + unit coordinates and then back to cartesian coordinates essentially + is scaling the coordinates with mu^2. However, this causes unphysical modifications on the system because box dimensions are being scaled with mu! We need to discuss this with Adri! */ rvec_Scale( system->atoms[i].x, mu, system->atoms[i].x ); } - //Compute_Kinetic_Energy( system, data ); -#if defined(DEBUG_FOCUS) + //TODO: commented out for GPU version, why? +#ifndef HAVE_CUDA + Compute_Kinetic_Energy( system, data ); +#endif + +#if defined(DEBUG_FOCUS) fprintf( stderr, "scaling - " ); #endif @@ -335,14 +319,14 @@ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system, } -/* uses Berendsen-type coupling for both T and P. - All box dimensions are scaled by the same amount, +/* uses Berendsen-type coupling for both T and P. 
+ All box dimensions are scaled by the same amount, there is no change in the angles between axes. */ -void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system* system, - control_params* control, +void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system* system, + control_params* control, simulation_data *data, - static_storage *workspace, - list **lists, + static_storage *workspace, + list **lists, output_controls *out_control ) { int i, d, steps, renbr; @@ -352,120 +336,139 @@ void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system* system, dt = control->dt; steps = data->step - data->prev_steps; renbr = (steps % control->reneighbor == 0); - #if defined(DEBUG_FOCUS) - //fprintf( out_control->prs, - // "tau_t: %g tau_p: %g dt/tau_t: %g dt/tau_p: %g\n", + //fprintf( out_control->prs, + // "tau_t: %g tau_p: %g dt/tau_t: %g dt/tau_p: %g\n", //control->Tau_T, control->Tau_P, dt / control->Tau_T, dt / control->Tau_P ); fprintf( stderr, "step %d: ", data->step ); #endif /* velocity verlet, 1st part */ - for( i = 0; i < system->N; i++ ) { - inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass; + for ( i = 0; i < system->N; i++ ) + { + inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass; /* Compute x(t + dt) */ - rvec_ScaledSum( dx, dt, system->atoms[i].v, - 0.5 * -F_CONV * inv_m * SQR(dt), system->atoms[i].f ); + rvec_ScaledSum( dx, dt, system->atoms[i].v, + 0.5 * -F_CONV * inv_m * SQR(dt), system->atoms[i].f ); Inc_on_T3( system->atoms[i].x, dx, &(system->box) ); /* Compute v(t + dt/2) */ - rvec_ScaledAdd( system->atoms[i].v, - 0.5 * -F_CONV * inv_m * dt, system->atoms[i].f ); - /*fprintf( stderr, "%6d %15.8f %15.8f %15.8f %15.8f %15.8f %15.8f\n", - workspace->orig_id[i], + rvec_ScaledAdd( system->atoms[i].v, + 0.5 * -F_CONV * inv_m * dt, system->atoms[i].f ); + /*fprintf( stderr, "%6d %15.8f %15.8f %15.8f %15.8f %15.8f %15.8f\n", + workspace->orig_id[i], system->atoms[i].x[0], system->atoms[i].x[1], system->atoms[i].x[2], - 0.5 * 
SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[0], - 0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[1], + 0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[0], + 0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[1], 0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[2] ); */ } - #if defined(DEBUG_FOCUS) fprintf( stderr, "verlet1 - " ); #endif - Reallocate( system, workspace, lists, renbr ); + Reallocate( system, workspace, lists, renbr ); Reset( system, control, data, workspace, lists ); - if( renbr ) { + if ( renbr ) + { Update_Grid( system ); - Generate_Neighbor_Lists( system, control, data, workspace, - lists, out_control ); + Generate_Neighbor_Lists( system, control, data, workspace, + lists, out_control ); } Compute_Forces( system, control, data, workspace, lists, out_control ); /* velocity verlet, 2nd part */ - for( i = 0; i < system->N; i++ ) { + for ( i = 0; i < system->N; i++ ) + { inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass; /* Compute v(t + dt) */ - rvec_ScaledAdd( system->atoms[i].v, - 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); - /* fprintf( stderr, "%6d %15f %15f %15f %15.8f %15.8f %15.8f\n", - workspace->orig_id[i], + rvec_ScaledAdd( system->atoms[i].v, + 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); + /* fprintf( stderr, "%6d %15f %15f %15f %15.8f %15.8f %15.8f\n", + workspace->orig_id[i], system->atoms[i].v[0], system->atoms[i].v[1], system->atoms[i].v[2], - 0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[0], - 0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[1], + 0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[0], + 0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[1], 0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[2] );*/ } - //Compute_Kinetic_Energy( system, data ); + //TODO: commented out for GPU version, why? 
+#ifndef HAVE_CUDA + Compute_Kinetic_Energy( system, data ); +#endif Compute_Pressure_Isotropic( system, control, data, out_control ); - -#if defined(DEBUG_FOCUS) +#if defined(DEBUG_FOCUS) fprintf( stderr, "verlet2 - " ); #endif /* pressure scaler */ - for( d = 0; d < 3; ++d ){ - mu[d] = POW( 1.0+(dt/control->Tau_P[d])*(data->tot_press[d]-control->P[d]), - 1.0 / 3 ); - if( mu[d] < MIN_dV ) + for ( d = 0; d < 3; ++d ) + { + mu[d] = POW( 1.0 + (dt / control->Tau_P[d]) * (data->tot_press[d] - control->P[d]), + 1.0 / 3 ); + if ( mu[d] < MIN_dV ) + { mu[d] = MIN_dV; - else if( mu[d] > MAX_dV ) + } + else if ( mu[d] > MAX_dV ) + { mu[d] = MAX_dV; + } } /* temperature scaler */ lambda = 1.0 + (dt / control->Tau_T) * (control->T / data->therm.T - 1.0); - if( lambda < MIN_dT ) + if ( lambda < MIN_dT ) + { lambda = MIN_dT; + } else if (lambda > MAX_dT ) + { lambda = MAX_dT; + } lambda = SQRT( lambda ); /* Scale velocities and positions at t+dt */ - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { rvec_Scale( system->atoms[i].v, lambda, system->atoms[i].v ); - /* IMPORTANT: What Adri does with scaling positions first to - unit coordinates and then back to cartesian coordinates essentially - is scaling the coordinates with mu^2. However, this causes unphysical + /* IMPORTANT: What Adri does with scaling positions first to + unit coordinates and then back to cartesian coordinates essentially + is scaling the coordinates with mu^2. However, this causes unphysical modifications on the system because box dimensions are being scaled with mu! We need to discuss this with Adri! */ - for( d = 0; d < 3; ++d ) + for ( d = 0; d < 3; ++d ) system->atoms[i].x[d] = system->atoms[i].x[d] * mu[d]; } - //Compute_Kinetic_Energy( system, data ); -#if defined(DEBUG_FOCUS) + //TODO: commented out for GPU version, why? 
+#ifndef HAVE_CUDA + Compute_Kinetic_Energy( system, data ); +#endif + +#if defined(DEBUG_FOCUS) fprintf( stderr, "scaling - " ); #endif Update_Box_SemiIsotropic( &(system->box), mu ); -#if defined(DEBUG_FOCUS) +#if defined(DEBUG_FOCUS) fprintf( stderr, "updated box & grid\n" ); #endif } + /************************************************/ /* BELOW FUNCTIONS ARE NOT BEING USED ANYMORE! */ /* */ /*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/ /*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/ /************************************************/ + #ifdef ANISOTROPIC -void Velocity_Verlet_Nose_Hoover_NVT(reax_system* system, - control_params* control, - simulation_data *data, - static_storage *workspace, - list **lists, - output_controls *out_control ) +void Velocity_Verlet_Nose_Hoover_NVT(reax_system* system, + control_params* control, + simulation_data *data, + static_storage *workspace, + list **lists, + output_controls *out_control ) { int i; real inv_m; @@ -473,73 +476,77 @@ void Velocity_Verlet_Nose_Hoover_NVT(reax_system* system, real dt_sqr = SQR(dt); rvec dx; - for (i=0; i < system->N; i++) + for (i = 0; i < system->N; i++) { inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass; // Compute x(t + dt) - rvec_ScaledSum( dx, dt, system->atoms[i].v, - 0.5 * dt_sqr * -F_CONV * inv_m, system->atoms[i].f ); + rvec_ScaledSum( dx, dt, system->atoms[i].v, + 0.5 * dt_sqr * -F_CONV * inv_m, system->atoms[i].f ); Inc_on_T3_Gen( system->atoms[i].x, dx, &(system->box) ); // Compute v(t + dt/2) - rvec_ScaledAdd( system->atoms[i].v, - -0.5 * dt * data->therm.xi, system->atoms[i].v ); - rvec_ScaledAdd( system->atoms[i].v, - 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); + rvec_ScaledAdd( system->atoms[i].v, + -0.5 * dt * data->therm.xi, system->atoms[i].v ); + rvec_ScaledAdd( system->atoms[i].v, + 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); } // Compute zeta(t + dt/2), E_Kininetic(t + dt/2) // IMPORTANT: What will be the initial value of zeta? and what is g? 
- data->therm.xi += 0.5 * dt * control->Tau_T * - ( 2.0 * data->E_Kin - data->N_f * K_B * control->T ); + data->therm.xi += 0.5 * dt * control->Tau_T * + ( 2.0 * data->E_Kin - data->N_f * K_B * control->T ); Reset( system, control, data, workspace ); - fprintf(out_control->log,"reset-"); fflush( out_control->log ); + fprintf(out_control->log, "reset-"); + fflush( out_control->log ); - Generate_Neighbor_Lists( system, control, data, workspace, - lists, out_control ); - fprintf(out_control->log,"nbrs-"); fflush( out_control->log ); + Generate_Neighbor_Lists( system, control, data, workspace, + lists, out_control ); + fprintf(out_control->log, "nbrs-"); + fflush( out_control->log ); /* QEq( system, control, workspace, lists[FAR_NBRS], out_control ); fprintf(out_control->log,"qeq-"); fflush( out_control->log ); */ Compute_Forces( system, control, data, workspace, lists, out_control ); - fprintf(out_control->log,"forces\n"); fflush( out_control->log ); + fprintf(out_control->log, "forces\n"); + fflush( out_control->log ); - //Compute_Kinetic_Energy( system, data ); + //TODO: commented out for GPU version, why? 
+#ifndef HAVE_CUDA + Compute_Kinetic_Energy( system, data ); +#endif - for( i = 0; i < system->N; i++ ) + for ( i = 0; i < system->N; i++ ) { inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass; // compute v(t + dt) - rvec_ScaledAdd( system->atoms[i].v, - -0.5 * dt * data->therm.xi, system->atoms[i].v ); - rvec_ScaledAdd( system->atoms[i].v, - 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); + rvec_ScaledAdd( system->atoms[i].v, + -0.5 * dt * data->therm.xi, system->atoms[i].v ); + rvec_ScaledAdd( system->atoms[i].v, + 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); } // Compute zeta(t + dt) - data->therm.xi += 0.5*dt * control->Tau_T * ( 2.0 * data->E_Kin - - data->N_f * K_B * control->T ); + data->therm.xi += 0.5 * dt * control->Tau_T * ( 2.0 * data->E_Kin - + data->N_f * K_B * control->T ); - fprintf( out_control->log,"Xi: %8.3f %8.3f %8.3f\n", - data->therm.xi, data->E_Kin, data->N_f * K_B * control->T ); + fprintf( out_control->log, "Xi: %8.3f %8.3f %8.3f\n", + data->therm.xi, data->E_Kin, data->N_f * K_B * control->T ); fflush( out_control->log ); } -void Velocity_Verlet_Isotropic_NPT( reax_system* system, - control_params* control, - simulation_data *data, - static_storage *workspace, - list **lists, + +void Velocity_Verlet_Isotropic_NPT( reax_system* system, control_params* control, + simulation_data *data, static_storage *workspace, list **lists, output_controls *out_control ) { int i, itr; - real deps, v_eps_new=0, v_eps_old=0, G_xi_new; - real dxi, v_xi_new=0, v_xi_old=0, a_eps_new; + real deps, v_eps_new = 0, v_eps_old = 0, G_xi_new; + real dxi, v_xi_new = 0, v_xi_old = 0, a_eps_new; real inv_m, exp_deps, inv_3V; real E_kin, P_int, P_int_const; real coef_v, coef_v_eps; @@ -552,37 +559,37 @@ void Velocity_Verlet_Isotropic_NPT( reax_system* system, // Here we just calculate how much to increment eps, xi, v_eps, v_xi. 
// Commits are done after positions and velocities of atoms are updated - // because position, velocity updates uses v_eps, v_xi terms; - // yet we need EXP( deps ) to be able to calculate - // positions and velocities accurately. - iso_bar->a_eps = control->Tau_P * - ( 3.0 * box->volume * (iso_bar->P - control->P) + - 6.0 * data->E_Kin / data->N_f ) - iso_bar->v_eps * therm->v_xi; + // because position, velocity updates uses v_eps, v_xi terms; + // yet we need EXP( deps ) to be able to calculate + // positions and velocities accurately. + iso_bar->a_eps = control->Tau_P * + ( 3.0 * box->volume * (iso_bar->P - control->P) + + 6.0 * data->E_Kin / data->N_f ) - iso_bar->v_eps * therm->v_xi; deps = dt * iso_bar->v_eps + 0.5 * dt_sqr * iso_bar->a_eps; exp_deps = EXP( deps ); - therm->G_xi = control->Tau_T * ( 2.0 * data->E_Kin + - SQR( iso_bar->v_eps ) / control->Tau_P - - (data->N_f +1) * K_B * control->T ); + therm->G_xi = control->Tau_T * ( 2.0 * data->E_Kin + + SQR( iso_bar->v_eps ) / control->Tau_P - + (data->N_f + 1) * K_B * control->T ); dxi = therm->v_xi * dt + 0.5 * therm->G_xi * dt_sqr; - fprintf(out_control->log, "a: %12.6f eps: %12.6f deps: %12.6f\n", + fprintf(out_control->log, "a: %12.6f eps: %12.6f deps: %12.6f\n", iso_bar->a_eps, iso_bar->v_eps, iso_bar->eps); - fprintf(out_control->log, "G: %12.6f xi : %12.6f dxi : %12.6f\n", + fprintf(out_control->log, "G: %12.6f xi : %12.6f dxi : %12.6f\n", therm->G_xi, therm->v_xi, therm->xi ); // Update positions and velocities - // NOTE: v_old, v_xi_old, v_eps_old are meant to be the old values + // NOTE: v_old, v_xi_old, v_eps_old are meant to be the old values // in the iteration not the old values at time t or before! 
- for (i=0; i < system->N; i++) + for (i = 0; i < system->N; i++) { inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass; // Compute x(t + dt) - rvec_ScaledSum( workspace->a[i], -F_CONV * inv_m, system->atoms[i].f, - -( (2.0 + 3.0/data->N_f) * iso_bar->v_eps + therm->v_xi ), - system->atoms[i].v ); - rvec_ScaledSum( dx, dt, system->atoms[i].v, + rvec_ScaledSum( workspace->a[i], -F_CONV * inv_m, system->atoms[i].f, + -( (2.0 + 3.0 / data->N_f) * iso_bar->v_eps + therm->v_xi ), + system->atoms[i].v ); + rvec_ScaledSum( dx, dt, system->atoms[i].v, 0.5 * dt_sqr, workspace->a[i] ); Inc_on_T3( system->atoms[i].x, dx, &(system->box) ); rvec_Scale( system->atoms[i].x, exp_deps, system->atoms[i].x ); @@ -597,39 +604,40 @@ void Velocity_Verlet_Isotropic_NPT( reax_system* system, // Calculate new forces, f(t + dt) Reset( system, control, data, workspace ); - fprintf(out_control->log,"reset-"); fflush( out_control->log ); + fprintf(out_control->log, "reset-"); + fflush( out_control->log ); - Generate_Neighbor_Lists( system, control, data, workspace, - lists, out_control ); - fprintf(out_control->log,"nbrs-"); fflush( out_control->log ); + Generate_Neighbor_Lists( system, control, data, workspace, + lists, out_control ); + fprintf(out_control->log, "nbrs-"); + fflush( out_control->log ); /* QEq( system, control, workspace, lists[FAR_NBRS], out_control ); fprintf(out_control->log,"qeq-"); fflush( out_control->log ); */ Compute_Forces( system, control, data, workspace, lists, out_control ); - fprintf(out_control->log,"forces\n"); fflush( out_control->log ); - + fprintf(out_control->log, "forces\n"); + fflush( out_control->log ); // Compute iteration constants for each atom's velocity and for P_internal // Compute kinetic energy for initial velocities of the iteration P_int_const = E_kin = 0; - for( i = 0; i < system->N; ++i ) + for ( i = 0; i < system->N; ++i ) { inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass; - rvec_ScaledSum( dv, 0.5 * dt, 
workspace->a[i], - 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); + rvec_ScaledSum( dv, 0.5 * dt, workspace->a[i], + 0.5 * dt * -F_CONV * inv_m, system->atoms[i].f ); rvec_Add( dv, system->atoms[i].v ); rvec_Scale( workspace->v_const[i], exp_deps, dv ); - P_int_const += ( -F_CONV * - rvec_Dot( system->atoms[i].f, system->atoms[i].x ) ); + P_int_const += ( -F_CONV * + rvec_Dot( system->atoms[i].f, system->atoms[i].x ) ); - E_kin += (0.5 * system->reaxprm.sbp[system->atoms[i].type].mass * - rvec_Dot( system->atoms[i].v, system->atoms[i].v ) ); + E_kin += (0.5 * system->reaxprm.sbp[system->atoms[i].type].mass * + rvec_Dot( system->atoms[i].v, system->atoms[i].v ) ); } - // Compute initial p_int inv_3V = 1.0 / (3.0 * system->box.volume); P_int = inv_3V * ( 2.0 * E_kin + P_int_const ); @@ -645,42 +653,38 @@ void Velocity_Verlet_Isotropic_NPT( reax_system* system, v_xi_old = v_xi_new; v_eps_old = v_eps_new; - - for( i = 0; i < system->N; ++i ) + for ( i = 0; i < system->N; ++i ) { - coef_v = 1.0 / (1.0 + 0.5 * dt * exp_deps * - ( (2.0 + 3.0/data->N_f) * v_eps_old + v_xi_old ) ); + coef_v = 1.0 / (1.0 + 0.5 * dt * exp_deps * + ( (2.0 + 3.0 / data->N_f) * v_eps_old + v_xi_old ) ); rvec_Scale( system->atoms[i].v, coef_v, workspace->v_const[i] ); } - coef_v_eps = 1.0 / (1.0 + 0.5 * dt * v_xi_old); - a_eps_new = 3.0 * control->Tau_P * - ( system->box.volume * (P_int - control->P) + 2.0 * E_kin / data->N_f ); - v_eps_new = coef_v_eps * ( iso_bar->v_eps + + a_eps_new = 3.0 * control->Tau_P * + ( system->box.volume * (P_int - control->P) + 2.0 * E_kin / data->N_f ); + v_eps_new = coef_v_eps * ( iso_bar->v_eps + 0.5 * dt * ( iso_bar->a_eps + a_eps_new ) ); - - G_xi_new = control->Tau_T * ( 2.0 * E_kin + - SQR( v_eps_old ) / control->Tau_P - + G_xi_new = control->Tau_T * ( 2.0 * E_kin + + SQR( v_eps_old ) / control->Tau_P - (data->N_f + 1) * K_B * control->T ); v_xi_new = therm->v_xi + 0.5 * dt * ( therm->G_xi + G_xi_new ); - E_kin = 0; - for( i = 0; i < system->N; ++i ) - 
E_kin += (0.5 * system->reaxprm.sbp[system->atoms[i].type].mass * - rvec_Dot( system->atoms[i].v, system->atoms[i].v ) ); - - P_int = inv_3V * ( 2.0*E_kin + P_int_const ); + for ( i = 0; i < system->N; ++i ) + { + E_kin += (0.5 * system->reaxprm.sbp[system->atoms[i].type].mass * + rvec_Dot( system->atoms[i].v, system->atoms[i].v ) ); + } + P_int = inv_3V * ( 2.0 * E_kin + P_int_const ); - fprintf( out_control->log, - "itr %d E_kin: %8.3f veps_n:%8.3f veps_o:%8.3f vxi_n:%8.3f vxi_o: %8.3f\n", - itr, E_kin, v_eps_new, v_eps_old, v_xi_new, v_xi_old ); + fprintf( out_control->log, + "itr %d E_kin: %8.3f veps_n:%8.3f veps_o:%8.3f vxi_n:%8.3f vxi_o: %8.3f\n", + itr, E_kin, v_eps_new, v_eps_old, v_xi_new, v_xi_old ); } - while( fabs(v_eps_new - v_eps_old) + fabs(v_xi_new - v_xi_old) > 2e-3 ); - + while ( FABS(v_eps_new - v_eps_old) + fabs(v_xi_new - v_xi_old) > 2e-3 ); therm->v_xi_old = therm->v_xi; therm->v_xi = v_xi_new; @@ -690,36 +694,30 @@ void Velocity_Verlet_Isotropic_NPT( reax_system* system, iso_bar->v_eps = v_eps_new; iso_bar->a_eps = a_eps_new; - fprintf( out_control->log, "V: %8.3ff\tsides{%8.3f, %8.3f, %8.3f}\n", - system->box.volume, - system->box.box[0][0],system->box.box[1][1],system->box.box[2][2] ); - fprintf(out_control->log,"eps:\ta- %8.3f v- %8.3f eps- %8.3f\n", + fprintf( out_control->log, "V: %8.3ff\tsides{%8.3f, %8.3f, %8.3f}\n", + system->box.volume, + system->box.box[0][0], system->box.box[1][1], system->box.box[2][2] ); + fprintf(out_control->log, "eps:\ta- %8.3f v- %8.3f eps- %8.3f\n", iso_bar->a_eps, iso_bar->v_eps, iso_bar->eps); - fprintf(out_control->log,"xi: \tG- %8.3f v- %8.3f xi - %8.3f\n", + fprintf(out_control->log, "xi: \tG- %8.3f v- %8.3f xi - %8.3f\n", therm->G_xi, therm->v_xi, therm->xi); } #endif -/* uses Berendsen-type coupling for both T and P. - All box dimensions are scaled by the same amount, +/* uses Berendsen-type coupling for both T and P. 
+ All box dimensions are scaled by the same amount, there is no change in the angles between axes. */ -void Velocity_Verlet_Berendsen_NVT( reax_system* system, - control_params* control, - simulation_data *data, - static_storage *workspace, - list **lists, - output_controls *out_control - ) +void Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* control, + simulation_data *data, static_storage *workspace, list **lists, + output_controls *out_control ) { int i, steps, renbr; real inv_m, dt, lambda; rvec dx; reax_atom *atom; - fprintf (stderr, " Velocity_Verlet_Berendsen_NVT: step :%d \n", data->step); - #if defined(DEBUG_FOCUS) fprintf( stderr, "step%d\n", data->step ); #endif @@ -729,12 +727,19 @@ void Velocity_Verlet_Berendsen_NVT( reax_system* system, renbr = (steps % control->reneighbor == 0); /* velocity verlet, 1st part */ - for( i = 0; i < system->N; i++ ) { + for ( i = 0; i < system->N; i++ ) + { atom = &(system->atoms[i]); inv_m = 1.0 / system->reaxprm.sbp[atom->type].mass; /* Compute x(t + dt) */ rvec_ScaledSum( dx, dt, atom->v, 0.5 * -F_CONV * inv_m * SQR(dt), atom->f ); - rvec_Add( atom->x, dx ); + + //TODO: used rvec_Add in GPU version -- which is correct? 
+ /* bNVT fix - Metin's suggestion */ + /* ORIGINAL CHANGE -- CHECK THE branch serial-bnvt for the fix */ + //rvec_Add( atom->x, dx ); + Inc_on_T3( atom->x, dx, &( system->box ) ); + /* Compute v(t + dt/2) */ rvec_ScaledAdd( atom->v, 0.5 * -F_CONV * inv_m * dt, atom->f ); } @@ -746,42 +751,50 @@ void Velocity_Verlet_Berendsen_NVT( reax_system* system, Reallocate( system, workspace, lists, renbr ); Reset( system, control, data, workspace, lists ); - if( renbr ) + if ( renbr ) + { Generate_Neighbor_Lists( system, control, data, workspace, lists, out_control ); + } - Compute_Forces( system, control, data, workspace, - lists, out_control ); + Compute_Forces( system, control, data, workspace, lists, out_control ); /* velocity verlet, 2nd part */ - for( i = 0; i < system->N; i++ ) { + for ( i = 0; i < system->N; i++ ) + { atom = &(system->atoms[i]); inv_m = 1.0 / system->reaxprm.sbp[atom->type].mass; /* Compute v(t + dt) */ rvec_ScaledAdd( atom->v, 0.5 * dt * -F_CONV * inv_m, atom->f ); } -#if defined(DEBUG_FOCUS) +#if defined(DEBUG_FOCUS) fprintf(stderr, "step%d: verlet2 done\n", data->step); #endif /* temperature scaler */ Compute_Kinetic_Energy( system, data ); lambda = 1.0 + (dt / control->Tau_T) * (control->T / data->therm.T - 1.0); - if( lambda < MIN_dT ) + if ( lambda < MIN_dT ) + { lambda = MIN_dT; + } else if (lambda > MAX_dT ) + { lambda = MAX_dT; + } lambda = SQRT( lambda ); + fprintf( stderr, "step:%d lambda -> %f \n", data->step, lambda ); + /* Scale velocities and positions at t+dt */ - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { atom = &(system->atoms[i]); rvec_Scale( atom->v, lambda, atom->v ); } Compute_Kinetic_Energy( system, data ); -#if defined(DEBUG_FOCUS) - fprintf( stderr, "step%d: scaled velocities\n", - data->step ); +#if defined(DEBUG_FOCUS) + fprintf( stderr, "step%d: scaled velocities\n", data->step ); #endif } diff --git a/PuReMD-GPU/src/integrate.h b/PuReMD-GPU/src/integrate.h index 
6f5848f0de84e8a50ef2c5090194618b61f185fc..55b36c559f96d5e3d45ab69177e26f0d4136d8e1 100644 --- a/PuReMD-GPU/src/integrate.h +++ b/PuReMD-GPU/src/integrate.h @@ -23,29 +23,30 @@ #include "mytypes.h" + void Velocity_Verlet_NVE( reax_system*, control_params*, simulation_data*, static_storage*, list**, output_controls* ); + void Velocity_Verlet_Nose_Hoover_NVT( reax_system*, control_params*, - simulation_data*, static_storage*, - list**, output_controls* ); + simulation_data*, static_storage*, list**, output_controls* ); + void Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system*, control_params*, - simulation_data*, static_storage*, - list**, output_controls* ); + simulation_data*, static_storage*, list**, output_controls* ); + void Velocity_Verlet_Flexible_NPT( reax_system*, control_params*, - simulation_data*, static_storage*, - list**, output_controls* ); + simulation_data*, static_storage*, list**, output_controls* ); + void Velocity_Verlet_Isotropic_NPT( reax_system*, control_params*, - simulation_data*, static_storage*, - list**, output_controls* ); + simulation_data*, static_storage*, list**, output_controls* ); + void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system*, control_params*, - simulation_data*, static_storage*, - list**, output_controls* ); + simulation_data*, static_storage*, list**, output_controls* ); + void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system*, control_params*, - simulation_data*, - static_storage*, list**, - output_controls* ); + simulation_data*, static_storage*, list**, output_controls* ); + void Velocity_Verlet_Berendsen_NVT( reax_system* , control_params* , - simulation_data *, static_storage *, - list **, output_controls * ); + simulation_data *, static_storage *, list **, output_controls * ); + #endif diff --git a/PuReMD-GPU/src/lin_alg.c b/PuReMD-GPU/src/lin_alg.c index cb141d475b0e2cf702901ed551287e0e238cdcd6..fb1e25bb83273730fcd74f82acad76ec7e5336e1 100644 --- a/PuReMD-GPU/src/lin_alg.c +++ 
b/PuReMD-GPU/src/lin_alg.c @@ -1,319 +1,1654 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. 
----------------------------------------------------------------------*/ #include "lin_alg.h" +#include "allocate.h" #include "list.h" +#include "print_utils.h" +#include "tool_box.h" #include "vector.h" -#include "index_utils.h" -void Sparse_MatVec( sparse_matrix *A, real *x, real *b ) +typedef enum +{ + LOWER = 0, + UPPER = 1, +} TRIANGULARITY; + + +/* global to make OpenMP shared (Sparse_MatVec) */ +#ifdef _OPENMP +real *b_local = NULL; +#endif +/* global to make OpenMP shared (apply_preconditioner) */ +real *Dinv_L = NULL, *Dinv_U = NULL; +/* global to make OpenMP shared (tri_solve_level_sched) */ +int levels = 1; +int levels_L = 1, levels_U = 1; +unsigned int *row_levels_L = NULL, *level_rows_L = NULL, *level_rows_cnt_L = NULL; +unsigned int *row_levels_U = NULL, *level_rows_U = NULL, *level_rows_cnt_U = NULL; +unsigned int *row_levels, *level_rows, *level_rows_cnt; +unsigned int *top = NULL; +/* global to make OpenMP shared (graph_coloring) */ +unsigned int *color = NULL; +unsigned int *to_color = NULL; +unsigned int *conflict = NULL; +unsigned int *temp_ptr; +unsigned int *recolor = NULL; +unsigned int recolor_cnt; +unsigned int *color_top = NULL; +/* global to make OpenMP shared (sort_colors) */ +unsigned int *permuted_row_col = NULL; +unsigned int *permuted_row_col_inv = NULL; +real *y_p = NULL; +/* global to make OpenMP shared (permute_vector) */ +real *x_p = NULL; +unsigned int *mapping = NULL; +sparse_matrix *H_full; +sparse_matrix *H_p; +/* global to make OpenMP shared (jacobi_iter) */ +real *Dinv_b = NULL, *rp = NULL, *rp2 = NULL, *rp3 = NULL; + + +/* sparse matrix-vector product Ax=b + * where: + * A: lower triangular matrix, stored in CSR format + * x: vector + * b: vector (result) */ +static void Sparse_MatVec( const sparse_matrix * const A, + const real * const x, real * const b ) { int i, j, k, n, si, ei; real H; +#ifdef _OPENMP + unsigned int tid; +#endif n = A->n; - for( i = 0; i < n; ++i ) - b[i] = 0; + Vector_MakeZero( b, n ); + +#ifdef 
_OPENMP + tid = omp_get_thread_num(); + + #pragma omp master + { + + /* keep b_local for program duration to avoid allocate/free + * overhead per Sparse_MatVec call*/ + if ( b_local == NULL ) + { + if ( (b_local = (real*) malloc( omp_get_num_threads() * n * sizeof(real))) == NULL ) + { + exit( INSUFFICIENT_MEMORY ); + } + } + } + + #pragma omp barrier + + Vector_MakeZero( (real * const)b_local, omp_get_num_threads() * n ); + +#endif + #pragma omp for schedule(static) + for ( i = 0; i < n; ++i ) + { + si = A->start[i]; + ei = A->start[i + 1] - 1; + + for ( k = si; k < ei; ++k ) + { + j = A->j[k]; + H = A->val[k]; +#ifdef _OPENMP + b_local[tid * n + j] += H * x[i]; + b_local[tid * n + i] += H * x[j]; +#else + b[j] += H * x[i]; + b[i] += H * x[j]; +#endif + } + + // the diagonal entry is the last one in +#ifdef _OPENMP + b_local[tid * n + i] += A->val[k] * x[i]; +#else + b[i] += A->val[k] * x[i]; +#endif + } +#ifdef _OPENMP + #pragma omp for schedule(static) + for ( i = 0; i < n; ++i ) + { + for ( j = 0; j < omp_get_num_threads(); ++j ) + { + b[i] += b_local[j * n + i]; + } + } +#endif + +} + + +/* Transpose A and copy into A^T + * + * A: stored in CSR + * A_t: stored in CSR + */ +void Transpose( const sparse_matrix const *A, sparse_matrix const *A_t ) +{ + unsigned int i, j, pj, *A_t_top; + + if ( (A_t_top = (unsigned int*) calloc( A->n + 1, sizeof(unsigned int))) == NULL ) + { + fprintf( stderr, "Not enough space for matrix tranpose. 
Terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + memset( A_t->start, 0, (A->n + 1) * sizeof(unsigned int) ); + + /* count nonzeros in each column of A^T, store one row greater (see next loop) */ + for ( i = 0; i < A->n; ++i ) + { + for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) + { + ++A_t->start[A->j[pj] + 1]; + } + } + + /* setup the row pointers for A^T */ + for ( i = 1; i <= A->n; ++i ) + { + A_t_top[i] = A_t->start[i] = A_t->start[i] + A_t->start[i - 1]; + } + + /* fill in A^T */ + for ( i = 0; i < A->n; ++i ) + { + for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) + { + j = A->j[pj]; + A_t->j[A_t_top[j]] = i; + A_t->val[A_t_top[j]] = A->val[pj]; + ++A_t_top[j]; + } + } + + free( A_t_top ); +} + + +/* Transpose A in-place + * + * A: stored in CSR + */ +void Transpose_I( sparse_matrix * const A ) +{ + sparse_matrix * A_t; + + if ( Allocate_Matrix( A_t, A->n, A->m ) == FAILURE ) + { + fprintf( stderr, "not enough memory for transposing matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + Transpose( A, A_t ); + + memcpy( A->start, A_t->start, sizeof(int) * (A_t->n + 1) ); + memcpy( A->j, A_t->j, sizeof(int) * (A_t->start[A_t->n]) ); + memcpy( A->val, A_t->val, sizeof(real) * (A_t->start[A_t->n]) ); + + Deallocate_Matrix( A_t ); +} + + +/* Apply diagonal inverse (Jacobi) preconditioner to system residual + * + * Hdia_inv: diagonal inverse preconditioner (constructed using H) + * y: current residual + * x: preconditioned residual + * N: length of preconditioner and vectors (# rows in H) + */ +static void diag_pre_app( const real * const Hdia_inv, const real * const y, + real * const x, const int N ) +{ + unsigned int i; + + #pragma omp for schedule(static) + for ( i = 0; i < N; ++i ) + { + x[i] = y[i] * Hdia_inv[i]; + } +} + + +/* Solve triangular system LU*x = y using level scheduling + * + * LU: lower/upper triangular, stored in CSR + * y: constants in linear system (RHS) + * x: solution + * tri: triangularity of LU 
(lower/upper) + * + * Assumptions: + * LU has non-zero diagonals + * Each row of LU has at least one non-zero (i.e., no rows with all zeros) */ +static void tri_solve( const sparse_matrix * const LU, const real * const y, + real * const x, const TRIANGULARITY tri ) +{ + int i, pj, j, si, ei; + real val; + + #pragma omp master + { + if ( tri == LOWER ) + { + for ( i = 0; i < LU->n; ++i ) + { + x[i] = y[i]; + si = LU->start[i]; + ei = LU->start[i + 1]; + for ( pj = si; pj < ei - 1; ++pj ) + { + j = LU->j[pj]; + val = LU->val[pj]; + x[i] -= val * x[j]; + } + x[i] /= LU->val[pj]; + } + } + else + { + for ( i = LU->n - 1; i >= 0; --i ) + { + x[i] = y[i]; + si = LU->start[i]; + ei = LU->start[i + 1]; + for ( pj = si + 1; pj < ei; ++pj ) + { + j = LU->j[pj]; + val = LU->val[pj]; + x[i] -= val * x[j]; + } + x[i] /= LU->val[si]; + } + } + } +} + + +/* Solve triangular system LU*x = y using level scheduling + * + * LU: lower/upper triangular, stored in CSR + * y: constants in linear system (RHS) + * x: solution + * tri: triangularity of LU (lower/upper) + * find_levels: perform level search if positive, otherwise reuse existing levels + * + * Assumptions: + * LU has non-zero diagonals + * Each row of LU has at least one non-zero (i.e., no rows with all zeros) */ +static void tri_solve_level_sched( const sparse_matrix * const LU, const real * const y, + real * const x, const TRIANGULARITY tri, int find_levels ) +{ + int i, j, pj, local_row, local_level; + + #pragma omp master + { + if ( tri == LOWER ) + { + row_levels = row_levels_L; + level_rows = level_rows_L; + level_rows_cnt = level_rows_cnt_L; + levels = levels_L; + } + else + { + row_levels = row_levels_U; + level_rows = level_rows_U; + level_rows_cnt = level_rows_cnt_U; + levels = levels_U; + } + + if ( row_levels == NULL || level_rows == NULL || level_rows_cnt == NULL ) + { + if ( (row_levels = (unsigned int*) malloc((size_t)LU->n * sizeof(unsigned int))) == NULL + || (level_rows = (unsigned int*) malloc((size_t)LU->n 
* sizeof(unsigned int))) == NULL + || (level_rows_cnt = (unsigned int*) malloc((size_t)(LU->n + 1) * sizeof(unsigned int))) == NULL ) + { + fprintf( stderr, "Not enough space for triangular solve via level scheduling. Terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + + if ( top == NULL ) + { + if ( (top = (unsigned int*) malloc((size_t)(LU->n + 1) * sizeof(unsigned int))) == NULL ) + { + fprintf( stderr, "Not enough space for triangular solve via level scheduling. Terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + + /* find levels (row dependencies in substitutions) */ + if ( find_levels == TRUE ) + { + memset( row_levels, 0, LU->n * sizeof(unsigned int) ); + memset( level_rows_cnt, 0, LU->n * sizeof(unsigned int) ); + memset( top, 0, LU->n * sizeof(unsigned int) ); + levels = 1; + + if ( tri == LOWER ) + { + for ( i = 0; i < LU->n; ++i ) + { + local_level = 1; + for ( pj = LU->start[i]; pj < LU->start[i + 1] - 1; ++pj ) + { + local_level = MAX( local_level, row_levels[LU->j[pj]] + 1 ); + } + + levels = MAX( levels, local_level ); + row_levels[i] = local_level; + ++level_rows_cnt[local_level]; + } + +//#if defined(DEBUG) + fprintf(stderr, "levels(L): %d\n", levels); + fprintf(stderr, "NNZ(L): %d\n", LU->start[LU->n]); +//#endif + } + else + { + for ( i = LU->n - 1; i >= 0; --i ) + { + local_level = 1; + for ( pj = LU->start[i] + 1; pj < LU->start[i + 1]; ++pj ) + { + local_level = MAX( local_level, row_levels[LU->j[pj]] + 1 ); + } + + levels = MAX( levels, local_level ); + row_levels[i] = local_level; + ++level_rows_cnt[local_level]; + } + +//#if defined(DEBUG) + fprintf(stderr, "levels(U): %d\n", levels); + fprintf(stderr, "NNZ(U): %d\n", LU->start[LU->n]); +//#endif + } + + for ( i = 1; i < levels + 1; ++i ) + { + level_rows_cnt[i] += level_rows_cnt[i - 1]; + top[i] = level_rows_cnt[i]; + } + + for ( i = 0; i < LU->n; ++i ) + { + level_rows[top[row_levels[i] - 1]] = i; + ++top[row_levels[i] - 1]; + } + } + } + + #pragma omp barrier + + /* 
perform substitutions by level */ + if ( tri == LOWER ) + { + for ( i = 0; i < levels; ++i ) + { + #pragma omp for schedule(static) + for ( j = level_rows_cnt[i]; j < level_rows_cnt[i + 1]; ++j ) + { + local_row = level_rows[j]; + x[local_row] = y[local_row]; + for ( pj = LU->start[local_row]; pj < LU->start[local_row + 1] - 1; ++pj ) + { + x[local_row] -= LU->val[pj] * x[LU->j[pj]]; + + } + x[local_row] /= LU->val[pj]; + } + } + } + else + { + for ( i = 0; i < levels; ++i ) + { + #pragma omp for schedule(static) + for ( j = level_rows_cnt[i]; j < level_rows_cnt[i + 1]; ++j ) + { + local_row = level_rows[j]; + x[local_row] = y[local_row]; + for ( pj = LU->start[local_row] + 1; pj < LU->start[local_row + 1]; ++pj ) + { + x[local_row] -= LU->val[pj] * x[LU->j[pj]]; + + } + x[local_row] /= LU->val[LU->start[local_row]]; + } + } + } + + #pragma omp master + { + /* save level info for re-use if performing repeated triangular solves via preconditioning */ + if ( tri == LOWER ) + { + row_levels_L = row_levels; + level_rows_L = level_rows; + level_rows_cnt_L = level_rows_cnt; + levels_L = levels; + } + else + { + row_levels_U = row_levels; + level_rows_U = level_rows; + level_rows_cnt_U = level_rows_cnt; + levels_U = levels; + } + } + + #pragma omp barrier +} + + +static void compute_H_full( const sparse_matrix * const H ) +{ + int count, i, pj; + sparse_matrix *H_t; + + if ( Allocate_Matrix( H_t, H->n, H->m ) == FAILURE ) + { + fprintf( stderr, "not enough memory for full H. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + /* Set up the sparse matrix data structure for A. 
*/ + Transpose( H, H_t ); + + count = 0; + for ( i = 0; i < H->n; ++i ) + { + H_full->start[i] = count; + + /* H: symmetric, lower triangular portion only stored */ + for ( pj = H->start[i]; pj < H->start[i + 1]; ++pj ) + { + H_full->val[count] = H->val[pj]; + H_full->j[count] = H->j[pj]; + ++count; + } + /* H^T: symmetric, upper triangular portion only stored; + * skip diagonal from H^T, as included from H above */ + for ( pj = H_t->start[i] + 1; pj < H_t->start[i + 1]; ++pj ) + { + H_full->val[count] = H_t->val[pj]; + H_full->j[count] = H_t->j[pj]; + ++count; + } + } + H_full->start[i] = count; + + Deallocate_Matrix( H_t ); +} + + +/* Iterative greedy shared-memory parallel graph coloring + * + * A: matrix to use for coloring, stored in CSR format; + * rows represent vertices, columns of entries within a row represent adjacent vertices + * (i.e., dependent rows for elimination during LU factorization) + * tri: triangularity of LU (lower/upper) + * color: vertex color (1-based) + * + * Reference: + * Umit V. Catalyurek et al. + * Graph Coloring Algorithms for Multi-core + * and Massively Threaded Architectures + * Parallel Computing, 2012 + */ +void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) +{ + #pragma omp parallel + { +#define MAX_COLOR (500) + int i, pj, v; + unsigned int temp; + int *fb_color; + + #pragma omp master + { + memset( color, 0, sizeof(unsigned int) * A->n ); + recolor_cnt = A->n; + } + + /* ordering of vertices to color depends on triangularity of factor + * for which coloring is to be used for */ + if ( tri == LOWER ) + { + #pragma omp for schedule(static) + for ( i = 0; i < A->n; ++i ) + { + to_color[i] = i; + } + } + else + { + #pragma omp for schedule(static) + for ( i = 0; i < A->n; ++i ) + { + to_color[i] = A->n - 1 - i; + } + } + + if ( (fb_color = (int*) malloc(sizeof(int) * MAX_COLOR)) == NULL ) + { + fprintf( stderr, "not enough memory for graph coloring. 
terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + #pragma omp barrier + + while ( recolor_cnt > 0 ) + { + memset( fb_color, -1, sizeof(int) * MAX_COLOR ); + + /* color vertices */ + #pragma omp for schedule(static) + for ( i = 0; i < recolor_cnt; ++i ) + { + v = to_color[i]; + + /* colors of adjacent vertices are forbidden */ + for ( pj = A->start[v]; pj < A->start[v + 1]; ++pj ) + { + if ( v != A->j[pj] ) + { + fb_color[color[A->j[pj]]] = v; + } + } + + /* search for min. color which is not in conflict with adjacent vertices; + * start at 1 since 0 is default (invalid) color for all vertices */ + for ( pj = 1; fb_color[pj] == v; ++pj ); + + /* assign discovered color (no conflict in neighborhood of adjacent vertices) */ + color[v] = pj; + } + + /* determine if recoloring required */ + //TODO: switch to reduction on recolor_cnt (+) via parallel scan through recolor + #pragma omp master + { + temp = recolor_cnt; + recolor_cnt = 0; + + for ( i = 0; i < temp; ++i ) + { + v = to_color[i]; + + /* search for color conflicts with adjacent vertices */ + for ( pj = A->start[v]; pj < A->start[v + 1]; ++pj ) + { + if ( color[v] == color[A->j[pj]] && v > A->j[pj] ) + { + conflict[recolor_cnt] = v; + color[v] = 0; + ++recolor_cnt; + break; + } + } + } + + temp_ptr = to_color; + to_color = conflict; + conflict = temp_ptr; + } + + #pragma omp barrier + } + + free( fb_color ); + +//#if defined(DEBUG) +// #pragma omp master +// { +// for ( i = 0; i < A->n; ++i ) +// printf("Vertex: %5d, Color: %5d\n", i, color[i] ); +// } +//#endif + + #pragma omp barrier + } +} + + +/* Sort coloring + * + * n: number of entries in coloring + * tri: coloring to triangular factor to use (lower/upper) + */ +void sort_colors( const unsigned int n, const TRIANGULARITY tri ) +{ + unsigned int i; + + memset( color_top, 0, sizeof(unsigned int) * (n + 1) ); + + /* sort vertices by color (ascending within a color) + * 1) count colors + * 2) determine offsets of color ranges + * 3) sort by color + * 
+ * note: color is 1-based */ + for ( i = 0; i < n; ++i ) + { + ++color_top[color[i]]; + } + for ( i = 1; i < n + 1; ++i ) + { + color_top[i] += color_top[i - 1]; + } + for ( i = 0; i < n; ++i ) + { + permuted_row_col[color_top[color[i] - 1]] = i; + ++color_top[color[i] - 1]; + } + + /* invert mapping to get map from current row/column to permuted (new) row/column */ + for ( i = 0; i < n; ++i ) + { + permuted_row_col_inv[permuted_row_col[i]] = i; + } +} + + +/* Apply permutation Q^T*x or Q*x based on graph coloring + * + * color: vertex color (1-based); vertices represent matrix rows/columns + * x: vector to permute (in-place) + * n: number of entries in x + * invert_map: if TRUE, use Q^T, otherwise use Q + * tri: coloring to triangular factor to use (lower/upper) + */ +static void permute_vector( real * const x, const unsigned int n, const int invert_map, + const TRIANGULARITY tri ) +{ + unsigned int i; + + #pragma omp master + { + if ( x_p == NULL ) + { + if ( (x_p = (real*) malloc(sizeof(real) * n)) == NULL ) + { + fprintf( stderr, "not enough memory for permuting vector. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + + if ( invert_map == TRUE ) + { + mapping = permuted_row_col_inv; + } + else + { + mapping = permuted_row_col; + } + } + + #pragma omp barrier + + #pragma omp for schedule(static) + for ( i = 0; i < n; ++i ) + { + x_p[i] = x[mapping[i]]; + } + + #pragma omp master + { + memcpy( x, x_p, sizeof(real) * n ); + } + + #pragma omp barrier +} + + +/* Apply permutation Q^T*(LU)*Q based on graph coloring + * + * color: vertex color (1-based); vertices represent matrix rows/columns + * LU: matrix to permute, stored in CSR format + * tri: triangularity of LU (lower/upper) + */ +void permute_matrix( sparse_matrix * const LU, const TRIANGULARITY tri ) +{ + int i, pj, nr, nc; + sparse_matrix *LUtemp; + + if ( Allocate_Matrix( LUtemp, LU->n, LU->m ) == FAILURE ) + { + fprintf( stderr, "Not enough space for graph coloring (factor permutation). 
Terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + /* count nonzeros in each row of permuted factor (re-use color_top for counting) */ + memset( color_top, 0, sizeof(unsigned int) * (LU->n + 1) ); + + if ( tri == LOWER ) + { + for ( i = 0; i < LU->n; ++i ) + { + nr = permuted_row_col_inv[i]; + + for ( pj = LU->start[i]; pj < LU->start[i + 1]; ++pj ) + { + nc = permuted_row_col_inv[LU->j[pj]]; + + if ( nc <= nr ) + { + ++color_top[nr + 1]; + } + /* correct entries to maintain triangularity (lower) */ + else + { + ++color_top[nc + 1]; + } + } + } + } + else + { + for ( i = LU->n - 1; i >= 0; --i ) + { + nr = permuted_row_col_inv[i]; + + for ( pj = LU->start[i]; pj < LU->start[i + 1]; ++pj ) + { + nc = permuted_row_col_inv[LU->j[pj]]; + + if ( nc >= nr ) + { + ++color_top[nr + 1]; + } + /* correct entries to maintain triangularity (upper) */ + else + { + ++color_top[nc + 1]; + } + } + } + } + + for ( i = 1; i < LU->n + 1; ++i ) + { + color_top[i] += color_top[i - 1]; + } + + memcpy( LUtemp->start, color_top, sizeof(unsigned int) * (LU->n + 1) ); + + /* permute factor */ + if ( tri == LOWER ) + { + for ( i = 0; i < LU->n; ++i ) + { + nr = permuted_row_col_inv[i]; + + for ( pj = LU->start[i]; pj < LU->start[i + 1]; ++pj ) + { + nc = permuted_row_col_inv[LU->j[pj]]; + + if ( nc <= nr ) + { + LUtemp->j[color_top[nr]] = nc; + LUtemp->val[color_top[nr]] = LU->val[pj]; + ++color_top[nr]; + } + /* correct entries to maintain triangularity (lower) */ + else + { + LUtemp->j[color_top[nc]] = nr; + LUtemp->val[color_top[nc]] = LU->val[pj]; + ++color_top[nc]; + } + } + } + } + else + { + for ( i = LU->n - 1; i >= 0; --i ) + { + nr = permuted_row_col_inv[i]; + + for ( pj = LU->start[i]; pj < LU->start[i + 1]; ++pj ) + { + nc = permuted_row_col_inv[LU->j[pj]]; + + if ( nc >= nr ) + { + LUtemp->j[color_top[nr]] = nc; + LUtemp->val[color_top[nr]] = LU->val[pj]; + ++color_top[nr]; + } + /* correct entries to maintain triangularity (upper) */ + else + { + 
LUtemp->j[color_top[nc]] = nr; + LUtemp->val[color_top[nc]] = LU->val[pj]; + ++color_top[nc]; + } + } + } + } + + memcpy( LU->start, LUtemp->start, sizeof(unsigned int) * (LU->n + 1) ); + memcpy( LU->j, LUtemp->j, sizeof(unsigned int) * LU->start[LU->n] ); + memcpy( LU->val, LUtemp->val, sizeof(real) * LU->start[LU->n] ); + + Deallocate_Matrix( LUtemp ); +} + + +/* Setup routines to build permuted QEq matrix H (via graph coloring), + * used for preconditioning (incomplete factorizations computed based on + * permuted H) + * + * H: symmetric, lower triangular portion only, stored in CSR format; + * H is permuted in-place + */ +sparse_matrix * setup_graph_coloring( sparse_matrix * const H ) +{ + if ( color == NULL ) + { + /* internal storage for graph coloring (global to facilitate simultaneous access to OpenMP threads) */ + if ( (color = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || + (to_color =(unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || + (conflict = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || + (recolor = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || + (color_top = (unsigned int*) malloc(sizeof(unsigned int) * (H->n + 1))) == NULL || + (permuted_row_col = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || + (permuted_row_col_inv = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || + (y_p = (real*) malloc(sizeof(real) * H->n)) == NULL || + (Allocate_Matrix( H_p, H->n, H->m ) == FAILURE ) || + (Allocate_Matrix( H_full, H->n, 2 * H->m - H->n ) == FAILURE ) ) + { + fprintf( stderr, "not enough memory for graph coloring. 
terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + + compute_H_full( H ); + + graph_coloring( H_full, LOWER ); + sort_colors( H_full->n, LOWER ); + + memcpy( H_p->start, H->start, sizeof(int) * (H->n + 1) ); + memcpy( H_p->j, H->j, sizeof(int) * (H->start[H->n]) ); + memcpy( H_p->val, H->val, sizeof(real) * (H->start[H->n]) ); + permute_matrix( H_p, LOWER ); + + return H_p; +} + + +/* Jacobi iteration using truncated Neumann series: x_{k+1} = Gx_k + D^{-1}b + * where: + * G = I - D^{-1}R + * R = triangular matrix + * D = diagonal matrix, diagonals from R + * + * Note: used during the backsolves when applying preconditioners with + * triangular factors in iterative linear solvers + * + * Note: Newmann series arises from series expansion of the inverse of + * the coefficient matrix in the triangular system */ +static void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, + const real * const b, real * const x, const TRIANGULARITY tri, const + unsigned int maxiter ) +{ + unsigned int i, k, si = 0, ei = 0, iter; + + iter = 0; + + #pragma omp master + { + if ( Dinv_b == NULL ) + { + if ( (Dinv_b = (real*) malloc(sizeof(real) * R->n)) == NULL ) + { + fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + if ( rp == NULL ) + { + if ( (rp = (real*) malloc(sizeof(real) * R->n)) == NULL ) + { + fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + if ( rp2 == NULL ) + { + if ( (rp2 = (real*) malloc(sizeof(real) * R->n)) == NULL ) + { + fprintf( stderr, "not enough memory for Jacobi iteration matrices. 
terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + } + + #pragma omp barrier + + Vector_MakeZero( rp, R->n ); + + /* precompute and cache, as invariant in loop below */ + #pragma omp for schedule(static) + for ( i = 0; i < R->n; ++i ) + { + Dinv_b[i] = Dinv[i] * b[i]; + } + + do + { + // x_{k+1} = G*x_{k} + Dinv*b; + #pragma omp for schedule(guided) + for ( i = 0; i < R->n; ++i ) + { + if (tri == LOWER) + { + si = R->start[i]; + ei = R->start[i + 1] - 1; + } + else + { + + si = R->start[i] + 1; + ei = R->start[i + 1]; + } + + rp2[i] = 0.; + + for ( k = si; k < ei; ++k ) + { + rp2[i] += R->val[k] * rp[R->j[k]]; + } + + rp2[i] *= -Dinv[i]; + rp2[i] += Dinv_b[i]; + } + + #pragma omp master + { + rp3 = rp; + rp = rp2; + rp2 = rp3; + } + + #pragma omp barrier + + ++iter; + } + while ( iter < maxiter ); + + Vector_Copy( x, rp, R->n ); +} + + +/* Solve triangular system LU*x = y using level scheduling + * + * workspace: data struct containing matrices, lower/upper triangular, stored in CSR + * control: data struct containing parameters + * y: constants in linear system (RHS) + * x: solution + * fresh_pre: parameter indicating if this is a newly computed (fresh) preconditioner + * + * Assumptions: + * Matrices have non-zero diagonals + * Each row of a matrix has at least one non-zero (i.e., no rows with all zeros) */ +static void apply_preconditioner( const static_storage * const workspace, + const control_params * const control, const real * const y, + real * const x, const int fresh_pre ) +{ + int i, si; + + switch ( control->pre_app_type ) + { + case NONE_PA: + break; + case TRI_SOLVE_PA: + switch ( control->pre_comp_type ) + { + case DIAG_PC: + diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + tri_solve( workspace->L, y, x, LOWER ); + tri_solve( workspace->U, x, x, UPPER ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. 
Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + break; + case TRI_SOLVE_LEVEL_SCHED_PA: + switch ( control->pre_comp_type ) + { + case DIAG_PC: + diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + tri_solve_level_sched( workspace->L, y, x, LOWER, fresh_pre ); + tri_solve_level_sched( workspace->U, x, x, UPPER, fresh_pre ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + break; + case TRI_SOLVE_GC_PA: + switch ( control->pre_comp_type ) + { + case DIAG_PC: + fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + #pragma omp master + { + memcpy( y_p, y, sizeof(real) * workspace->H->n ); + } - for( i = 0; i < n; ++i ) { - si = A->start[i]; - ei = A->start[i+1]-1; + #pragma omp barrier - for( k = si; k < ei; ++k ) { - j = A->entries[k].j; - H = A->entries[k].val; - b[j] += H * x[i]; - b[i] += H * x[j]; + permute_vector( y_p, workspace->H->n, FALSE, LOWER ); + tri_solve_level_sched( workspace->L, y_p, x, LOWER, fresh_pre ); + tri_solve_level_sched( workspace->U, x, x, UPPER, fresh_pre ); + permute_vector( x, workspace->H->n, TRUE, UPPER ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; } + break; + case JACOBI_ITER_PA: + switch ( control->pre_comp_type ) + { + case DIAG_PC: + fprintf( stderr, "Unsupported preconditioner computation/application method combination. 
Terminating...\n" ); + exit( INVALID_INPUT ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + #pragma omp master + { + if ( Dinv_L == NULL ) + { + if ( (Dinv_L = (real*) malloc(sizeof(real) * workspace->L->n)) == NULL ) + { + fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + } - // the diagonal entry is the last one in - b[i] += A->entries[k].val * x[i]; - } -} + #pragma omp barrier + /* construct D^{-1}_L */ + if ( fresh_pre == TRUE ) + { + #pragma omp for schedule(static) + for ( i = 0; i < workspace->L->n; ++i ) + { + si = workspace->L->start[i + 1] - 1; + Dinv_L[i] = 1. / workspace->L->val[si]; + } + } -void Forward_Subs( sparse_matrix *L, real *b, real *y ) -{ - int i, pj, j, si, ei; - real val; + jacobi_iter( workspace->L, Dinv_L, y, x, LOWER, control->pre_app_jacobi_iters ); - for( i = 0; i < L->n; ++i ) { - y[i] = b[i]; - si = L->start[i]; - ei = L->start[i+1]; - for( pj = si; pj < ei-1; ++pj ){ - j = L->entries[pj].j; - val = L->entries[pj].val; - y[i] -= val * y[j]; - } - y[i] /= L->entries[pj].val; - } -} + #pragma omp master + { + if ( Dinv_U == NULL ) + { + if ( (Dinv_U = (real*) malloc(sizeof(real) * workspace->U->n)) == NULL ) + { + fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + } + #pragma omp barrier -void Backward_Subs( sparse_matrix *U, real *y, real *x ) -{ - int i, pj, j, si, ei; - real val; + /* construct D^{-1}_U */ + if ( fresh_pre == TRUE ) + { + #pragma omp for schedule(static) + for ( i = 0; i < workspace->U->n; ++i ) + { + si = workspace->U->start[i]; + Dinv_U[i] = 1. 
/ workspace->U->val[si]; + } + } - for( i = U->n-1; i >= 0; --i ) { - x[i] = y[i]; - si = U->start[i]; - ei = U->start[i+1]; - for( pj = si+1; pj < ei; ++pj ){ - j = U->entries[pj].j; - val = U->entries[pj].val; - x[i] -= val * x[j]; + jacobi_iter( workspace->U, Dinv_U, y, x, UPPER, control->pre_app_jacobi_iters ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; } - x[i] /= U->entries[si].val; + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + + return; } -int GMRES( static_storage *workspace, sparse_matrix *H, - real *b, real tol, real *x, FILE *fout, reax_system* system) +/* generalized minimual residual iterative solver for sparse linear systems */ +int GMRES( const static_storage * const workspace, const control_params * const control, + simulation_data * const data, const sparse_matrix * const H, + const real * const b, const real tol, real * const x, + const FILE * const fout, const int fresh_pre ) { - int i, j, k, itr, N; - real cc, tmp1, tmp2, temp, bnorm; + int i, j, k, itr, N, g_j, g_itr; + real cc, tmp1, tmp2, temp, ret_temp, bnorm, time_start; N = H->n; - bnorm = Norm( b, N ); - - /* apply the diagonal pre-conditioner to rhs */ - for( i = 0; i < N; ++i ) - workspace->b_prc[i] = b[i] * workspace->Hdia_inv[i]; - - /* GMRES outer-loop */ - for( itr = 0; itr < MAX_ITR; ++itr ) { - /* calculate r0 */ - Sparse_MatVec( H, x, workspace->b_prm ); - for( i = 0; i < N; ++i ) - workspace->b_prm[i] *= workspace->Hdia_inv[i]; /* pre-conditioner */ + #pragma omp parallel default(none) private(i, j, k, itr, bnorm, ret_temp) \ + shared(N, cc, tmp1, tmp2, temp, time_start, g_itr, g_j, stderr) + { + #pragma omp master + { + time_start = Get_Time( ); + } + bnorm = Norm( b, N ); + #pragma omp master + { + data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + } + if ( 
control->pre_comp_type == DIAG_PC ) + { + /* apply preconditioner to RHS */ + #pragma omp master + { + time_start = Get_Time( ); + } + apply_preconditioner( workspace, control, b, workspace->b_prc, fresh_pre ); + #pragma omp master + { + data->timing.pre_app += Get_Timing_Info( time_start ); + } + } - Vector_Sum(&workspace->v[ index_wkspace_sys (0,0,system->N) ], 1.,workspace->b_prc, -1., workspace->b_prm, N); - workspace->g[0] = Norm( &workspace->v[index_wkspace_sys (0,0,system->N)], N ); - Vector_Scale( &workspace->v[ index_wkspace_sys (0,0,system->N) ], 1.0/workspace->g[0], &workspace->v[index_wkspace_sys(0,0,system->N)], N ); + /* GMRES outer-loop */ + for ( itr = 0; itr < MAX_ITR; ++itr ) + { + /* calculate r0 */ + #pragma omp master + { + time_start = Get_Time( ); + } + Sparse_MatVec( H, x, workspace->b_prm ); + #pragma omp master + { + data->timing.solver_spmv += Get_Timing_Info( time_start ); + } - /* GMRES inner-loop */ - for( j = 0; j < RESTART && fabs(workspace->g[j]) / bnorm > tol; j++ ) { - /* matvec */ - Sparse_MatVec( H, &workspace->v[index_wkspace_sys(j,0,system->N)], &workspace->v[index_wkspace_sys(j+1,0,system->N)] ); + if ( control->pre_comp_type == DIAG_PC ) + { + #pragma omp master + { + time_start = Get_Time( ); + } + apply_preconditioner( workspace, control, workspace->b_prm, workspace->b_prm, FALSE ); + #pragma omp master + { + data->timing.pre_app += Get_Timing_Info( time_start ); + } + } - for( k = 0; k < N; ++k ) - workspace->v[ index_wkspace_sys (j+1,k,system->N)] *= workspace->Hdia_inv[k]; /*pre-conditioner*/ + if ( control->pre_comp_type == DIAG_PC ) + { + #pragma omp master + { + time_start = Get_Time( ); + } + Vector_Sum( workspace->v, 1., workspace->b_prc, -1., workspace->b_prm, N ); + #pragma omp master + { + data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + } + } + else + { + #pragma omp master + { + time_start = Get_Time( ); + } + Vector_Sum( workspace->v, 1., b, -1., workspace->b_prm, N ); + #pragma omp master + 
{ + data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + } + } - /* apply modified Gram-Schmidt to orthogonalize the new residual */ - for( i = 0; i <= j; i++ ) { - workspace->h[ index_wkspace_res (i,j) ] = Dot( &workspace->v[index_wkspace_sys(i,0,system->N)], &workspace->v[index_wkspace_sys(j+1,0,system->N)], N ); - Vector_Add( &workspace->v[index_wkspace_sys(j+1,0,system->N)], - -workspace->h[index_wkspace_res (i,j) ], &workspace->v[index_wkspace_sys(i,0,system->N)], N ); + if ( control->pre_comp_type != DIAG_PC ) + { + #pragma omp master + { + time_start = Get_Time( ); + } + apply_preconditioner( workspace, control, workspace->v, workspace->v, + itr == 0 ? fresh_pre : FALSE ); + #pragma omp master + { + data->timing.pre_app += Get_Timing_Info( time_start ); + } } + #pragma omp master + { + time_start = Get_Time( ); + } + ret_temp = Norm( workspace->v, N ); + #pragma omp single + { + workspace->g[0] = ret_temp; + } + Vector_Scale( workspace->v, 1. / workspace->g[0], workspace->v, N ); + #pragma omp master + { + data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + } - workspace->h[ index_wkspace_res (j+1,j) ] = Norm( &workspace->v[index_wkspace_sys(j+1,0,system->N)], N ); - Vector_Scale( &workspace->v[index_wkspace_sys(j+1,0,system->N)], - 1. 
/ workspace->h[ index_wkspace_res (j+1,j) ], &workspace->v[index_wkspace_sys(j+1,0,system->N)], N ); - // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j ); + /* GMRES inner-loop */ + for ( j = 0; j < RESTART && FABS(workspace->g[j]) / bnorm > tol; j++ ) + { + /* matvec */ + #pragma omp master + { + time_start = Get_Time( ); + } + Sparse_MatVec( H, workspace->v + j * N, workspace->v + (j + 1) * N ); + #pragma omp master + { + data->timing.solver_spmv += Get_Timing_Info( time_start ); + } + #pragma omp master + { + time_start = Get_Time( ); + } + apply_preconditioner( workspace, control, + workspace->v + (j + 1) * N, workspace->v + (j + 1) * N, FALSE ); + #pragma omp master + { + data->timing.pre_app += Get_Timing_Info( time_start ); + } - /* Givens rotations on the upper-Hessenberg matrix to make it U */ - for( i = 0; i <= j; i++ ) { - if( i == j ) { - cc = SQRT( SQR(workspace->h[ index_wkspace_res (j,j) ])+SQR(workspace->h[ index_wkspace_res (j+1,j) ]) ); - workspace->hc[j] = workspace->h[ index_wkspace_res (j,j) ] / cc; - workspace->hs[j] = workspace->h[ index_wkspace_res (j+1,j) ] / cc; + if ( control->pre_comp_type == DIAG_PC ) + { + /* apply modified Gram-Schmidt to orthogonalize the new residual */ + #pragma omp master + { + time_start = Get_Time( ); + } + for ( i = 0; i <= j; i++ ) + { + workspace->h[(RESTART + 1) * i + j] = + Dot( workspace->v + i * N, workspace->v + (j + 1) * N, N ); + Vector_Add( workspace->v + (j + 1) * N, -workspace->h[(RESTART + 1) * i + j], + workspace->v + i * N, N ); + } + #pragma omp master + { + data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + } + } + else + { + //TODO: investigate correctness of not explicitly orthogonalizing first few vectors + /* apply modified Gram-Schmidt to orthogonalize the new residual */ + #pragma omp master + { + time_start = Get_Time( ); + for ( i = 0; i < j - 1; i++ ) + { + workspace->h[(RESTART + 1) * i + j] = 0; + } + } + + for ( i = MAX(j - 1, 0); i <= j; i++ ) + { 
+ ret_temp = Dot( workspace->v + i * N, workspace->v + (j + 1) * N, N ); + #pragma omp single + { + workspace->h[(RESTART + 1) * i + j] = ret_temp; + } + Vector_Add( workspace->v + (j + 1) * N, + -workspace->h[(RESTART + 1) * i + j], workspace->v + i * N, N ); + } + #pragma omp master + { + data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + } } - tmp1 = workspace->hc[i] * workspace->h[ index_wkspace_res (i,j) ] + - workspace->hs[i] * workspace->h[ index_wkspace_res (i+1,j) ]; - tmp2 = -workspace->hs[i] * workspace->h[ index_wkspace_res (i,j) ] + - workspace->hc[i] * workspace->h[ index_wkspace_res (i+1,j) ]; + #pragma omp master + { + time_start = Get_Time( ); + } + ret_temp = Norm( workspace->v + (j + 1) * N, N ); + #pragma omp single + { + workspace->h[(RESTART + 1) * (j + 1) + j] = ret_temp; + } + Vector_Scale( workspace->v + (j + 1) * N, + 1. / workspace->h[(RESTART + 1) * (j + 1) + j], + workspace->v + (j + 1) * N, N ); + #pragma omp master + { + data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + } +#if defined(DEBUG) + fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j ); +#endif - workspace->h[ index_wkspace_res (i,j) ] = tmp1; - workspace->h[ index_wkspace_res (i+1,j) ] = tmp2; - } + #pragma omp master + { + time_start = Get_Time( ); + if ( control->pre_comp_type == DIAG_PC ) + { + /* Givens rotations on the upper-Hessenberg matrix to make it U */ + for ( i = 0; i <= j; i++ ) + { + if ( i == j ) + { + cc = SQRT( SQR(workspace->h[(RESTART + 1) * j + j]) + + SQR(workspace->h[(RESTART + 1) * (j + 1) + j]) ); + workspace->hc[j] = workspace->h[(RESTART + 1) * j + j] / cc; + workspace->hs[j] = workspace->h[(RESTART + 1) * (j + 1) + j] / cc; + } + + tmp1 = workspace->hc[i] * workspace->h[(RESTART + 1) * i + j] + + workspace->hs[i] * workspace->h[(RESTART + 1) * (i + 1) + j]; + tmp2 = -workspace->hs[i] * workspace->h[(RESTART + 1) * i + j] + + workspace->hc[i] * workspace->h[(RESTART + 1) * (i + 1) + j]; + + 
workspace->h[(RESTART + 1) * i + j] = tmp1; + workspace->h[(RESTART + 1) * (i + 1) + j] = tmp2; + } + } + else + { + //TODO: investigate correctness of not explicitly orthogonalizing first few vectors + /* Givens rotations on the upper-Hessenberg matrix to make it U */ + for ( i = MAX(j - 1, 0); i <= j; i++ ) + { + if ( i == j ) + { + cc = SQRT( SQR(workspace->h[(RESTART + 1) * j + j]) + + SQR(workspace->h[(RESTART + 1) * (j + 1) + j]) ); + workspace->hc[j] = workspace->h[(RESTART + 1) * j + j] / cc; + workspace->hs[j] = workspace->h[(RESTART + 1) * (j + 1) + j] / cc; + } + + tmp1 = workspace->hc[i] * workspace->h[(RESTART + 1) * i + j] + + workspace->hs[i] * workspace->h[(RESTART + 1) * (i + 1) + j]; + tmp2 = -workspace->hs[i] * workspace->h[(RESTART + 1) * i + j] + + workspace->hc[i] * workspace->h[(RESTART + 1) * (i + 1) + j]; + + workspace->h[(RESTART + 1) * i + j] = tmp1; + workspace->h[(RESTART + 1) * (i + 1) + j] = tmp2; + } + } + + /* apply Givens rotations to the rhs as well */ + tmp1 = workspace->hc[j] * workspace->g[j]; + tmp2 = -workspace->hs[j] * workspace->g[j]; + workspace->g[j] = tmp1; + workspace->g[j + 1] = tmp2; + data->timing.solver_orthog += Get_Timing_Info( time_start ); + } - /* apply Givens rotations to the rhs as well */ - tmp1 = workspace->hc[j] * workspace->g[j]; - tmp2 = -workspace->hs[j] * workspace->g[j]; - workspace->g[j] = tmp1; - workspace->g[j+1] = tmp2; + #pragma omp barrier - // fprintf( stderr, "h: " ); - // for( i = 0; i <= j+1; ++i ) - // fprintf( stderr, "%.6f ", workspace->h[i][j] ); - // fprintf( stderr, "\n" ); - //fprintf( stderr, "res: %.15e\n", workspace->g[j+1] ); - } + //fprintf( stderr, "h: " ); + //for( i = 0; i <= j+1; ++i ) + //fprintf( stderr, "%.6f ", workspace->h[i][j] ); + //fprintf( stderr, "\n" ); + //fprintf( stderr, "res: %.15e\n", workspace->g[j+1] ); + } + /* solve Hy = g: H is now upper-triangular, do back-substitution */ + #pragma omp master + { + time_start = Get_Time( ); + for ( i = j - 1; i >= 0; 
i-- ) + { + temp = workspace->g[i]; + for ( k = j - 1; k > i; k-- ) + { + temp -= workspace->h[(RESTART + 1) * i + k] * workspace->y[k]; + } - /* solve Hy = g. - H is now upper-triangular, do back-substitution */ - for( i = j-1; i >= 0; i-- ) { - temp = workspace->g[i]; - for( k = j-1; k > i; k-- ) - temp -= workspace->h[ index_wkspace_res (i,k) ] * workspace->y[k]; + workspace->y[i] = temp / workspace->h[(RESTART + 1) * i + i]; + } + data->timing.solver_tri_solve += Get_Timing_Info( time_start ); - workspace->y[i] = temp / workspace->h[ index_wkspace_res (i,i) ]; - } + /* update x = x_0 + Vy */ + time_start = Get_Time( ); + } + Vector_MakeZero( workspace->p, N ); + for ( i = 0; i < j; i++ ) + { + Vector_Add( workspace->p, workspace->y[i], workspace->v + i * N, N ); + } + Vector_Add( x, 1., workspace->p, N ); + #pragma omp master + { + data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + } - /* update x = x_0 + Vy */ - for( i = 0; i < j; i++ ) - Vector_Add( x, workspace->y[i], &workspace->v[index_wkspace_sys(i,0,system->N)], N ); + /* stopping condition */ + if ( FABS(workspace->g[j]) / bnorm <= tol ) + { + break; + } + } - /* stopping condition */ - if( fabs(workspace->g[j]) / bnorm <= tol ) - break; + #pragma omp master + { + g_itr = itr; + g_j = j; + } } // Sparse_MatVec( H, x, workspace->b_prm ); // for( i = 0; i < N; ++i ) - // workspace->b_prm[i] *= workspace->Hdia_inv[i]; + // workspace->b_prm[i] *= workspace->Hdia_inv[i]; // fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" ); // for( i = 0; i < N; ++i ) - // fprintf( fout, "%10.5f%15.12f%15.12f\n", + // fprintf( fout, "%10.5f%15.12f%15.12f\n", // workspace->b_prc[i], workspace->b_prm[i], x[i] );*/ - // fprintf(fout,"GMRES outer:%d, inner:%d iters - residual norm: %25.20f\n", + // fprintf(fout,"GMRES outer:%d, inner:%d iters - residual norm: %25.20f\n", // itr, j, fabs( workspace->g[j] ) / bnorm ); - // data->timing.matvec += itr * RESTART + j; + // data->timing.solver_iters += itr * 
RESTART + j; - if( itr >= MAX_ITR ) { + if ( g_itr >= MAX_ITR ) + { fprintf( stderr, "GMRES convergence failed\n" ); // return -1; - return itr * (RESTART+1) + j + 1; + return g_itr * (RESTART + 1) + g_j + 1; } - return itr * (RESTART+1) + j + 1; + return g_itr * (RESTART + 1) + g_j + 1; } -int GMRES_HouseHolder( static_storage *workspace, sparse_matrix *H, - real *b, real tol, real *x, FILE *fout, reax_system *system) +int GMRES_HouseHolder( const static_storage * const workspace, const control_params * const control, + simulation_data * const data, const sparse_matrix * const H, + const real * const b, real tol, real * const x, + const FILE * const fout, const int fresh_pre ) { int i, j, k, itr, N; real cc, tmp1, tmp2, temp, bnorm; - real v[10000], z[RESTART+2][10000], w[RESTART+2]; - real u[RESTART+2][10000]; + real v[10000], z[RESTART + 2][10000], w[RESTART + 2]; + real u[RESTART + 2][10000]; N = H->n; bnorm = Norm( b, N ); /* apply the diagonal pre-conditioner to rhs */ - for( i = 0; i < N; ++i ) - workspace->b_prc[i] = b[i] * workspace->Hdia_inv[i]; + for ( i = 0; i < N; ++i ) + { + workspace->b_prc[i] = b[i] * workspace->Hdia_inv[i]; + } // memset( x, 0, sizeof(real) * N ); /* GMRES outer-loop */ - for( itr = 0; itr < MAX_ITR; ++itr ) { + for ( itr = 0; itr < MAX_ITR; ++itr ) + { /* compute z = r0 */ - Sparse_MatVec( H, x, workspace->b_prm ); - for( i = 0; i < N; ++i ) + Sparse_MatVec( H, x, workspace->b_prm ); + for ( i = 0; i < N; ++i ) + { workspace->b_prm[i] *= workspace->Hdia_inv[i]; /* pre-conditioner */ + } Vector_Sum( z[0], 1., workspace->b_prc, -1., workspace->b_prm, N ); - Vector_MakeZero( w, RESTART+1 ); + Vector_MakeZero( w, RESTART + 1 ); w[0] = Norm( z[0], N ); Vector_Copy( u[0], z[0], N ); u[0][0] += ( u[0][0] < 0.0 ? -1 : 1 ) * w[0]; Vector_Scale( u[0], 1 / Norm( u[0], N ), u[0], N ); - w[0] *= ( u[0][0] < 0.0 ? 1 :-1 ); + w[0] *= ( u[0][0] < 0.0 ? 
1 : -1 ); // fprintf( stderr, "\n\n%12.6f\n", w[0] ); /* GMRES inner-loop */ - for( j = 0; j < RESTART && fabs( w[j] ) / bnorm > tol; j++ ) { + for ( j = 0; j < RESTART && fabs( w[j] ) / bnorm > tol; j++ ) + { /* compute v_j */ Vector_Scale( z[j], -2 * u[j][j], u[j], N ); z[j][j] += 1.; /* due to e_j */ - for( i = j-1; i >= 0; --i ) - Vector_Add( z[j]+i, -2 * Dot( u[i]+i, z[j]+i, N-i ), u[i]+i, N-i ); - + for ( i = j - 1; i >= 0; --i ) + { + Vector_Add( z[j] + i, -2 * Dot( u[i] + i, z[j] + i, N - i ), u[i] + i, N - i ); + } /* matvec */ Sparse_MatVec( H, z[j], v ); - for( k = 0; k < N; ++k ) + for ( k = 0; k < N; ++k ) + { v[k] *= workspace->Hdia_inv[k]; /* pre-conditioner */ + } - for( i = 0; i <= j; ++i ) - Vector_Add( v+i, -2 * Dot( u[i]+i, v+i, N-i ), u[i]+i, N-i ); - + for ( i = 0; i <= j; ++i ) + { + Vector_Add( v + i, -2 * Dot( u[i] + i, v + i, N - i ), u[i] + i, N - i ); + } - if( !Vector_isZero( v + (j+1), N - (j+1) ) ) { + if ( !Vector_isZero( v + (j + 1), N - (j + 1) ) ) + { /* compute the HouseHolder unit vector u_j+1 */ - for( i = 0; i <= j; ++i ) - u[j+1][i] = 0; + for ( i = 0; i <= j; ++i ) + { + u[j + 1][i] = 0; + } - Vector_Copy( u[j+1] + (j+1), v + (j+1), N - (j+1) ); + Vector_Copy( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ); - u[j+1][j+1] += ( v[j+1]<0.0 ? -1:1 ) * Norm( v+(j+1), N-(j+1) ); + u[j + 1][j + 1] += ( v[j + 1] < 0.0 ? 
-1 : 1 ) * Norm( v + (j + 1), N - (j + 1) ); - Vector_Scale( u[j+1], 1 / Norm( u[j+1], N ), u[j+1], N ); + Vector_Scale( u[j + 1], 1 / Norm( u[j + 1], N ), u[j + 1], N ); /* overwrite v with P_m+1 * v */ - v[j+1] -= 2 * Dot( u[j+1]+(j+1), v+(j+1), N-(j+1) ) * u[j+1][j+1]; - Vector_MakeZero( v + (j+2), N - (j+2) ); + v[j + 1] -= 2 * Dot( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ) * u[j + 1][j + 1]; + Vector_MakeZero( v + (j + 2), N - (j + 2) ); // Vector_Add( v, -2 * Dot( u[j+1], v, N ), u[j+1], N ); } /* prev Givens rots on the upper-Hessenberg matrix to make it U */ - for( i = 0; i < j; i++ ) { - tmp1 = workspace->hc[i] * v[i] + workspace->hs[i] * v[i+1]; - tmp2 = -workspace->hs[i] * v[i] + workspace->hc[i] * v[i+1]; + for ( i = 0; i < j; i++ ) + { + tmp1 = workspace->hc[i] * v[i] + workspace->hs[i] * v[i + 1]; + tmp2 = -workspace->hs[i] * v[i] + workspace->hc[i] * v[i + 1]; v[i] = tmp1; - v[i+1] = tmp2; + v[i + 1] = tmp2; } /* apply the new Givens rotation to H and right-hand side */ - if( fabs(v[j+1]) >= ALMOST_ZERO ) { - cc = SQRT( SQR( v[j] ) + SQR( v[j+1] ) ); + if ( fabs(v[j + 1]) >= ALMOST_ZERO ) + { + cc = SQRT( SQR( v[j] ) + SQR( v[j + 1] ) ); workspace->hc[j] = v[j] / cc; - workspace->hs[j] = v[j+1] / cc; + workspace->hs[j] = v[j + 1] / cc; - tmp1 = workspace->hc[j] * v[j] + workspace->hs[j] * v[j+1]; - tmp2 = -workspace->hs[j] * v[j] + workspace->hc[j] * v[j+1]; + tmp1 = workspace->hc[j] * v[j] + workspace->hs[j] * v[j + 1]; + tmp2 = -workspace->hs[j] * v[j] + workspace->hc[j] * v[j + 1]; v[j] = tmp1; - v[j+1] = tmp2; + v[j + 1] = tmp2; /* Givens rotations to rhs */ tmp1 = workspace->hc[j] * w[j]; tmp2 = -workspace->hs[j] * w[j]; w[j] = tmp1; - w[j+1] = tmp2; + w[j + 1] = tmp2; } /* extend R */ - for( i = 0; i <= j; ++i ) - workspace->h[ index_wkspace_res (i,j) ] = v[i]; + for ( i = 0; i <= j; ++i ) + { + workspace->h[(RESTART + 1) * i + j] = v[i]; + } // fprintf( stderr, "h:" ); @@ -326,12 +1661,15 @@ int GMRES_HouseHolder( static_storage 
*workspace, sparse_matrix *H, /* solve Hy = w. H is now upper-triangular, do back-substitution */ - for( i = j-1; i >= 0; i-- ) { - temp = w[i]; - for( k = j-1; k > i; k-- ) - temp -= workspace->h[ index_wkspace_res (i,k) ] * workspace->y[k]; + for ( i = j - 1; i >= 0; i-- ) + { + temp = w[i]; + for ( k = j - 1; k > i; k-- ) + { + temp -= workspace->h[(RESTART + 1) * i + k] * workspace->y[k]; + } - workspace->y[i] = temp / workspace->h[ index_wkspace_res (i,i) ]; + workspace->y[i] = temp / workspace->h[(RESTART + 1) * i + i]; } // fprintf( stderr, "y: " ); @@ -345,9 +1683,9 @@ int GMRES_HouseHolder( static_storage *workspace, sparse_matrix *H, // { // Vector_Copy( v, z, N ); // v[i] += workspace->y[i]; - // + // // Vector_Sum( z, 1., v, -2 * Dot( u[i], v, N ), u[i], N ); - // } + // } // // fprintf( stderr, "\nz: " ); // for( k = 0; k < N; ++k ) @@ -358,16 +1696,20 @@ int GMRES_HouseHolder( static_storage *workspace, sparse_matrix *H, // fprintf( stderr, "%6.2f ", x[i] ); // Vector_Add( x, 1, z, N ); - for( i = j-1; i >= 0; i-- ) + for ( i = j - 1; i >= 0; i-- ) + { Vector_Add( x, workspace->y[i], z[i], N ); + } // fprintf( stderr, "\nx_aft: " ); // for( i = 0; i < N; ++i ) // fprintf( stderr, "%6.2f ", x[i] ); /* stopping condition */ - if( fabs( w[j] ) / bnorm <= tol ) + if ( fabs( w[j] ) / bnorm <= tol ) + { break; + } } // Sparse_MatVec( H, x, workspace->b_prm ); @@ -376,152 +1718,26 @@ int GMRES_HouseHolder( static_storage *workspace, sparse_matrix *H, // fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" ); // for( i = 0; i < N; ++i ) - // fprintf( fout, "%10.5f%15.12f%15.12f\n", + // fprintf( fout, "%10.5f%15.12f%15.12f\n", // workspace->b_prc[i], workspace->b_prm[i], x[i] ); - //fprintf( fout,"GMRES outer:%d, inner:%d iters - residual norm: %15.10f\n", + //fprintf( fout,"GMRES outer:%d, inner:%d iters - residual norm: %15.10f\n", // itr, j, fabs( workspace->g[j] ) / bnorm ); - if( itr >= MAX_ITR ) { - fprintf( stderr, "GMRES convergence failed\n" ); - 
// return -1; - return itr * (RESTART+1) + j + 1; - } - - return itr * (RESTART+1) + j + 1; -} - - -int PGMRES( static_storage *workspace, sparse_matrix *H, real *b, real tol, - sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout, reax_system *system ) -{ - int i, j, k, itr, N; - real cc, tmp1, tmp2, temp, bnorm; - - N = H->n; - bnorm = Norm( b, N ); - - /* GMRES outer-loop */ - for( itr = 0; itr < MAX_ITR; ++itr ) + if ( itr >= MAX_ITR ) { - /* calculate r0 */ - Sparse_MatVec( H, x, workspace->b_prm ); - Vector_Sum( &workspace->v[index_wkspace_sys(0,0,system->N)], 1., b, -1., workspace->b_prm, N ); - Forward_Subs( L, &workspace->v[index_wkspace_sys(0,0,system->N)], &workspace->v[index_wkspace_sys(0,0,system->N)] ); - Backward_Subs( U, &workspace->v[index_wkspace_sys(0,0,system->N)], &workspace->v[index_wkspace_sys(0,0,system->N)] ); - workspace->g[0] = Norm( &workspace->v[index_wkspace_sys(0,0,system->N)], N ); - Vector_Scale( &workspace->v[index_wkspace_sys(0,0,system->N)], 1. / workspace->g[0], &workspace->v[index_wkspace_sys (0,0,system->N)], N ); - //fprintf( stderr, "res: %.15e\n", workspace->g[0] ); - - /* GMRES inner-loop */ - for( j = 0; j < RESTART && fabs(workspace->g[j]) / bnorm > tol; j++ ) - { - /* matvec */ - Sparse_MatVec( H, &workspace->v[index_wkspace_sys (j,0,system->N)], &workspace->v[index_wkspace_sys (j+1,0,system->N)] ); - Forward_Subs( L, &workspace->v[index_wkspace_sys(j+1,0,system->N)], &workspace->v[index_wkspace_sys(j+1,0,system->N)] ); - Backward_Subs( U, &workspace->v[index_wkspace_sys(j+1,0,system->N)], &workspace->v[index_wkspace_sys(j+1,0,system->N)] ); - - /* apply modified Gram-Schmidt to orthogonalize the new residual */ - for( i = 0; i < j-1; i++ ) - { - workspace->h[ index_wkspace_res (i,j)] = 0; - } - - //for( i = 0; i <= j; i++ ) { - for( i = MAX(j-1,0); i <= j; i++ ) { - workspace->h[index_wkspace_res (i,j)] = Dot( &workspace->v[index_wkspace_sys (i,0,system->N)], &workspace->v[index_wkspace_sys(j+1,0,system->N)], N ); - 
Vector_Add( &workspace->v[index_wkspace_sys(j+1,0,system->N)],-workspace->h[ index_wkspace_res (i,j) ], &workspace->v[index_wkspace_sys(i,0,system->N)], N ); - } - - workspace->h[index_wkspace_res (j+1,j) ] = Norm( &workspace->v[index_wkspace_sys (j+1,0,system->N)], N ); - Vector_Scale( &workspace->v[index_wkspace_sys(j+1,0,system->N)], - 1. / workspace->h[ index_wkspace_res (j+1,j)], &workspace->v[index_wkspace_sys(j+1,0,system->N)], N ); - // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j ); - - /* Givens rotations on the upper-Hessenberg matrix to make it U */ - for( i = MAX(j-1,0); i <= j; i++ ) - { - if( i == j ) - { - cc = SQRT( SQR(workspace->h[ index_wkspace_res (j,j) ])+SQR(workspace->h[ index_wkspace_res (j+1,j) ]) ); - workspace->hc[j] = workspace->h[ index_wkspace_res (j,j) ] / cc; - workspace->hs[j] = workspace->h[ index_wkspace_res (j+1,j) ] / cc; - } - - tmp1 = workspace->hc[i] * workspace->h[ index_wkspace_res (i,j) ] + - workspace->hs[i] * workspace->h[index_wkspace_res (i+1,j) ]; - tmp2 = -workspace->hs[i] * workspace->h[index_wkspace_res (i,j)] + - workspace->hc[i] * workspace->h[index_wkspace_res (i+1,j) ]; - - workspace->h[ index_wkspace_res (i,j) ] = tmp1; - workspace->h[ index_wkspace_res (i+1,j) ] = tmp2; - } - - /* apply Givens rotations to the rhs as well */ - tmp1 = workspace->hc[j] * workspace->g[j]; - tmp2 = -workspace->hs[j] * workspace->g[j]; - workspace->g[j] = tmp1; - workspace->g[j+1] = tmp2; - - //fprintf( stderr, "h: " ); - //for( i = 0; i <= j+1; ++i ) - //fprintf( stderr, "%.6f ", workspace->h[i][j] ); - //fprintf( stderr, "\n" ); - //fprintf( stderr, "res: %.15e\n", workspace->g[j+1] ); - } - - - /* solve Hy = g: H is now upper-triangular, do back-substitution */ - for( i = j-1; i >= 0; i-- ) - { - temp = workspace->g[i]; - for( k = j-1; k > i; k-- ) - { - temp -= workspace->h[ index_wkspace_res (i,k) ] * workspace->y[k]; - } - - workspace->y[i] = temp / workspace->h[index_wkspace_res (i,i)]; - } - - /* 
update x = x_0 + Vy */ - Vector_MakeZero( workspace->p, N ); - for( i = 0; i < j; i++ ) - Vector_Add( workspace->p, workspace->y[i], &workspace->v[index_wkspace_sys(i,0,system->N)], N ); - //Backward_Subs( U, workspace->p, workspace->p ); - //Forward_Subs( L, workspace->p, workspace->p ); - Vector_Add( x, 1., workspace->p, N ); - - /* stopping condition */ - if( fabs(workspace->g[j]) / bnorm <= tol ) - { - break; - } - } - - // Sparse_MatVec( H, x, workspace->b_prm ); - // for( i = 0; i < N; ++i ) - // workspace->b_prm[i] *= workspace->Hdia_inv[i]; - // fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" ); - // for( i = 0; i < N; ++i ) - // fprintf( fout, "%10.5f%15.12f%15.12f\n", - // workspace->b_prc[i], workspace->b_prm[i], x[i] );*/ - - // fprintf(fout,"GMRES outer:%d, inner:%d iters - residual norm: %25.20f\n", - // itr, j, fabs( workspace->g[j] ) / bnorm ); - // data->timing.matvec += itr * RESTART + j; - - if( itr >= MAX_ITR ) { fprintf( stderr, "GMRES convergence failed\n" ); // return -1; - return itr * (RESTART+1) + j + 1; + return itr * (RESTART + 1) + j + 1; } - return itr * (RESTART+1) + j + 1; + return itr * (RESTART + 1) + j + 1; } -int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, - sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout, reax_system* system ) +/* Preconditioned Conjugate Gradient */ +int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, + sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout ) { int i, N; real tmp, alpha, beta, b_norm, r_norm; @@ -537,12 +1753,12 @@ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, //Print_Soln( workspace, x, q, b, N ); //fprintf( stderr, "res: %.15e\n", r_norm ); - Forward_Subs( L, workspace->r, workspace->d ); - Backward_Subs( U, workspace->d, workspace->p ); + tri_solve( L, workspace->r, workspace->d, LOWER ); + tri_solve( U, workspace->d, workspace->p, UPPER ); sig_new = Dot( workspace->r, workspace->p, N ); sig0 = 
sig_new; - for( i = 0; i < 200 && r_norm/b_norm > tol; ++i ) + for ( i = 0; i < 200 && r_norm / b_norm > tol; ++i ) { //for( i = 0; i < 200 && sig_new > SQR(tol) * sig0; ++i ) { Sparse_MatVec( A, workspace->p, workspace->q ); @@ -556,8 +1772,8 @@ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, r_norm = Norm(workspace->r, N); //fprintf( stderr, "res: %.15e\n", r_norm ); - Forward_Subs( L, workspace->r, workspace->d ); - Backward_Subs( U, workspace->d, workspace->d ); + tri_solve( L, workspace->r, workspace->d, LOWER ); + tri_solve( U, workspace->d, workspace->d, UPPER ); sig_old = sig_new; sig_new = Dot( workspace->r, workspace->d, N ); beta = sig_new / sig_old; @@ -565,7 +1781,8 @@ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, } //fprintf( fout, "CG took %d iterations\n", i ); - if( i >= 200 ) { + if ( i >= 200 ) + { fprintf( stderr, "CG convergence failed!\n" ); return i; } @@ -574,8 +1791,9 @@ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, } -int CG( static_storage *workspace, sparse_matrix *H, - real *b, real tol, real *x, FILE *fout, reax_system *system) +/* Conjugate Gradient */ +int CG( static_storage *workspace, sparse_matrix *H, + real *b, real tol, real *x, FILE *fout ) { int i, j, N; real tmp, alpha, beta, b_norm; @@ -587,29 +1805,34 @@ int CG( static_storage *workspace, sparse_matrix *H, Sparse_MatVec( H, x, workspace->q ); Vector_Sum( workspace->r , 1., b, -1., workspace->q, N ); - for( j = 0; j < N; ++j ) + for ( j = 0; j < N; ++j ) + { workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } sig_new = Dot( workspace->r, workspace->d, N ); sig0 = sig_new; //Print_Soln( workspace, x, q, b, N ); - //fprintf( stderr, "sig_new: %24.15e, d_norm:%24.15e, q_norm:%24.15e\n", + //fprintf( stderr, "sig_new: %24.15e, d_norm:%24.15e, q_norm:%24.15e\n", // sqrt(sig_new), Norm(workspace->d,N), Norm(workspace->q,N) ); //fprintf( stderr, "sig_new: %f\n", sig_new ); - for( i = 0; i < 
300 && SQRT(sig_new) / b_norm > tol; ++i ) { + for ( i = 0; i < 300 && SQRT(sig_new) / b_norm > tol; ++i ) + { //for( i = 0; i < 300 && sig_new > SQR(tol)*sig0; ++i ) { Sparse_MatVec( H, workspace->d, workspace->q ); tmp = Dot( workspace->d, workspace->q, N ); //fprintf( stderr, "tmp: %f\n", tmp ); - alpha = sig_new / tmp; + alpha = sig_new / tmp; Vector_Add( x, alpha, workspace->d, N ); //fprintf( stderr, "d_norm:%24.15e, q_norm:%24.15e, tmp:%24.15e\n", // Norm(workspace->d,N), Norm(workspace->q,N), tmp ); - Vector_Add( workspace->r, -alpha, workspace->q, N ); - for( j = 0; j < N; ++j ) + Vector_Add( workspace->r, -alpha, workspace->q, N ); + for ( j = 0; j < N; ++j ) + { workspace->p[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } sig_old = sig_new; sig_new = Dot( workspace->r, workspace->p, N ); @@ -620,7 +1843,8 @@ int CG( static_storage *workspace, sparse_matrix *H, fprintf( stderr, "CG took %d iterations\n", i ); - if( i >= 300 ) { + if ( i >= 300 ) + { fprintf( stderr, "CG convergence failed!\n" ); return i; } @@ -630,8 +1854,8 @@ int CG( static_storage *workspace, sparse_matrix *H, /* Steepest Descent */ -int SDM( static_storage *workspace, sparse_matrix *H, - real *b, real tol, real *x, FILE *fout ) +int SDM( static_storage *workspace, sparse_matrix *H, + real *b, real tol, real *x, FILE *fout ) { int i, j, N; real tmp, alpha, beta, b_norm; @@ -643,23 +1867,28 @@ int SDM( static_storage *workspace, sparse_matrix *H, Sparse_MatVec( H, x, workspace->q ); Vector_Sum( workspace->r , 1., b, -1., workspace->q, N ); - for( j = 0; j < N; ++j ) + for ( j = 0; j < N; ++j ) + { workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } sig = Dot( workspace->r, workspace->d, N ); sig0 = sig; - for( i = 0; i < 300 && SQRT(sig) / b_norm > tol; ++i ) { + for ( i = 0; i < 300 && SQRT(sig) / b_norm > tol; ++i ) + { Sparse_MatVec( H, workspace->d, workspace->q ); sig = Dot( workspace->r, workspace->d, N ); tmp = Dot( workspace->d, workspace->q, N ); - alpha = sig / 
tmp; + alpha = sig / tmp; Vector_Add( x, alpha, workspace->d, N ); Vector_Add( workspace->r, -alpha, workspace->q, N ); - for( j = 0; j < N; ++j ) + for ( j = 0; j < N; ++j ) + { workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } //fprintf( stderr, "d_norm:%24.15e, q_norm:%24.15e, tmp:%24.15e\n", // Norm(workspace->d,N), Norm(workspace->q,N), tmp ); @@ -667,10 +1896,55 @@ int SDM( static_storage *workspace, sparse_matrix *H, fprintf( stderr, "SDM took %d iterations\n", i ); - if( i >= 300 ) { + if ( i >= 300 ) + { fprintf( stderr, "SDM convergence failed!\n" ); return i; } return i; } + + +/* Estimate the stability of a 2-side preconditioning scheme + * using the factorization A \approx LU. Specifically, estimate the 1-norm of A^{-1} + * using the 1-norm of (LU)^{-1}e, with e = [1 1 ... 1]^T through 2 triangular solves: + * 1) Ly = e + * 2) Ux = y where y = Ux + * That is, we seek to solve e = LUx for unknown x + * + * Reference: Incomplete LU Preconditioning with the Multilevel Fast Multipole Algorithm + * for Electromagnetic Scattering, SIAM J. Sci. Computing, 2007 */ +real condest( const sparse_matrix * const L, const sparse_matrix * const U ) +{ + unsigned int i, N; + real *e, c; + + N = L->n; + + if ( (e = (real*) malloc(sizeof(real) * N)) == NULL ) + { + fprintf( stderr, "Not enough memory for condest. Terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + for ( i = 0; i < N; ++i ) { e[i] = 1.; } /* element-wise fill: memset() writes bytes, so it cannot store the real value 1.0 */ + + tri_solve( L, e, e, LOWER ); + tri_solve( U, e, e, UPPER ); + + /* take max |e[i]|, i.e. the infinity-norm of e (header comment says 1-norm; TODO confirm intent) */ + c = FABS(e[0]); + for ( i = 1; i < N; ++i) + { + if ( FABS(e[i]) > c ) + { + c = FABS(e[i]); + } + + } + + free( e ); + + return c; +} diff --git a/PuReMD-GPU/src/lin_alg.h b/PuReMD-GPU/src/lin_alg.h index a515a959494a6eca40fe9f338d2a08118ff3e39a..317afbf94cf2a26a4f48be4a0ad9c66bcef42085 100644 --- a/PuReMD-GPU/src/lin_alg.h +++ b/PuReMD-GPU/src/lin_alg.h @@ -21,28 +21,31 @@ #ifndef __LIN_ALG_H_ #define __LIN_ALG_H_ -#define SIGN(x) (x < 0.0 ? 
-1 : 1); - #include "mytypes.h" -int GMRES( static_storage*, sparse_matrix*, - real*, real, real*, FILE* , reax_system* ); +void Transpose( const sparse_matrix const *, sparse_matrix const * ); +void Transpose_I( sparse_matrix * const ); -int GMRES_HouseHolder( static_storage*, sparse_matrix*, - real*, real, real*, FILE* , reax_system* ); +sparse_matrix * setup_graph_coloring( sparse_matrix * const ); -int PGMRES( static_storage*, sparse_matrix*, real*, real, - sparse_matrix*, sparse_matrix*, real*, FILE*, reax_system* ); +int GMRES( const static_storage * const, const control_params * const, + simulation_data * const, const sparse_matrix * const, + const real * const, const real, real * const, + const FILE * const, const int ); -int PCG( static_storage*, sparse_matrix*, real*, real, - sparse_matrix*, sparse_matrix*, real*, FILE*, reax_system* ); +int GMRES_HouseHolder( const static_storage * const, const control_params * const, + simulation_data * const, const sparse_matrix * const, + const real * const, const real, real * const, + const FILE * const, const int ); int CG( static_storage*, sparse_matrix*, - real*, real, real*, FILE*, reax_system* ); + real*, real, real*, FILE* ); + +int SDM( static_storage*, sparse_matrix*, + real*, real, real*, FILE* ); -int uyduruk_GMRES( static_storage*, sparse_matrix*, - real*, real, real*, int, FILE*, reax_system* ); +real condest( const sparse_matrix * const, const sparse_matrix * const ); #endif diff --git a/PuReMD-GPU/src/list.c b/PuReMD-GPU/src/list.c index c6f0e55ebad4fc59c07f253a1d216d3242115aff..c52a4cc1cf2b2a8c1d32fdda71c8b0aa7808992a 100644 --- a/PuReMD-GPU/src/list.c +++ b/PuReMD-GPU/src/list.c @@ -1,19 +1,20 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu 
+ Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ @@ -21,9 +22,9 @@ #include "list.h" -char Make_List(int n, int num_intrs, int type, list* l) +int Make_List( int n, int num_intrs, int type, list* l ) { - char success=1; + int ret = SUCCESS; l->n = n; l->num_intrs = num_intrs; @@ -31,116 +32,170 @@ char Make_List(int n, int num_intrs, int type, list* l) l->index = (int*) malloc( n * sizeof(int) ); l->end_index = (int*) malloc( n * sizeof(int) ); - if (l->index == NULL) success = 0; - if (l->end_index == NULL) success = 0; + if (l->index == NULL) + { + ret = FAILURE; + } + if (l->end_index == NULL) + { + ret = FAILURE; + } l->type = type; - switch(type) + switch (type) { - case TYP_VOID: - l->select.v = (void *) malloc(l->num_intrs*sizeof(void)); - if (l->select.v == NULL) success = 0; - break; - - case TYP_THREE_BODY: - l->select.three_body_list = (three_body_interaction_data*) - malloc(l->num_intrs*sizeof(three_body_interaction_data)); - if (l->select.three_body_list == NULL) success = 0; - break; - - case TYP_BOND: - l->select.bond_list = (bond_data*) + case TYP_VOID: + l->select.v = (void *) malloc(l->num_intrs * sizeof(void)); + if (l->select.v == NULL) + { + ret = FAILURE; + } + break; + + case TYP_THREE_BODY: + 
l->select.three_body_list = (three_body_interaction_data*) + malloc(l->num_intrs * sizeof(three_body_interaction_data)); + if (l->select.three_body_list == NULL) + { + ret = FAILURE; + } + break; + + case TYP_BOND: + l->select.bond_list = (bond_data*) malloc(l->num_intrs * sizeof(bond_data)); - if (l->select.bond_list == NULL) success = 0; - break; - - case TYP_DBO: - l->select.dbo_list = (dbond_data*) + if (l->select.bond_list == NULL) + { + ret = FAILURE; + } + break; + + case TYP_DBO: + l->select.dbo_list = (dbond_data*) malloc(l->num_intrs * sizeof(dbond_data)); - if (l->select.dbo_list == NULL) success = 0; - break; - - case TYP_DDELTA: - l->select.dDelta_list = (dDelta_data*) - malloc(l->num_intrs*sizeof(dDelta_data)); - if (l->select.dDelta_list == NULL) success = 0; - break; - - case TYP_FAR_NEIGHBOR: - l->select.far_nbr_list = (far_neighbor_data*) - malloc(l->num_intrs*sizeof(far_neighbor_data)); - if (l->select.far_nbr_list == NULL) success = 0; - break; - - case TYP_NEAR_NEIGHBOR: - l->select.near_nbr_list = (near_neighbor_data*) - malloc(l->num_intrs*sizeof(near_neighbor_data)); - if (l->select.near_nbr_list == NULL) success = 0; - break; - - case TYP_HBOND: - l->select.hbond_list = (hbond_data*) + if (l->select.dbo_list == NULL) + { + ret = FAILURE; + } + break; + + case TYP_DDELTA: + l->select.dDelta_list = (dDelta_data*) + malloc(l->num_intrs * sizeof(dDelta_data)); + if (l->select.dDelta_list == NULL) + { + ret = FAILURE; + } + break; + + case TYP_FAR_NEIGHBOR: + l->select.far_nbr_list = (far_neighbor_data*) + malloc(l->num_intrs * sizeof(far_neighbor_data)); + if (l->select.far_nbr_list == NULL) + { + ret = FAILURE; + } + break; + + case TYP_NEAR_NEIGHBOR: + l->select.near_nbr_list = (near_neighbor_data*) + malloc(l->num_intrs * sizeof(near_neighbor_data)); + if (l->select.near_nbr_list == NULL) + { + ret = FAILURE; + } + break; + + case TYP_HBOND: + l->select.hbond_list = (hbond_data*) malloc( l->num_intrs * sizeof(hbond_data) ); - if 
(l->select.hbond_list == NULL) success = 0; - break; - - default: - l->select.v = (void *) malloc(l->num_intrs*sizeof(void)); - if (l->select.v == NULL) success = 0; - l->type = TYP_VOID; - break; + if (l->select.hbond_list == NULL) + { + ret = FAILURE; + } + break; + + default: + l->select.v = (void *) malloc(l->num_intrs * sizeof(void)); + if (l->select.v == NULL) + { + ret = FAILURE; + } + l->type = TYP_VOID; + break; } - return success; + return ret; } -void Delete_List(list* l) +void Delete_List( list* l ) { - if( l->index != NULL ) + if ( l->index != NULL ) + { free(l->index); - if( l->end_index != NULL ) + } + if ( l->end_index != NULL ) + { free(l->end_index); + } - switch(l->type) + switch (l->type) { - case TYP_VOID: - if( l->select.v != NULL ) - free(l->select.v); - break; - case TYP_THREE_BODY: - if( l->select.three_body_list != NULL ) - free(l->select.three_body_list); - break; - case TYP_BOND: - if( l->select.bond_list != NULL ) - free(l->select.bond_list); - break; - case TYP_DBO: - if( l->select.dbo_list != NULL ) - free(l->select.dbo_list); - break; - case TYP_DDELTA: - if( l->select.dDelta_list != NULL ) - free(l->select.dDelta_list); - break; - case TYP_FAR_NEIGHBOR: - if( l->select.far_nbr_list != NULL ) - free(l->select.far_nbr_list); - break; - case TYP_NEAR_NEIGHBOR: - if( l->select.near_nbr_list != NULL ) - free(l->select.near_nbr_list); - break; - case TYP_HBOND: - if( l->select.hbond_list != NULL ) - free(l->select.hbond_list); - break; - - default: - // Report fatal error - break; + case TYP_VOID: + if ( l->select.v != NULL ) + { + free(l->select.v); + } + break; + case TYP_THREE_BODY: + if ( l->select.three_body_list != NULL ) + { + free(l->select.three_body_list); + } + break; + case TYP_BOND: + if ( l->select.bond_list != NULL ) + { + free(l->select.bond_list); + } + break; + case TYP_DBO: + if ( l->select.dbo_list != NULL ) + { + free(l->select.dbo_list); + } + break; + case TYP_DDELTA: + if ( l->select.dDelta_list != NULL ) + { + 
free(l->select.dDelta_list); + } + break; + case TYP_FAR_NEIGHBOR: + if ( l->select.far_nbr_list != NULL ) + { + free(l->select.far_nbr_list); + } + break; + case TYP_NEAR_NEIGHBOR: + if ( l->select.near_nbr_list != NULL ) + { + free(l->select.near_nbr_list); + } + break; + case TYP_HBOND: + if ( l->select.hbond_list != NULL ) + { + free(l->select.hbond_list); + } + break; + + default: + fprintf( stderr, "Unrecognized list type. Terminating...\n" ); + exit( UNKNOWN_OPTION ); + break; } -} +} diff --git a/PuReMD-GPU/src/list.h b/PuReMD-GPU/src/list.h index b90c41419271ca6b859be08ea4005fbe9107c029..5ee4544212218488e6fa84477f8a446f66e73544 100644 --- a/PuReMD-GPU/src/list.h +++ b/PuReMD-GPU/src/list.h @@ -24,7 +24,7 @@ #include "mytypes.h" -char Make_List( int, int, int, list* ); +int Make_List( int, int, int, list* ); void Delete_List( list* ); diff --git a/PuReMD-GPU/src/lookup.c b/PuReMD-GPU/src/lookup.c index c439709dc09c77775ed716a39db797fa8c831585..b67bf5b7b96e91562a34ab2af3bbe421a1f5c19c 100644 --- a/PuReMD-GPU/src/lookup.c +++ b/PuReMD-GPU/src/lookup.c @@ -1,28 +1,28 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ #include "lookup.h" -#include "two_body_interactions.h" - #include "index_utils.h" +#include "two_body_interactions.h" void Make_Lookup_Table(real xmin, real xmax, int n, @@ -33,44 +33,48 @@ void Make_Lookup_Table(real xmin, real xmax, int n, t->xmin = xmin; t->xmax = xmax; t->n = n; - t->dx = (xmax - xmin)/(n-1); + t->dx = (xmax - xmin) / (n - 1); t->inv_dx = 1.0 / t->dx; - t->a = (n-1)/(xmax-xmin); - t->y = (real*) malloc(n*sizeof(real)); + t->a = (n - 1) / (xmax - xmin); + t->y = (real*) malloc(n * sizeof(real)); - for(i=0; i < n; i++) - t->y[i] = f(i*t->dx + t->xmin); + for (i = 0; i < n; i++) + t->y[i] = f(i * t->dx + t->xmin); - // //fprintf(stdout,"dx = %lf\n",t->dx); + // fprintf(stdout,"dx = %lf\n",t->dx); // for(i=0; i < n; i++) - // //fprintf( stdout,"%d %lf %lf %lf\n", + // fprintf( stdout,"%d %lf %lf %lf\n", // i, i/t->a+t->xmin, t->y[i], exp(i/t->a+t->xmin) ); } /* Fills solution into x. Warning: will modify c and d! */ void Tridiagonal_Solve( const real *a, const real *b, - real *c, real *d, real *x, unsigned int n){ + real *c, real *d, real *x, unsigned int n) +{ int i; real id; /* Modify the coefficients. */ - c[0] /= b[0]; /* Division by zero risk. */ - d[0] /= b[0]; /* Division by zero would imply a singular matrix. */ - for(i = 1; i < n; i++){ - id = (b[i] - c[i-1] * a[i]); /* Division by zero risk. */ - c[i] /= id; /* Last value calculated is redundant. */ - d[i] = (d[i] - d[i-1] * a[i])/id; + c[0] /= b[0]; /* Division by zero risk. */ + d[0] /= b[0]; /* Division by zero would imply a singular matrix. */ + for (i = 1; i < n; i++) + { + id = (b[i] - c[i - 1] * a[i]); /* Division by zero risk. */ + c[i] /= id; /* Last value calculated is redundant. */ + d[i] = (d[i] - d[i - 1] * a[i]) / id; } /* Now back substitute. 
*/ x[n - 1] = d[n - 1]; - for(i = n - 2; i >= 0; i--) + for (i = n - 2; i >= 0; i--) + { x[i] = d[i] - c[i] * x[i + 1]; + } } -void Natural_Cubic_Spline( const real *h, const real *f, +void Natural_Cubic_Spline( const real *h, const real *f, cubic_spline_coef *coef, unsigned int n ) { int i; @@ -84,43 +88,53 @@ void Natural_Cubic_Spline( const real *h, const real *f, v = (real*) malloc( n * sizeof(real) ); /* build the linear system */ - a[0] = a[1] = a[n-1] = 0; - for( i = 2; i < n-1; ++i ) - a[i] = h[i-1]; + a[0] = a[1] = a[n - 1] = 0; + for ( i = 2; i < n - 1; ++i ) + { + a[i] = h[i - 1]; + } - b[0] = b[n-1] = 0; - for( i = 1; i < n-1; ++i ) - b[i] = 2 * (h[i-1] + h[i]); + b[0] = b[n - 1] = 0; + for ( i = 1; i < n - 1; ++i ) + { + b[i] = 2 * (h[i - 1] + h[i]); + } - c[0] = c[n-2] = c[n-1] = 0; - for( i = 1; i < n-2; ++i ) + c[0] = c[n - 2] = c[n - 1] = 0; + for ( i = 1; i < n - 2; ++i ) + { c[i] = h[i]; + } - d[0] = d[n-1] = 0; - for( i = 1; i < n-1; ++i ) - d[i] = 6 * ((f[i+1]-f[i])/h[i] - (f[i]-f[i-1])/h[i-1]); + d[0] = d[n - 1] = 0; + for ( i = 1; i < n - 1; ++i ) + { + d[i] = 6 * ((f[i + 1] - f[i]) / h[i] - (f[i] - f[i - 1]) / h[i - 1]); + } - /*//fprintf( stderr, "i a b c d\n" ); + /*fprintf( stderr, "i a b c d\n" ); for( i = 0; i < n; ++i ) - //fprintf( stderr, "%d %f %f %f %f\n", i, a[i], b[i], c[i], d[i] );*/ + fprintf( stderr, "%d %f %f %f %f\n", i, a[i], b[i], c[i], d[i] );*/ v[0] = 0; - v[n-1] = 0; - Tridiagonal_Solve( &(a[1]), &(b[1]), &(c[1]), &(d[1]), &(v[1]), n-2 ); - - for( i = 1; i < n; ++i ){ - coef[i-1].d = (v[i] - v[i-1]) / (6*h[i-1]); - coef[i-1].c = v[i]/2; - coef[i-1].b = (f[i]-f[i-1])/h[i-1] + h[i-1]*(2*v[i] + v[i-1])/6; - coef[i-1].a = f[i]; + v[n - 1] = 0; + Tridiagonal_Solve( &(a[1]), &(b[1]), &(c[1]), &(d[1]), &(v[1]), n - 2 ); + + for ( i = 1; i < n; ++i ) + { + coef[i - 1].d = (v[i] - v[i - 1]) / (6 * h[i - 1]); + coef[i - 1].c = v[i] / 2; + coef[i - 1].b = (f[i] - f[i - 1]) / h[i - 1] + h[i - 1] * (2 * v[i] + v[i - 1]) / 6; + 
coef[i - 1].a = f[i]; } - /*//fprintf( stderr, "i v coef\n" ); + /*fprintf( stderr, "i v coef\n" ); for( i = 0; i < n; ++i ) - //fprintf( stderr, "%d %f %f %f %f %f\n", - i, v[i], coef[i].a, coef[i].b, coef[i].c, coef[i].d ); */ + fprintf( stderr, "%d %f %f %f %f %f\n", + i, v[i], coef[i].a, coef[i].b, coef[i].c, coef[i].d ); */ } + void Complete_Cubic_Spline( const real *h, const real *f, real v0, real vlast, cubic_spline_coef *coef, unsigned int n ) { @@ -136,39 +150,48 @@ void Complete_Cubic_Spline( const real *h, const real *f, real v0, real vlast, /* build the linear system */ a[0] = 0; - for( i = 1; i < n; ++i ) - a[i] = h[i-1]; + for ( i = 1; i < n; ++i ) + { + a[i] = h[i - 1]; + } - b[0] = 2*h[0]; - for( i = 1; i < n; ++i ) - b[i] = 2 * (h[i-1] + h[i]); + b[0] = 2 * h[0]; + for ( i = 1; i < n; ++i ) + { + b[i] = 2 * (h[i - 1] + h[i]); + } - c[n-1] = 0; - for( i = 0; i < n-1; ++i ) + c[n - 1] = 0; + for ( i = 0; i < n - 1; ++i ) + { c[i] = h[i]; + } - d[0] = 6 * (f[1]-f[0])/h[0] - 6 * v0; - d[n-1] = 6 * vlast - 6 * (f[n-1]-f[n-2]/h[n-2]); - for( i = 1; i < n-1; ++i ) - d[i] = 6 * ((f[i+1]-f[i])/h[i] - (f[i]-f[i-1])/h[i-1]); + d[0] = 6 * (f[1] - f[0]) / h[0] - 6 * v0; + d[n - 1] = 6 * vlast - 6 * (f[n - 1] - f[n - 2] / h[n - 2]); + for ( i = 1; i < n - 1; ++i ) + { + d[i] = 6 * ((f[i + 1] - f[i]) / h[i] - (f[i] - f[i - 1]) / h[i - 1]); + } - /*//fprintf( stderr, "i a b c d\n" ); + /*fprintf( stderr, "i a b c d\n" ); for( i = 0; i < n; ++i ) - //fprintf( stderr, "%d %f %f %f %f\n", i, a[i], b[i], c[i], d[i] );*/ + fprintf( stderr, "%d %f %f %f %f\n", i, a[i], b[i], c[i], d[i] );*/ Tridiagonal_Solve( &(a[0]), &(b[0]), &(c[0]), &(d[0]), &(v[0]), n ); // Tridiagonal_Solve( &(a[1]), &(b[1]), &(c[1]), &(d[1]), &(v[1]), n-2 ); - for( i = 1; i < n; ++i ){ - coef[i-1].d = (v[i] - v[i-1]) / (6*h[i-1]); - coef[i-1].c = v[i]/2; - coef[i-1].b = (f[i]-f[i-1])/h[i-1] + h[i-1]*(2*v[i] + v[i-1])/6; - coef[i-1].a = f[i]; + for ( i = 1; i < n; ++i ) + { + coef[i - 1].d = (v[i] 
- v[i - 1]) / (6 * h[i - 1]); + coef[i - 1].c = v[i] / 2; + coef[i - 1].b = (f[i] - f[i - 1]) / h[i - 1] + h[i - 1] * (2 * v[i] + v[i - 1]) / 6; + coef[i - 1].a = f[i]; } - /*//fprintf( stderr, "i v coef\n" ); + /*fprintf( stderr, "i v coef\n" ); for( i = 0; i < n; ++i ) - //fprintf( stderr, "%d %f %f %f %f %f\n", - i, v[i], coef[i].a, coef[i].b, coef[i].c, coef[i].d ); */ + fprintf( stderr, "%d %f %f %f %f %f\n", + i, v[i], coef[i].a, coef[i].b, coef[i].c, coef[i].d ); */ } @@ -178,21 +201,24 @@ void LR_Lookup( LR_lookup_table *t, real r, LR_data *y ) real base, dif; i = (int)(r * t->inv_dx); - if( i == 0 ) ++i; - base = (real)(i+1) * t->dx; + if ( i == 0 ) + { + ++i; + } + base = (real)(i + 1) * t->dx; dif = r - base; - ////fprintf( stderr, "r: %f, i: %d, base: %f, dif: %f\n", r, i, base, dif ); + //fprintf( stderr, "r: %f, i: %d, base: %f, dif: %f\n", r, i, base, dif ); - y->e_vdW = ((t->vdW[i].d*dif + t->vdW[i].c)*dif + t->vdW[i].b)*dif + - t->vdW[i].a; - y->CEvd = ((t->CEvd[i].d*dif + t->CEvd[i].c)*dif + - t->CEvd[i].b)*dif + t->CEvd[i].a; + y->e_vdW = ((t->vdW[i].d * dif + t->vdW[i].c) * dif + t->vdW[i].b) * dif + + t->vdW[i].a; + y->CEvd = ((t->CEvd[i].d * dif + t->CEvd[i].c) * dif + + t->CEvd[i].b) * dif + t->CEvd[i].a; //y->CEvd = (3*t->vdW[i].d*dif + 2*t->vdW[i].c)*dif + t->vdW[i].b; - y->e_ele = ((t->ele[i].d*dif + t->ele[i].c)*dif + t->ele[i].b)*dif + - t->ele[i].a; - y->CEclmb = ((t->CEclmb[i].d*dif + t->CEclmb[i].c)*dif + t->CEclmb[i].b)*dif + - t->CEclmb[i].a; + y->e_ele = ((t->ele[i].d * dif + t->ele[i].c) * dif + t->ele[i].b) * dif + + t->ele[i].a; + y->CEclmb = ((t->CEclmb[i].d * dif + t->CEclmb[i].c) * dif + t->CEclmb[i].b) * dif + + t->CEclmb[i].a; y->H = y->e_ele * EV_to_KCALpMOL / C_ele; //y->H = ((t->H[i].d*dif + t->H[i].c)*dif + t->H[i].b)*dif + t->H[i].a; @@ -221,147 +247,158 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control ) num_atom_types = system->reaxprm.num_atom_types; dr = control->r_cut / control->tabulate; 
- h = (real*) malloc( (control->tabulate+1) * sizeof(real) ); - fh = (real*) malloc( (control->tabulate+1) * sizeof(real) ); - fvdw = (real*) malloc( (control->tabulate+1) * sizeof(real) ); - fCEvd = (real*) malloc( (control->tabulate+1) * sizeof(real) ); - fele = (real*) malloc( (control->tabulate+1) * sizeof(real) ); - fCEclmb = (real*) malloc( (control->tabulate+1) * sizeof(real) ); - - /* allocate Long-Range LookUp Table space based on + h = (real*) malloc( (control->tabulate + 1) * sizeof(real) ); + fh = (real*) malloc( (control->tabulate + 1) * sizeof(real) ); + fvdw = (real*) malloc( (control->tabulate + 1) * sizeof(real) ); + fCEvd = (real*) malloc( (control->tabulate + 1) * sizeof(real) ); + fele = (real*) malloc( (control->tabulate + 1) * sizeof(real) ); + fCEclmb = (real*) malloc( (control->tabulate + 1) * sizeof(real) ); + + /* allocate Long-Range LookUp Table space based on number of atom types in the ffield file */ - //LR = (LR_lookup_table**) malloc( num_atom_types * sizeof(LR_lookup_table*) ); - //for( i = 0; i < num_atom_types; ++i ) - // LR[i] = (LR_lookup_table*) malloc(num_atom_types * sizeof(LR_lookup_table)); - - LR = (LR_lookup_table*) malloc(num_atom_types * num_atom_types * sizeof(LR_lookup_table)); + LR = (LR_lookup_table*) malloc( num_atom_types * num_atom_types * sizeof(LR_lookup_table) ); /* most atom types in ffield file will not exist in the current simulation. to avoid unnecessary lookup table space, determine the atom types that exist in the current simulation */ - for( i = 0; i < MAX_ATOM_TYPES; ++i ) + for ( i = 0; i < MAX_ATOM_TYPES; ++i ) + { existing_types[i] = 0; - for( i = 0; i < system->N; ++i ) + } + for ( i = 0; i < system->N; ++i ) + { existing_types[ system->atoms[i].type ] = 1; + } /* fill in the lookup table entries for existing atom types. only lower half should be enough. 
*/ - for( i = 0; i < num_atom_types; ++i ) - if( existing_types[i] ) - for( j = i; j < num_atom_types; ++j ) - if( existing_types[j] ) { - LR[ index_lr (i,j,num_atom_types) ].xmin = 0; - LR[ index_lr (i,j,num_atom_types) ].xmax = control->r_cut; - LR[ index_lr (i,j,num_atom_types) ].n = control->tabulate + 1; - LR[ index_lr (i,j,num_atom_types) ].dx = dr; - LR[ index_lr (i,j,num_atom_types) ].inv_dx = control->tabulate / control->r_cut; - LR[ index_lr (i,j,num_atom_types) ].y = (LR_data*) - malloc(LR[ index_lr (i,j,num_atom_types) ].n * sizeof(LR_data)); - LR[ index_lr (i,j,num_atom_types) ].H = (cubic_spline_coef*) - malloc(LR[ index_lr (i,j,num_atom_types) ].n * sizeof(cubic_spline_coef)); - LR[ index_lr (i,j,num_atom_types) ].vdW = (cubic_spline_coef*) - malloc(LR[ index_lr (i,j,num_atom_types) ].n * sizeof(cubic_spline_coef)); - LR[ index_lr (i,j,num_atom_types) ].CEvd = (cubic_spline_coef*) - malloc(LR[ index_lr (i,j,num_atom_types) ].n * sizeof(cubic_spline_coef)); - LR[ index_lr (i,j,num_atom_types) ].ele = (cubic_spline_coef*) - malloc(LR[ index_lr (i,j,num_atom_types) ].n * sizeof(cubic_spline_coef)); - LR[ index_lr (i,j,num_atom_types) ].CEclmb = (cubic_spline_coef*) - malloc(LR[ index_lr (i,j,num_atom_types) ].n * sizeof(cubic_spline_coef)); - - for( r = 1; r <= control->tabulate; ++r ) { - LR_vdW_Coulomb( system, control, i, j, r * dr, &(LR[ index_lr (i,j,num_atom_types) ].y[r]) ); - h[r] = LR[ index_lr (i,j,num_atom_types) ].dx; - fh[r] = LR[ index_lr (i,j,num_atom_types) ].y[r].H; - fvdw[r] = LR[ index_lr (i,j,num_atom_types) ].y[r].e_vdW; - fCEvd[r] = LR[ index_lr (i,j,num_atom_types) ].y[r].CEvd; - fele[r] = LR[ index_lr (i,j,num_atom_types) ].y[r].e_ele; - fCEclmb[r] = LR[ index_lr (i,j,num_atom_types) ].y[r].CEclmb; - - if( r == 1 ){ - v0_vdw = LR[ index_lr (i,j,num_atom_types) ].y[r].CEvd; - v0_ele = LR[ index_lr (i,j,num_atom_types) ].y[r].CEclmb; + for ( i = 0; i < num_atom_types; ++i ) + { + if ( existing_types[i] ) + { + for ( j = i; j < 
num_atom_types; ++j ) + { + if ( existing_types[j] ) + { + LR[ index_lr(i,j,num_atom_types) ].xmin = 0; + LR[ index_lr(i,j,num_atom_types) ].xmax = control->r_cut; + LR[ index_lr(i,j,num_atom_types) ].n = control->tabulate + 1; + LR[ index_lr(i,j,num_atom_types) ].dx = dr; + LR[ index_lr(i,j,num_atom_types) ].inv_dx = control->tabulate / control->r_cut; + LR[ index_lr(i,j,num_atom_types) ].y = (LR_data*) + malloc( LR[index_lr(i,j,num_atom_types)].n * sizeof(LR_data) ); + LR[ index_lr(i,j,num_atom_types) ].H = (cubic_spline_coef*) + malloc( LR[index_lr(i,j,num_atom_types)].n * sizeof(cubic_spline_coef) ); + LR[ index_lr(i,j,num_atom_types) ].vdW = (cubic_spline_coef*) + malloc( LR[index_lr(i,j,num_atom_types)].n * sizeof(cubic_spline_coef) ); + LR[ index_lr(i,j,num_atom_types) ].CEvd = (cubic_spline_coef*) + malloc( LR[index_lr(i,j,num_atom_types)].n * sizeof(cubic_spline_coef) ); + LR[ index_lr(i,j,num_atom_types) ].ele = (cubic_spline_coef*) + malloc( LR[index_lr(i,j,num_atom_types)].n * sizeof(cubic_spline_coef) ); + LR[ index_lr(i,j,num_atom_types) ].CEclmb = (cubic_spline_coef*) + malloc( LR[index_lr(i,j,num_atom_types)].n * sizeof(cubic_spline_coef) ); + + for ( r = 1; r <= control->tabulate; ++r ) + { + LR_vdW_Coulomb( system, control, i, j, r * dr, + &(LR[ index_lr(i,j,num_atom_types) ].y[r]) ); + h[r] = LR[ index_lr(i,j,num_atom_types) ].dx; + fh[r] = LR[ index_lr(i,j,num_atom_types) ].y[r].H; + fvdw[r] = LR[ index_lr(i,j,num_atom_types) ].y[r].e_vdW; + fCEvd[r] = LR[ index_lr(i,j,num_atom_types) ].y[r].CEvd; + fele[r] = LR[ index_lr(i,j,num_atom_types) ].y[r].e_ele; + fCEclmb[r] = LR[ index_lr(i,j,num_atom_types) ].y[r].CEclmb; + + if ( r == 1 ) + { + v0_vdw = LR[ index_lr(i,j,num_atom_types) ].y[r].CEvd; + v0_ele = LR[ index_lr(i,j,num_atom_types) ].y[r].CEclmb; } - else if( r == control->tabulate ){ - vlast_vdw = LR[ index_lr (i,j,num_atom_types) ].y[r].CEvd; - vlast_ele = LR[ index_lr (i,j,num_atom_types) ].y[r].CEclmb; + else if ( r == 
control->tabulate ) + { + vlast_vdw = LR[ index_lr(i,j,num_atom_types) ].y[r].CEvd; + vlast_ele = LR[ index_lr(i,j,num_atom_types) ].y[r].CEclmb; } } - /*//fprintf( stderr, "%-6s %-6s %-6s\n", "r", "h", "fh" ); + /*fprintf( stderr, "%-6s %-6s %-6s\n", "r", "h", "fh" ); for( r = 1; r <= control->tabulate; ++r ) - //fprintf( stderr, "%f %f %f\n", r * dr, h[r], fh[r] ); */ - Natural_Cubic_Spline( &h[1], &fh[1], - &(LR[ index_lr (i,j,num_atom_types) ].H[1]), control->tabulate+1 ); + fprintf( stderr, "%f %f %f\n", r * dr, h[r], fh[r] ); */ + Natural_Cubic_Spline( &h[1], &fh[1], + &(LR[ index_lr(i,j,num_atom_types) ].H[1]), control->tabulate + 1 ); - /*//fprintf( stderr, "%-6s %-6s %-6s\n", "r", "h", "fvdw" ); + /*fprintf( stderr, "%-6s %-6s %-6s\n", "r", "h", "fvdw" ); for( r = 1; r <= control->tabulate; ++r ) - //fprintf( stderr, "%f %f %f\n", r * dr, h[r], fvdw[r] ); - //fprintf( stderr, "v0_vdw: %f, vlast_vdw: %f\n", v0_vdw, vlast_vdw ); - */ - Complete_Cubic_Spline( &h[1], &fvdw[1], v0_vdw, vlast_vdw, - &(LR[ index_lr (i,j,num_atom_types) ].vdW[1]), control->tabulate+1 ); - Natural_Cubic_Spline( &h[1], &fCEvd[1], - &(LR[ index_lr (i,j,num_atom_types) ].CEvd[1]), control->tabulate+1 ); - - /*//fprintf( stderr, "%-6s %-6s %-6s\n", "r", "h", "fele" ); + fprintf( stderr, "%f %f %f\n", r * dr, h[r], fvdw[r] ); + fprintf( stderr, "v0_vdw: %f, vlast_vdw: %f\n", v0_vdw, vlast_vdw ); + */ + Complete_Cubic_Spline( &h[1], &fvdw[1], v0_vdw, vlast_vdw, + &(LR[ index_lr(i,j,num_atom_types) ].vdW[1]), control->tabulate + 1 ); + Natural_Cubic_Spline( &h[1], &fCEvd[1], + &(LR[ index_lr(i,j,num_atom_types) ].CEvd[1]), control->tabulate + 1 ); + + /*fprintf( stderr, "%-6s %-6s %-6s\n", "r", "h", "fele" ); for( r = 1; r <= control->tabulate; ++r ) - //fprintf( stderr, "%f %f %f\n", r * dr, h[r], fele[r] ); - //fprintf( stderr, "v0_ele: %f, vlast_ele: %f\n", v0_ele, vlast_ele ); - */ - Complete_Cubic_Spline( &h[1], &fele[1], v0_ele, vlast_ele, - &(LR[ index_lr (i,j,num_atom_types) 
].ele[1]), control->tabulate+1 ); - Natural_Cubic_Spline( &h[1], &fCEclmb[1], - &(LR[ index_lr (i,j,num_atom_types) ].CEclmb[1]), control->tabulate+1 ); + fprintf( stderr, "%f %f %f\n", r * dr, h[r], fele[r] ); + fprintf( stderr, "v0_ele: %f, vlast_ele: %f\n", v0_ele, vlast_ele ); + */ + Complete_Cubic_Spline( &h[1], &fele[1], v0_ele, vlast_ele, + &(LR[ index_lr(i,j,num_atom_types) ].ele[1]), control->tabulate + 1 ); + Natural_Cubic_Spline( &h[1], &fCEclmb[1], + &(LR[ index_lr(i,j,num_atom_types) ].CEclmb[1]), control->tabulate + 1 ); } + } + } + } /***** //test LR-Lookup table - evdw_maxerr = 0; - eele_maxerr = 0; - for( i = 0; i < num_atom_types; ++i ) - if( existing_types[i] ) - for( j = i; j < num_atom_types; ++j ) - if( existing_types[j] ) { - for( r = 1; r <= 100; ++r ) { - rand_dist = (real)rand()/RAND_MAX * control->r_cut; - LR_vdW_Coulomb( system, control, i, j, rand_dist, &y ); - LR_Lookup( &(LR[i][j]), rand_dist, &y_spline ); - - evdw_abserr = fabs(y.e_vdW - y_spline.e_vdW); - evdw_relerr = fabs(evdw_abserr / y.e_vdW); - fvdw_abserr = fabs(y.CEvd - y_spline.CEvd); - fvdw_relerr = fabs(fvdw_abserr / y.CEvd); - eele_abserr = fabs(y.e_ele - y_spline.e_ele); - eele_relerr = fabs(eele_abserr / y.e_ele); - fele_abserr = fabs(y.CEclmb - y_spline.CEclmb); - fele_relerr = fabs(fele_abserr / y.CEclmb); - - if( evdw_relerr > 1e-10 || eele_relerr > 1e-10 ){ - //fprintf( stderr, "rand_dist = %24.15e\n", rand_dist ); - //fprintf( stderr, "%24.15e %24.15e %24.15e %24.15e\n", - y.H, y_spline.H, - fabs(y.H-y_spline.H), fabs((y.H-y_spline.H)/y.H) ); - - //fprintf( stderr, "%24.15e %24.15e %24.15e %24.15e\n", - y.e_vdW, y_spline.e_vdW, evdw_abserr, evdw_relerr ); - //fprintf( stderr, "%24.15e %24.15e %24.15e %24.15e\n", - y.CEvd, y_spline.CEvd, fvdw_abserr, fvdw_relerr ); - - //fprintf( stderr, "%24.15e %24.15e %24.15e %24.15e\n", - y.e_ele, y_spline.e_ele, eele_abserr, eele_relerr ); - //fprintf( stderr, "%24.15e %24.15e %24.15e %24.15e\n", - y.CEclmb, y_spline.CEclmb, 
fele_abserr, fele_relerr ); - } - - if( evdw_relerr > evdw_maxerr ) - evdw_maxerr = evdw_relerr; - if( eele_relerr > eele_maxerr ) - eele_maxerr = eele_relerr; - } - } - //fprintf( stderr, "evdw_maxerr: %24.15e\n", evdw_maxerr ); - //fprintf( stderr, "eele_maxerr: %24.15e\n", eele_maxerr ); - *******/ - + evdw_maxerr = 0; + eele_maxerr = 0; + for( i = 0; i < num_atom_types; ++i ) + if( existing_types[i] ) + for( j = i; j < num_atom_types; ++j ) + if( existing_types[j] ) { + for( r = 1; r <= 100; ++r ) { + rand_dist = (real)rand()/RAND_MAX * control->r_cut; + LR_vdW_Coulomb( system, control, i, j, rand_dist, &y ); + LR_Lookup( &(LR[i][j]), rand_dist, &y_spline ); + + evdw_abserr = fabs(y.e_vdW - y_spline.e_vdW); + evdw_relerr = fabs(evdw_abserr / y.e_vdW); + fvdw_abserr = fabs(y.CEvd - y_spline.CEvd); + fvdw_relerr = fabs(fvdw_abserr / y.CEvd); + eele_abserr = fabs(y.e_ele - y_spline.e_ele); + eele_relerr = fabs(eele_abserr / y.e_ele); + fele_abserr = fabs(y.CEclmb - y_spline.CEclmb); + fele_relerr = fabs(fele_abserr / y.CEclmb); + + if( evdw_relerr > 1e-10 || eele_relerr > 1e-10 ){ + fprintf( stderr, "rand_dist = %24.15e\n", rand_dist ); + fprintf( stderr, "%24.15e %24.15e %24.15e %24.15e\n", + y.H, y_spline.H, + fabs(y.H-y_spline.H), fabs((y.H-y_spline.H)/y.H) ); + + fprintf( stderr, "%24.15e %24.15e %24.15e %24.15e\n", + y.e_vdW, y_spline.e_vdW, evdw_abserr, evdw_relerr ); + fprintf( stderr, "%24.15e %24.15e %24.15e %24.15e\n", + y.CEvd, y_spline.CEvd, fvdw_abserr, fvdw_relerr ); + + fprintf( stderr, "%24.15e %24.15e %24.15e %24.15e\n", + y.e_ele, y_spline.e_ele, eele_abserr, eele_relerr ); + fprintf( stderr, "%24.15e %24.15e %24.15e %24.15e\n", + y.CEclmb, y_spline.CEclmb, fele_abserr, fele_relerr ); + } + + if( evdw_relerr > evdw_maxerr ) + evdw_maxerr = evdw_relerr; + if( eele_relerr > eele_maxerr ) + eele_maxerr = eele_relerr; + } + } + fprintf( stderr, "evdw_maxerr: %24.15e\n", evdw_maxerr ); + fprintf( stderr, "eele_maxerr: %24.15e\n", eele_maxerr ); + 
*******/ + free(h); free(fh); free(fvdw); @@ -383,24 +420,26 @@ real Lookup( real x, lookup_table* t ) real b; int i; - /* if ( x < t->xmin) - { - //fprintf(stderr,"Domain check %lf > %lf\n",t->xmin,x); - exit(0); + /* + if ( x < t->xmin) + { + fprintf(stderr,"Domain check %lf > %lf\n",t->xmin,x); + exit(0); } - if ( x > t->xmax) + if ( x > t->xmax) { - //fprintf(stderr,"Domain check %lf < %lf\n",t->xmax,x); - exit(0); - } */ + fprintf(stderr,"Domain check %lf < %lf\n",t->xmax,x); + exit(0); + } + */ i = Lookup_Index_Of( x, t ); x1 = i * t->dx + t->xmin; - x2 = (i+1) * t->dx + t->xmin; + x2 = (i + 1) * t->dx + t->xmin; - b = ( x2 * t->y[i] - x1 * t->y[i+1] ) * t->inv_dx; - // //fprintf( stdout,"SLookup_Entry: %d, %lf, %lf, %lf, %lf: %lf, %lf\n", + b = ( x2 * t->y[i] - x1 * t->y[i + 1] ) * t->inv_dx; + // fprintf( stdout,"SLookup_Entry: %d, %lf, %lf, %lf, %lf: %lf, %lf\n", // i,x1,x2,x,b,t->one_over_dx*(t->y[i+1]-t->y[i])*x+b,exp(x)); - return t->inv_dx * ( t->y[i+1] - t->y[i] ) * x + b; + return t->inv_dx * ( t->y[i + 1] - t->y[i] ) * x + b; } diff --git a/PuReMD-GPU/src/mytypes.h b/PuReMD-GPU/src/mytypes.h index 0eb1856a578df369aba28d8dbfb55dcf57348e9d..b04a9de39c8dbf487cac6d02a5e098a220862d5a 100644 --- a/PuReMD-GPU/src/mytypes.h +++ b/PuReMD-GPU/src/mytypes.h @@ -28,12 +28,6 @@ #define GLOBAL __global__ #define HOST_DEVICE __host__ __device__ - #include <cuda_runtime.h> - #include <cuda.h> - #include <cuda_runtime_api.h> - - #include <cublas_v2.h> - #include <cusparse_v2.h> #if __CUDA_ARCH__ < 600 #define MYATOMICADD myAtomicAdd #else @@ -55,14 +49,25 @@ #include "config.h" #endif -#include "math.h" -//#include "random.h" -#include "stdio.h" -#include "stdlib.h" -#include "string.h" -#include "sys/time.h" -#include "time.h" -#include "zlib.h" +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <time.h> + +#ifdef _OPENMP + #include <omp.h> +#endif + +#ifdef HAVE_CUDA + #include <cuda_runtime.h> + 
#include <cuda.h> + #include <cuda_runtime_api.h> + + #include <cublas_v2.h> + #include <cusparse_v2.h> +#endif //#define DEBUG_FOCUS //#define TEST_FORCES @@ -75,6 +80,7 @@ #define TRUE 1 #define FALSE 0 +#define LOG log #define EXP exp #define SQRT sqrt #define POW pow @@ -82,6 +88,8 @@ #define COS cos #define SIN sin #define TAN tan +#define FABS fabs +#define FMOD fmod #define SQR(x) ((x)*(x)) #define CUBE(x) ((x)*(x)*(x)) @@ -90,6 +98,15 @@ #define MAX( x, y ) (((x) > (y)) ? (x) : (y)) #define MIN( x, y ) (((x) < (y)) ? (x) : (y)) +/* NaN IEEE 754 representation for C99 in math.h + * Note: function choice must match REAL typedef below */ +#ifdef NAN + #define IS_NAN_REAL(a) (isnan(a)) +#else + #warn "No support for NaN" + #define NAN_REAL(a) (0) +#endif + #define PI 3.14159265 #define C_ele 332.06371 //#define K_B 503.398008 // kcal/mol/K @@ -106,7 +123,11 @@ #define AVOGNR 6.0221367e23 #define P_CONV 1.0e-24 * AVOGNR * JOULES_to_CAL -#define MAX_STR 100 // MAX STRing length (used for naming) +#define MAX_STR 1024 +#define MAX_LINE 1024 +#define MAX_TOKENS 1024 +#define MAX_TOKEN_LEN 1024 + #define MAX_ATOM_ID 100000 #define MAX_RESTRICT 15 #define MAX_MOLECULE_SIZE 20 @@ -124,23 +145,7 @@ #define MAX_ITR 10 #define RESTART 50 -#define FILE_NOT_FOUND_ERR 10 -#define UNKNOWN_ATOM_TYPE_ERR 11 -#define CANNOT_OPEN_OUTFILE 12 -#define INIT_ERR 13 -#define INSUFFICIENT_SPACE 14 -#define UNKNOWN_OPTION 15 -#define INVALID_INPUT 16 - -#define C_ATOM 0 -#define H_ATOM 1 -#define O_ATOM 2 -#define N_ATOM 3 -#define S_ATOM 4 -#define SI_ATOM 5 -#define GE_ATOM 6 -#define X_ATOM 7 - +/* tolerance used for validating GPU results against host */ #define GPU_TOLERANCE 1e-5 #define ZERO 0.000000000000000e+00 @@ -157,6 +162,7 @@ #define DANGER_ZONE 0.95 #define LOOSE_ZONE 0.75 +//TODO: make enum #define RES_GRID_ATOMS 0x01 #define RES_GRID_TOP 0x02 #define RES_GRID_MARK 0x03 @@ -165,17 +171,21 @@ #define RES_GRID_NBRS 0x06 #define RES_GRID_NBRS_CP 0x07 +//TODO: make enum 
#define RES_SYSTEM_ATOMS 0x10 #define RES_SYSTEM_SIMULATION_BOX 0x11 +//TODO: make enum #define RES_REAX_INT_SBP 0x20 #define RES_REAX_INT_TBP 0x21 #define RES_REAX_INT_THBP 0x22 #define RES_REAX_INT_HBP 0x23 #define RES_REAX_INT_FBP 0x24 +//TODO: make enum #define RES_SIMULATION_DATA 0x30 +//TODO: make enum #define RES_STORAGE 0x401 #define RES_STORAGE_HBOND_INDEX 0x402 #define RES_STORAGE_TOTAL_BOND_ORDER 0x403 @@ -229,13 +239,17 @@ #define RES_STORAGE_RESTRICTED_LIST 0x432 #define RES_STORAGE_ORIG_ID 0x433 +//TODO: make enum #define RES_CONTROL_PARAMS 0x50 +//TODO: make enum #define RES_GLOBAL_PARAMS 0x60 +//TODO: make enum #define RES_SPARSE_MATRIX_INDEX 0x70 #define RES_SPARSE_MATRIX_ENTRY 0x71 +//TODO: make enum #define RES_LR_LOOKUP_Y 0x80 #define RES_LR_LOOKUP_H 0x81 #define RES_LR_LOOKUP_VDW 0x82 @@ -244,6 +258,7 @@ #define RES_LR_LOOKUP_CECLMB 0x85 #define RES_LR_LOOKUP_TABLE 0x86 +//TODO: make enum #define RES_SCRATCH 0x90 #define LIST_INDEX 0x00 @@ -314,17 +329,78 @@ typedef real rvec[3]; typedef int ivec[3]; typedef real rtensor[3][3]; -enum {NVE, NVT, NPT, sNPT, iNPT, ensNR, bNVT}; -enum {FAR_NBRS, NEAR_NBRS, THREE_BODIES, BONDS, OLD_BONDS, - HBONDS, DBO, DDELTA, LIST_N - }; -enum {TYP_VOID, TYP_THREE_BODY, TYP_BOND, TYP_HBOND, TYP_DBO, - TYP_DDELTA, TYP_FAR_NEIGHBOR, TYP_NEAR_NEIGHBOR, TYP_N - }; -enum {UNKNOWN, WATER}; -enum {NO_ANALYSIS, FRAGMENTS, REACTIONS, NUM_ANALYSIS}; -enum {WRITE_ASCII, WRITE_BINARY, RF_N}; -enum {XYZ, PDB, BGF, ASCII_RESTART, BINARY_RESTART, GF_N}; +/* config params */ +enum ensemble +{ + NVE = 0, NVT = 1, NPT = 2, sNPT = 3, iNPT = 4, ensNR = 5, bNVT = 6, +}; + +enum interaction_list_offets +{ + FAR_NBRS = 0, NEAR_NBRS = 1, THREE_BODIES = 2, BONDS = 3, OLD_BONDS = 4, + HBONDS = 5, DBO = 6, DDELTA = 7, LIST_N = 8, +}; + +enum interaction_type +{ + TYP_VOID = 0, TYP_THREE_BODY = 1, TYP_BOND = 2, TYP_HBOND = 3, TYP_DBO = 4, + TYP_DDELTA = 5, TYP_FAR_NEIGHBOR = 6, TYP_NEAR_NEIGHBOR = 7, TYP_N = 8, +}; + +enum errors +{ + 
FILE_NOT_FOUND = -10, + UNKNOWN_ATOM_TYPE = -11, + CANNOT_OPEN_FILE = -12, + CANNOT_INITIALIZE = -13, + INSUFFICIENT_MEMORY = -14, + UNKNOWN_OPTION = -15, + INVALID_INPUT = -16, + INVALID_GEO = -17, + NUMERIC_BREAKDOWN = -18, + RUNTIME_ERROR = -19, +}; + +enum atoms +{ + C_ATOM = 0, H_ATOM = 1, O_ATOM = 2, N_ATOM = 3, + S_ATOM = 4, SI_ATOM = 5, GE_ATOM = 6, X_ATOM = 7, +}; + +enum molecule_type +{ + UNKNOWN = 0, WATER = 1, +}; + +enum molecular_analysis_type +{ + NO_ANALYSIS = 0, FRAGMENTS = 1, REACTIONS = 2, NUM_ANALYSIS = 3, +}; + +enum restart_format +{ + WRITE_ASCII = 0, WRITE_BINARY = 1, RF_N = 2, +}; + +enum geo_formats +{ + CUSTOM = 0, PDB = 1, BGF = 2, ASCII_RESTART = 3, BINARY_RESTART = 4, GF_N = 5, +}; + +enum solver +{ + GMRES_S = 0, GMRES_H_S = 1, CG_S = 2, SDM_S = 3, +}; + +enum pre_comp +{ + DIAG_PC = 0, ICHOLT_PC = 1, ILU_PAR_PC = 2, ILUT_PAR_PC = 3, ILU_SUPERLU_MT_PC = 4, +}; + +enum pre_app +{ + NONE_PA = 0, TRI_SOLVE_PA = 1, TRI_SOLVE_LEVEL_SCHED_PA = 2, TRI_SOLVE_GC_PA = 3, JACOBI_ITER_PA = 4, +}; /* Global params mapping */ @@ -502,33 +578,36 @@ typedef struct { int num_atom_types; global_parameters gp; - global_parameters d_gp; - single_body_parameters *sbp; - single_body_parameters *d_sbp; - two_body_parameters *tbp; - two_body_parameters *d_tbp; - three_body_header *thbp; - three_body_header *d_thbp; - hbond_parameters *hbp; - hbond_parameters *d_hbp; - four_body_header *fbp; - four_body_header *d_fbp; +#ifdef HAVE_CUDA + global_parameters d_gp; + single_body_parameters *d_sbp; + two_body_parameters *d_tbp; + three_body_header *d_thbp; + hbond_parameters *d_hbp; + four_body_header *d_fbp; +#endif } reax_interaction; typedef struct { - rvec x; /* Position, velocity, force on atom */ + /* Position, velocity, force on atom */ + rvec x; rvec v; rvec f; - real q; /* Charge on the atom */ - int type; /* Type of this atom */ + + /* Charge on the atom */ + real q; + + /* Type of this atom */ + int type; + char name[5]; char spare[7]; } reax_atom; @@ 
-561,9 +640,6 @@ typedef struct rvec len; rvec inv_len; - //CUDA - int max_cuda_nbrs; //TODO remove this not used anymore - int *atoms; int *top; int *mark; @@ -578,7 +654,16 @@ typedef struct { int N; - //CUDA + reax_atom *atoms; + reax_interaction reaxprm; + simulation_box box; + grid g; + +#ifdef HAVE_CUDA + reax_atom *d_atoms; + simulation_box *d_box; + grid d_g; + //int max_thb_intrs; int max_sparse_matrix_entries; int num_nbrs; @@ -586,17 +671,7 @@ typedef struct int num_hbonds; int num_thbodies; int init_thblist; - - reax_atom *atoms; - reax_atom *d_atoms; - - reax_interaction reaxprm; - - simulation_box box; - simulation_box *d_box; - - grid g; - grid d_g; +#endif } reax_system; @@ -616,23 +691,22 @@ typedef struct 2 : NPT (Parrinello-Rehman-Nose-Hoover) Anisotropic 3 : sNPT (Parrinello-Rehman-Nose-Hoover) semiisotropic 4 : iNPT (Parrinello-Rehman-Nose-Hoover) isotropic */ - int ensemble; - int nsteps; - int periodic_boundaries; - int restrict_bonds; - int tabulate; + int ensemble; + int nsteps; + int periodic_boundaries; + int restrict_bonds; + int tabulate; ivec periodic_images; real dt; int reneighbor; real vlist_cut; real nbr_cut; - real r_cut, r_low; // upper and lower taper + real r_cut, r_sp_cut, r_low; // upper, reduced upper, and lower taper real bo_cut; real thb_cut; real hb_cut; real Tap7, Tap6, Tap5, Tap4, Tap3, Tap2, Tap1, Tap0; - real q_err; int max_far_nbrs; real T_init, T_final, T; @@ -656,16 +730,26 @@ typedef struct int freq_diffusion_coef; int restrict_type; - int refactor; - real droptol; + unsigned int qeq_solver_type; + real qeq_solver_q_err; + real qeq_domain_sparsity; + unsigned int qeq_domain_sparsify_enabled; + unsigned int pre_comp_type; + unsigned int pre_comp_refactor; + real pre_comp_droptol; + unsigned int pre_comp_sweeps; + unsigned int pre_app_type; + unsigned int pre_app_jacobi_iters; int molec_anal; int freq_molec_anal; real bg_cut; int num_ignored; - int ignore[MAX_ATOM_TYPES]; + int ignore[MAX_ATOM_TYPES]; +#ifdef 
HAVE_CUDA void *d_control; +#endif } control_params; @@ -720,7 +804,14 @@ typedef struct real bonded; real nonb; real QEq; - int matvecs; + real QEq_sort_mat_rows; + real pre_comp; + real pre_app; + int solver_iters; + real solver_spmv; + real solver_vector_ops; + real solver_orthog; + real solver_tri_solve; } reax_timing; @@ -776,9 +867,11 @@ typedef struct rvec tot_press; reax_timing timing; - //CUDA + +#ifdef HAVE_CUDA reax_timing d_timing; void *d_simulation_data; +#endif } simulation_data; @@ -789,8 +882,9 @@ typedef struct real theta, cos_theta; rvec dcos_di, dcos_dj, dcos_dk; - //CUDA +#ifdef HAVE_CUDA int i, j, k; +#endif } three_body_interaction_data; @@ -813,9 +907,11 @@ typedef struct rvec dvec; // real H; //, Tap, inv_dr3gamij_1, inv_dr3gamij_3; - //CUDA +#ifdef HAVE_CUDA //int sym_index; //rvec h_f; +#endif + char spare[16]; } far_neighbor_data; @@ -868,6 +964,7 @@ typedef struct rvec dvec; bond_order_data bo_data; +#ifdef HAVE_CUDA //single body -- lone pair real scratch; @@ -887,42 +984,47 @@ typedef struct //compute_total_forces rvec t_f; +#endif } bond_data; +/* compressed row storage (crs) format + * See, e.g., + * http://netlib.org/linalg/html_templates/node91.html#SECTION00931100000000000000 + * + * m: number of nonzeros (NNZ) ALLOCATED + * n: number of rows + * start: row pointer (last element contains ACTUAL NNZ) + * j: column index for corresponding matrix entry + * val: matrix entry + * */ typedef struct { - int j; - real val; -} sparse_matrix_entry; - - -typedef struct -{ - int n, m; - int *start; - //CUDA - int *end; - sparse_matrix_entry *entries; - - int *j; + unsigned int n, m; + unsigned int *start; +#ifdef HAVE_CUDA + unsigned int *end; +#endif + unsigned int *j; real *val; - } sparse_matrix; typedef struct { - int estimate_nbrs; int num_far; int Htop; int hbonds; int num_hbonds; int bonds; int num_bonds; - int thbody; int num_3body; int gcell_atoms; + +#ifdef HAVE_CUDA + int estimate_nbrs; + int thbody; +#endif } reallocate_data; @@ 
-937,7 +1039,7 @@ typedef struct rvec *dDeltap_self; /* QEq storage */ - sparse_matrix H, L, U; + sparse_matrix *H, *H_sp, *L, *U; real *droptol; real *w; real *Hdia_inv; @@ -990,6 +1092,7 @@ typedef struct } static_storage; +/* interaction lists */ typedef struct { int n; @@ -1127,25 +1230,25 @@ typedef void (*evolve_function)(reax_system*, control_params*, list**, output_controls*); typedef real (*lookup_function)(real); -extern lookup_table Exp, Sqrt, Cube_Root, Four_Third_Root, Cos, Sin, ACos; +extern lookup_table Exp, Sqrt, Cube_Root, Four_Third_Root, Cos, Sin, ACos; extern LR_lookup_table *LR; - typedef void (*get_far_neighbors_function)(rvec, rvec, simulation_box*, - control_params*, far_neighbor_data*, - int*); + control_params*, far_neighbor_data*, int*); + +extern reax_timing d_timing; -/* CUDA structures */ +#ifdef HAVE_CUDA extern list *dev_lists; extern static_storage *dev_workspace; extern LR_lookup_table *d_LR; -extern reax_timing d_timing; -//Scratch Pad usage. +/* scratch Pad usage */ extern void *scratch; extern int BLOCKS, BLOCKS_POW_2, BLOCK_SIZE; extern int MATVEC_BLOCKS; +#endif #endif diff --git a/PuReMD-GPU/src/neighbors.c b/PuReMD-GPU/src/neighbors.c index 5f425e672080d2d4a272f7aca1859c45d8dde17d..7a005f081d57e26cc86bde6501e72bd64d6bc2cc 100644 --- a/PuReMD-GPU/src/neighbors.c +++ b/PuReMD-GPU/src/neighbors.c @@ -1,19 +1,20 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the 
Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ @@ -26,51 +27,11 @@ #include "list.h" #include "reset_utils.h" #include "system_props.h" +#include "tool_box.h" #include "vector.h" -int Are_Far_Neighbors( rvec x1, rvec x2, simulation_box *box, - real cutoff, far_neighbor_data *data ) -{ - real norm_sqr, d, tmp; - int i; - - norm_sqr = 0; - - for( i = 0; i < 3; i++ ) { - d = x2[i] - x1[i]; - tmp = SQR(d); - - if( tmp >= SQR( box->box_norms[i] / 2.0 ) ) { - if( x2[i] > x1[i] ) { - d -= box->box_norms[i]; - data->rel_box[i] = -1; - } - else { - d += box->box_norms[i]; - data->rel_box[i] = +1; - } - - data->dvec[i] = d; - norm_sqr += SQR(d); - } - else { - data->dvec[i] = d; - norm_sqr += tmp; - data->rel_box[i] = 0; - } - } - - if( norm_sqr <= SQR(cutoff) ){ - data->d = sqrt(norm_sqr); - return 1; - } - - return 0; -} - - -void Generate_Neighbor_Lists( reax_system *system, control_params *control, +void Generate_Neighbor_Lists( reax_system *system, control_params *control, simulation_data *data, static_storage *workspace, list **lists, output_controls *out_control ) { @@ -86,54 +47,61 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, far_neighbor_data *nbr_data; real t_start, t_elapsed; + t_start = Get_Time( ); // fprintf( stderr, "\n\tentered nbrs - " ); g = &( system->g ); far_nbrs = (*lists) + FAR_NBRS; Bin_Atoms( system, workspace ); - - t_start = Get_Time( ); - // fprintf( stderr, "atoms sorted - " ); num_far = 0; /* first pick up a cell in the grid */ - for( i = 0; i < g->ncell[0]; i++ ) - for( j = 
0; j < g->ncell[1]; j++ ) - for( k = 0; k < g->ncell[2]; k++ ) { - nbrs = &g->nbrs[ index_grid_nbrs (i,j,k,0,g) ]; - nbrs_cp = &g->nbrs_cp[ index_grid_nbrs (i,j,k,0,g) ]; + for ( i = 0; i < g->ncell[0]; i++ ) + { + for ( j = 0; j < g->ncell[1]; j++ ) + { + for ( k = 0; k < g->ncell[2]; k++ ) + { + nbrs = &g->nbrs[ index_grid_nbrs(i,j,k,0,g) ]; + nbrs_cp = &g->nbrs_cp[ index_grid_nbrs(i,j,k,0,g) ]; //fprintf( stderr, "gridcell %d %d %d\n", i, j, k ); /* pick up an atom from the current cell */ - for(l = 0; l < g->top[ index_grid_3d (i,j,k,g) ]; ++l ){ - atom1 = g->atoms[ index_grid_atoms (i,j,k,l,g) ]; + for(l = 0; l < g->top[ index_grid_3d(i,j,k,g) ]; ++l ) + { + atom1 = g->atoms[ index_grid_atoms(i,j,k,l,g) ]; Set_Start_Index( atom1, num_far, far_nbrs ); //fprintf( stderr, "\tatom %d\n", atom1 ); itr = 0; - while( nbrs[itr][0] >= 0 ){ + while ( nbrs[itr][0] >= 0 ) + { x = nbrs[itr][0]; y = nbrs[itr][1]; z = nbrs[itr][2]; //fprintf( stderr, "\t\tgridcell %d %d %d\n", x, y, z ); - if( DistSqr_to_CP(nbrs_cp[itr], system->atoms[atom1].x ) <= - SQR(control->vlist_cut) ) { - nbr_atoms = &g->atoms[ index_grid_atoms (x,y,z,0,g) ]; - max = g->top[ index_grid_3d (x,y,z,g) ]; + if ( DistSqr_to_CP(nbrs_cp[itr], system->atoms[atom1].x ) <= + SQR(control->vlist_cut) ) + { + nbr_atoms = &g->atoms[ index_grid_atoms(x,y,z,0,g) ]; + max = g->top[ index_grid_3d(x,y,z,g) ]; //fprintf( stderr, "\t\tmax: %d\n", max ); /* pick up another atom from the neighbor cell */ - for( m = 0; m < max; ++m ) { + for ( m = 0; m < max; ++m ) + { atom2 = nbr_atoms[m]; - if( atom1 > atom2 ) { + if ( atom1 > atom2 ) + { nbr_data = &(far_nbrs->select.far_nbr_list[num_far]); - if(Are_Far_Neighbors(system->atoms[atom1].x, - system->atoms[atom2].x, - &(system->box), control->vlist_cut, - nbr_data)) { + //fprintf (stderr, " %f %f %f \n", nbr_data->dvec[0], nbr_data->dvec[1], nbr_data->dvec[2]); + if (Are_Far_Neighbors(system->atoms[atom1].x, + system->atoms[atom2].x, + &(system->box), control->vlist_cut, + 
nbr_data)) + { nbr_data->nbr = atom2; - ++num_far; } } @@ -144,20 +112,22 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, } Set_End_Index( atom1, num_far, far_nbrs ); - //fprintf(stderr, "i:%d, start: %d, end: %d - itr: %d\n", + //fprintf(stderr, "i:%d, start: %d, end: %d - itr: %d\n", // atom1,Start_Index(atom1,far_nbrs),End_Index(atom1,far_nbrs), - // itr); + // itr); } } + } + } - fprintf (stderr, " TOTAL HOST NEIGHBORS : %d \n", num_far); - - if( num_far > far_nbrs->num_intrs * DANGER_ZONE ) { + if ( num_far > far_nbrs->num_intrs * DANGER_ZONE ) + { workspace->realloc.num_far = num_far; - if( num_far > far_nbrs->num_intrs ){ + if ( num_far > far_nbrs->num_intrs ) + { fprintf( stderr, "step%d-ran out of space on far_nbrs: top=%d, max=%d", - data->step, num_far, far_nbrs->num_intrs ); - exit( INSUFFICIENT_SPACE ); + data->step, num_far, far_nbrs->num_intrs ); + exit( INSUFFICIENT_MEMORY ); } } @@ -165,25 +135,24 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, data->timing.nbrs += t_elapsed; #if defined(DEBUG) - for( i = 0; i < system->N; ++i ) { - qsort( &(far_nbrs->select.far_nbr_list[ Start_Index(i, far_nbrs) ]), - Num_Entries(i, far_nbrs), sizeof(far_neighbor_data), - compare_far_nbrs ); + for ( i = 0; i < system->N; ++i ) + { + qsort( &(far_nbrs->select.far_nbr_list[ Start_Index(i, far_nbrs) ]), + Num_Entries(i, far_nbrs), sizeof(far_neighbor_data), + compare_far_nbrs ); } #endif - -#if defined(DEBUG_FOCUS) - //fprintf( stderr, "nbrs - "); - //fprintf( stderr, "nbrs done, num_far: %d\n", num_far ); +#if defined(DEBUG_FOCUS) + fprintf( stderr, "nbrs - "); + fprintf( stderr, "nbrs done, num_far: %d\n", num_far ); #endif - #if defined(TEST_ENERGY) //Print_Far_Neighbors( system, control, workspace, lists ); #endif } -int Estimate_NumNeighbors( reax_system *system, control_params *control, +int Estimate_NumNeighbors( reax_system *system, control_params *control, static_storage *workspace, list **lists ) 
{ int i, j, k, l, m, itr; @@ -195,53 +164,63 @@ int Estimate_NumNeighbors( reax_system *system, control_params *control, rvec *nbrs_cp; grid *g; far_neighbor_data nbr_data; - +#ifdef HAVE_CUDA int start = 0, finish = 0; +#endif // fprintf( stderr, "\n\tentered nbrs - " ); g = &( system->g ); Bin_Atoms( system, workspace ); // fprintf( stderr, "atoms sorted - " ); num_far = 0; +#ifdef HAVE_CUDA g->max_cuda_nbrs = 0; +#endif /* first pick up a cell in the grid */ - for( i = 0; i < g->ncell[0]; i++ ) - for( j = 0; j < g->ncell[1]; j++ ) - for( k = 0; k < g->ncell[2]; k++ ) { - nbrs = &g->nbrs[index_grid_nbrs (i,j,k,0,g) ]; - nbrs_cp = &g->nbrs_cp[index_grid_nbrs (i,j,k,0,g) ]; + for ( i = 0; i < g->ncell[0]; i++ ) + { + for ( j = 0; j < g->ncell[1]; j++ ) + { + for ( k = 0; k < g->ncell[2]; k++ ) + { + nbrs = &g->nbrs[ index_grid_nbrs(i,j,k,0,g) ]; + nbrs_cp = &g->nbrs_cp[ index_grid_nbrs(i,j,k,0,g) ]; //fprintf( stderr, "gridcell %d %d %d\n", i, j, k ); /* pick up an atom from the current cell */ - for(l = 0; l < g->top[index_grid_3d (i,j,k,g) ]; ++l ){ - atom1 = g->atoms[index_grid_atoms (i,j,k,l,g) ]; - start = num_far; + for(l = 0; l < g->top[ index_grid_3d(i,j,k,g) ]; ++l ) + { + atom1 = g->atoms[ index_grid_atoms(i,j,k,l,g) ]; itr = 0; - while( nbrs[itr][0] >= 0 ){ + while ( nbrs[itr][0] >= 0 ) + { x = nbrs[itr][0]; y = nbrs[itr][1]; z = nbrs[itr][2]; //fprintf( stderr, "\t\tgridcell %d %d %d\n", x, y, z ); - if( DistSqr_to_CP(nbrs_cp[itr], system->atoms[atom1].x ) <= - SQR(control->vlist_cut) ) { - nbr_atoms = &g->atoms[index_grid_atoms (x,y,z,0,g) ]; - max = g->top[index_grid_3d (x,y,z,g) ]; + if ( DistSqr_to_CP(nbrs_cp[itr], system->atoms[atom1].x ) <= + SQR(control->vlist_cut) ) + { + nbr_atoms = &g->atoms[ index_grid_atoms(x,y,z,0,g) ]; + max = g->top[ index_grid_3d(x,y,z,g) ]; //fprintf( stderr, "\t\tmax: %d\n", max ); /* pick up another atom from the neighbor cell - - we have to compare atom1 with its own periodic images as well, - that's why there is 
also equality in the if stmt below */ - for( m = 0; m < max; ++m ) { + we have to compare atom1 with its own periodic images as well, + that's why there is also equality in the if stmt below */ + for ( m = 0; m < max; ++m ) + { atom2 = nbr_atoms[m]; //if( nbrs[itr+1][0] >= 0 || atom1 > atom2 ) { - if( atom1 > atom2 ) { - if(Are_Far_Neighbors(system->atoms[atom1].x, - system->atoms[atom2].x, - &(system->box), control->vlist_cut, - &nbr_data)) + if ( atom1 > atom2 ) + { + if (Are_Far_Neighbors(system->atoms[atom1].x, + system->atoms[atom2].x, + &(system->box), control->vlist_cut, + &nbr_data)) ++num_far; } } @@ -250,38 +229,46 @@ int Estimate_NumNeighbors( reax_system *system, control_params *control, ++itr; } - // finish note +#ifdef HAVE_CUDA finish = num_far; - if (g->max_cuda_nbrs <= (finish - start)){ + if (g->max_cuda_nbrs <= (finish - start)) + { g->max_cuda_nbrs = finish - start; } +#endif } } + } + } -#if defined(DEBUG_FOCUS) +#if defined(DEBUG_FOCUS) fprintf( stderr, "estimate nbrs done, num_far: %d\n", num_far ); #endif - return num_far * SAFE_ZONE; } -//Code not used anymore #if defined DONE - -void Choose_Neighbor_Finder( reax_system *system, control_params *control, +void Choose_Neighbor_Finder( reax_system *system, control_params *control, get_far_neighbors_function *Get_Far_Neighbors ) { - if( control->periodic_boundaries ) + if ( control->periodic_boundaries ) { - if( system->box.box_norms[0] > 2.0 * control->vlist_cut && + if ( system->box.box_norms[0] > 2.0 * control->vlist_cut && system->box.box_norms[1] > 2.0 * control->vlist_cut && system->box.box_norms[2] > 2.0 * control->vlist_cut ) + { (*Get_Far_Neighbors) = Get_Periodic_Far_Neighbors_Big_Box; - else (*Get_Far_Neighbors) = Get_Periodic_Far_Neighbors_Small_Box; + } + else + { + (*Get_Far_Neighbors) = Get_Periodic_Far_Neighbors_Small_Box; + } } else + { (*Get_Far_Neighbors) = Get_NonPeriodic_Far_Neighbors; + } } @@ -327,18 +314,28 @@ inline int can_Bond( static_storage *workspace, int atom1, 
int atom2 ) // fprintf( stderr, "can bond %6d %6d?\n", atom1, atom2 ); - if( !workspace->restricted[ atom1 ] && !workspace->restricted[ atom2 ] ) - return 1; + if ( !workspace->restricted[ atom1 ] && !workspace->restricted[ atom2 ] ) + { + return FALSE; + } - for( i = 0; i < workspace->restricted[ atom1 ]; ++i ) - if( workspace->restricted_list[ atom1 ][i] == atom2 ) - return 1; + for ( i = 0; i < workspace->restricted[ atom1 ]; ++i ) + { + if ( workspace->restricted_list[ atom1 ][i] == atom2 ) + { + return FALSE; + } + } - for( i = 0; i < workspace->restricted[ atom2 ]; ++i ) - if( workspace->restricted_list[ atom2 ][i] == atom1 ) - return 1; + for ( i = 0; i < workspace->restricted[ atom2 ]; ++i ) + { + if ( workspace->restricted_list[ atom2 ][i] == atom1 ) + { + return FALSE; + } + } - return 0; + return TRUE; } @@ -347,17 +344,20 @@ inline int is_Near_Neighbor( list *near_nbrs, int atom1, int atom2 ) { int i; - for( i=Start_Index(atom1,near_nbrs); i<End_Index(atom1,near_nbrs); ++i ) - if( near_nbrs->select.near_nbr_list[i].nbr == atom2 ) + for ( i = Start_Index(atom1, near_nbrs); i < End_Index(atom1, near_nbrs); ++i ) + { + if ( near_nbrs->select.near_nbr_list[i].nbr == atom2 ) { // fprintf( stderr, "near neighbors %6d %6d\n", atom1, atom2 ); - return 1; + return FALSE; } + } - return 0; + return TRUE; } -void Generate_Neighbor_Lists( reax_system *system, control_params *control, + +void Generate_Neighbor_Lists( reax_system *system, control_params *control, simulation_data *data, static_storage *workspace, list **lists, output_controls *out_control ) { @@ -368,21 +368,20 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, int num_far; int c, count; int grid_top; - grid *g = &( system->g ); + grid *g = &( system->g ); list *far_nbrs = (*lists) + FAR_NBRS; //int hb_type1, hb_type2; //list *hbonds = (*lists) + HBOND; //int top_hbond1, top_hbond2; get_far_neighbors_function Get_Far_Neighbors; far_neighbor_data new_nbrs[125]; -#ifndef 
REORDER_ATOMS - int l, m; -#endif // fprintf( stderr, "\n\tentered nbrs - " ); - if( control->ensemble == iNPT || control->ensemble == sNPT || + if ( control->ensemble == iNPT || control->ensemble == sNPT || control->ensemble == NPT ) + { Update_Grid( system ); + } // fprintf( stderr, "grid updated - " ); Bin_Atoms( system, out_control ); @@ -394,9 +393,9 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, #endif Choose_Neighbor_Finder( system, control, &Get_Far_Neighbors ); - // fprintf( stderr, "function chosen - " ); + // fprintf( stderr, "function chosen - " ); - Reset_Neighbor_Lists( system, workspace, lists ); + Reset_Neighbor_Lists( system, workspace, lists ); // fprintf( stderr, "lists cleared - " ); num_far = 0; @@ -404,9 +403,12 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, c = 0; /* first pick up a cell in the grid */ - for( i = 0; i < g->ncell[0]; i++ ) - for( j = 0; j < g->ncell[1]; j++ ) - for( k = 0; k < g->ncell[2]; k++ ) { + for ( i = 0; i < g->ncell[0]; i++ ) + { + for ( j = 0; j < g->ncell[1]; j++ ) + { + for ( k = 0; k < g->ncell[2]; k++ ) + { nbrs = g->nbrs[i][j][k]; nbrs_cp = g->nbrs_cp[i][j][k]; @@ -414,119 +416,137 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, //#ifdef REORDER_ATOMS // for(atom1 = g->start[i][j][k]; atom1 < g->end[i][j][k]; atom1++) //#else - for(l = 0; l < g->top[i][j][k]; ++l ){ + for (l = 0; l < g->top[i][j][k]; ++l ) + { atom1 = g->atoms[i][j][k][l]; Set_End_Index( atom1, num_far, far_nbrs ); // fprintf( stderr, "atom %d:\n", atom1 ); itr = 0; - while( nbrs[itr][0] > 0 ){ + while ( nbrs[itr][0] > 0 ) + { x = nbrs[itr][0]; y = nbrs[itr][1]; z = nbrs[itr][2]; - // if( DistSqr_to_CP(nbrs_cp[itr], system->atoms[atom1].x ) <= - // SQR(control->r_cut)) + // if( DistSqr_to_CP(nbrs_cp[itr], system->atoms[atom1].x ) <= + // SQR(control->r_cut)) nbr_atoms = g->atoms[x][y][z]; max_atoms = g->top[x][y][z]; /* pick up another atom from the 
neighbor cell - - we have to compare atom1 with its own periodic images as well, + we have to compare atom1 with its own periodic images as well, that's why there is also equality in the if stmt below */ //#ifdef REORDER_ATOMS //for(atom2=g->start[x][y][z]; atom2<g->end[x][y][z]; atom2++) //#else - for( m = 0, atom2=nbr_atoms[m]; m < max; ++m, atom2=nbr_atoms[m] ) - if( atom1 >= atom2 ) { + for ( m = 0, atom2 = nbr_atoms[m]; m < max; ++m, atom2 = nbr_atoms[m] ) + { + if ( atom1 >= atom2 ) + { //fprintf( stderr, "\tatom2 %d", atom2 ); //top_near1 = End_Index( atom1, near_nbrs ); //Set_Start_Index( atom1, num_far, far_nbrs ); //hb_type1=system->reaxprm.sbp[system->atoms[atom1].type].p_hbond; Get_Far_Neighbors( system->atoms[atom1].x, - system->atoms[atom2].x, - &(system->box), control, new_nbrs, &count ); + system->atoms[atom2].x, + &(system->box), control, new_nbrs, &count ); fprintf( stderr, "\t%d count:%d\n", atom2, count ); - for( c = 0; c < count; ++c ) - if(atom1 != atom2 || (atom1 == atom2 && new_nbrs[c].d>=0.1)){ + for ( c = 0; c < count; ++c ) + { + if (atom1 != atom2 || (atom1 == atom2 && new_nbrs[c].d >= 0.1)) + { Set_Far_Neighbor(&(far_nbrs->select.far_nbr_list[num_far]), - atom2, new_nbrs[c].d, 1.0, - new_nbrs[c].dvec, new_nbrs[c].rel_box ); + atom2, new_nbrs[c].d, 1.0, + new_nbrs[c].dvec, new_nbrs[c].rel_box ); ++num_far; /*fprintf(stderr,"FARNBR:%6d%6d%8.3f[%8.3f%8.3f%8.3f]\n", - atom1, atom2, new_nbrs[c].d, - new_nbrs[c].dvec[0], new_nbrs[c].dvec[1], + atom1, atom2, new_nbrs[c].d, + new_nbrs[c].dvec[0], new_nbrs[c].dvec[1], new_nbrs[c].dvec[2] ); */ - /* hydrogen bond lists */ - /*if( control->hb_cut > 0.1 && + /* hydrogen bond lists */ + /*if( control->hb_cut > 0.1 && new_nbrs[c].d <= control->hb_cut ) { - // fprintf( stderr, "%d %d\n", atom1, atom2 ); - hb_type2=system->reaxprm.sbp[system->atoms[atom2].type].p_hbond; - if( hb_type1 == 1 && hb_type2 == 2 ) { - top_hbond1=End_Index(workspace->hbond_index[atom1],hbonds); - 
Set_Near_Neighbor(&(hbonds->select.hbond_list[top_hbond1]), - atom2, new_nbrs[c].d, 1.0, new_nbrs[c].dvec, - new_nbrs[c].rel_box ); - Set_End_Index( workspace->hbond_index[atom1], - top_hbond1 + 1, hbonds ); - } - else if( hb_type1 == 2 && hb_type2 == 1 ) { - top_hbond2 = End_Index( workspace->hbond_index[atom2], hbonds ); - Set_Near_Neighbor(&(hbonds->select.hbond_list[top_hbond2]), - atom1, new_nbrs[c].d, -1.0, new_nbrs[c].dvec, - new_nbrs[c].rel_box ); - Set_End_Index( workspace->hbond_index[atom2], - top_hbond2 + 1, hbonds ); - }*/ + // fprintf( stderr, "%d %d\n", atom1, atom2 ); + hb_type2=system->reaxprm.sbp[system->atoms[atom2].type].p_hbond; + if( hb_type1 == 1 && hb_type2 == 2 ) { + top_hbond1=End_Index(workspace->hbond_index[atom1],hbonds); + Set_Near_Neighbor(&(hbonds->select.hbond_list[top_hbond1]), + atom2, new_nbrs[c].d, 1.0, new_nbrs[c].dvec, + new_nbrs[c].rel_box ); + Set_End_Index( workspace->hbond_index[atom1], + top_hbond1 + 1, hbonds ); + } + else if( hb_type1 == 2 && hb_type2 == 1 ) { + top_hbond2 = End_Index( workspace->hbond_index[atom2], hbonds ); + Set_Near_Neighbor(&(hbonds->select.hbond_list[top_hbond2]), + atom1, new_nbrs[c].d, -1.0, new_nbrs[c].dvec, + new_nbrs[c].rel_box ); + Set_End_Index( workspace->hbond_index[atom2], + top_hbond2 + 1, hbonds ); + }*/ } } + } } + } Set_End_Index( atom1, top_far1, far_nbrs ); } } + } + } fprintf( stderr, "nbrs done-" ); + /* apply restrictions on near neighbors only */ - if( (data->step - data->prev_steps) < control->restrict_bonds ) { - for( atom1 = 0; atom1 < system->N; ++atom1 ) - if( workspace->restricted[ atom1 ] ) { + if ( (data->step - data->prev_steps) < control->restrict_bonds ) + { + for ( atom1 = 0; atom1 < system->N; ++atom1 ) + { + if ( workspace->restricted[ atom1 ] ) + { // fprintf( stderr, "atom1: %d\n", atom1 ); top_near1 = End_Index( atom1, near_nbrs ); - for( j = 0; j < workspace->restricted[ atom1 ]; ++j ) - if(!is_Near_Neighbor(near_nbrs, atom1, - atom2 = 
workspace->restricted_list[atom1][j])) { + for ( j = 0; j < workspace->restricted[ atom1 ]; ++j ) + { + if (is_Near_Neighbor(near_nbrs, atom1, + atom2 = workspace->restricted_list[atom1][j]) == FALSE) + { fprintf( stderr, "%3d-%3d: added bond by applying restrictions!\n", - atom1, atom2 ); + atom1, atom2 ); - top_near2 = End_Index( atom2, near_nbrs ); + top_near2 = End_Index( atom2, near_nbrs ); - /* we just would like to get the nearest image, so a call to + /* we just would like to get the nearest image, so a call to Get_Periodic_Far_Neighbors_Big_Box is good enough. */ - Get_Periodic_Far_Neighbors_Big_Box( system->atoms[ atom1 ].x, - system->atoms[ atom2 ].x, - &(system->box), control, - new_nbrs, &count ); + Get_Periodic_Far_Neighbors_Big_Box( system->atoms[ atom1 ].x, + system->atoms[ atom2 ].x, + &(system->box), control, + new_nbrs, &count ); Set_Near_Neighbor( &(near_nbrs->select.near_nbr_list[ top_near1 ]), - atom2, new_nbrs[c].d, 1.0, - new_nbrs[c].dvec, new_nbrs[c].rel_box ); + atom2, new_nbrs[c].d, 1.0, + new_nbrs[c].dvec, new_nbrs[c].rel_box ); ++top_near1; Set_Near_Neighbor( &(near_nbrs->select.near_nbr_list[ top_near2 ]), - atom1, new_nbrs[c].d, -1.0, - new_nbrs[c].dvec, new_nbrs[c].rel_box ); - Set_End_Index( atom2, top_near2+1, near_nbrs ); + atom1, new_nbrs[c].d, -1.0, + new_nbrs[c].dvec, new_nbrs[c].rel_box ); + Set_End_Index( atom2, top_near2 + 1, near_nbrs ); } + } Set_End_Index( atom1, top_near1, near_nbrs ); } + } } // fprintf( stderr, "restrictions applied-" ); @@ -534,56 +554,61 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, /* verify nbrlists, count num_intrs, sort nearnbrs */ near_nbrs->num_intrs = 0; far_nbrs->num_intrs = 0; - for( i = 0; i < system->N-1; ++i ) { - if( End_Index(i, near_nbrs) > Start_Index(i+1, near_nbrs) ) { - fprintf( stderr, - "step%3d: nearnbr list of atom%d is overwritten by atom%d\n", - data->step, i+1, i ); - exit( 1 ); + for ( i = 0; i < system->N - 1; ++i ) + { + if ( End_Index(i, 
near_nbrs) > Start_Index(i + 1, near_nbrs) ) + { + fprintf( stderr, + "step%3d: nearnbr list of atom%d is overwritten by atom%d\n", + data->step, i + 1, i ); + exit( RUNTIME_ERROR ); } near_nbrs->num_intrs += Num_Entries(i, near_nbrs); - if( End_Index(i, far_nbrs) > Start_Index(i+1, far_nbrs) ) { - fprintf( stderr, - "step%3d: farnbr list of atom%d is overwritten by atom%d\n", - data->step, i+1, i ); - exit( 1 ); + if ( End_Index(i, far_nbrs) > Start_Index(i + 1, far_nbrs) ) + { + fprintf( stderr, + "step%3d: farnbr list of atom%d is overwritten by atom%d\n", + data->step, i + 1, i ); + exit( RUNTIME_ERROR ); } far_nbrs->num_intrs += Num_Entries(i, far_nbrs); } - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { qsort( &(near_nbrs->select.near_nbr_list[ Start_Index(i, near_nbrs) ]), - Num_Entries(i, near_nbrs), sizeof(near_neighbor_data), - compare_near_nbrs ); + Num_Entries(i, near_nbrs), sizeof(near_neighbor_data), + compare_near_nbrs ); } // fprintf( stderr, "near nbrs sorted\n" ); + #ifdef TEST_ENERGY /* for( i = 0; i < system->N; ++i ) { - qsort( &(far_nbrs->select.far_nbr_list[ Start_Index(i, far_nbrs) ]), - Num_Entries(i, far_nbrs), sizeof(far_neighbor_data), - compare_far_nbrs ); + qsort( &(far_nbrs->select.far_nbr_list[ Start_Index(i, far_nbrs) ]), + Num_Entries(i, far_nbrs), sizeof(far_neighbor_data), + compare_far_nbrs ); } */ - fprintf( stderr, "Near neighbors/atom: %d (compare to 150)\n", - num_near / system->N ); - fprintf( stderr, "Far neighbors per atom: %d (compare to %d)\n", - num_far / system->N, control->max_far_nbrs ); + fprintf( stderr, "Near neighbors/atom: %d (compare to 150)\n", + num_near / system->N ); + fprintf( stderr, "Far neighbors per atom: %d (compare to %d)\n", + num_far / system->N, control->max_far_nbrs ); #endif //fprintf( stderr, "step%d: num of nearnbrs = %6d num of farnbrs: %6d\n", // data->step, num_near, num_far ); - //fprintf( stderr, "\talloc nearnbrs = %6d alloc farnbrs: %6d\n", - // system->N * 
near_nbrs->intrs_per_unit, + //fprintf( stderr, "\talloc nearnbrs = %6d alloc farnbrs: %6d\n", + // system->N * near_nbrs->intrs_per_unit, // system->N * far_nbrs->intrs_per_unit ); } -void Generate_Neighbor_Lists( reax_system *system, control_params *control, +void Generate_Neighbor_Lists( reax_system *system, control_params *control, simulation_data *data, static_storage *workspace, list **lists, output_controls *out_control ) { @@ -603,73 +628,84 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, far_nbrs = (*lists) + FAR_NBRS; // fprintf( stderr, "\n\tentered nbrs - " ); - if( control->ensemble == iNPT || - control->ensemble == sNPT || + if ( control->ensemble == iNPT || + control->ensemble == sNPT || control->ensemble == NPT ) + { Update_Grid( system ); + } // fprintf( stderr, "grid updated - " ); Bin_Atoms( system, out_control ); // fprintf( stderr, "atoms sorted - " ); Choose_Neighbor_Finder( system, control, &Get_Far_Neighbors ); - // fprintf( stderr, "function chosen - " ); - Reset_Neighbor_Lists( system, workspace, lists ); + // fprintf( stderr, "function chosen - " ); + Reset_Neighbor_Lists( system, workspace, lists ); // fprintf( stderr, "lists cleared - " ); num_far = 0; c = 0; /* first pick up a cell in the grid */ - for( i = 0; i < g->ncell[0]; i++ ) - for( j = 0; j < g->ncell[1]; j++ ) - for( k = 0; k < g->ncell[2]; k++ ) { + for ( i = 0; i < g->ncell[0]; i++ ) + { + for ( j = 0; j < g->ncell[1]; j++ ) + { + for ( k = 0; k < g->ncell[2]; k++ ) + { nbrs = g->nbrs[i][j][k]; nbrs_cp = g->nbrs_cp[i][j][k]; fprintf( stderr, "gridcell %d %d %d\n", i, j, k ); /* pick up an atom from the current cell */ - for(l = 0; l < g->top[i][j][k]; ++l ){ + for (l = 0; l < g->top[i][j][k]; ++l ) + { atom1 = g->atoms[i][j][k][l]; Set_Start_Index( atom1, num_far, far_nbrs ); fprintf( stderr, "\tatom %d\n", atom1 ); itr = 0; - while( nbrs[itr][0] > 0 ){ + while ( nbrs[itr][0] > 0 ) + { x = nbrs[itr][0]; y = nbrs[itr][1]; z = nbrs[itr][2]; 
fprintf( stderr, "\t\tgridcell %d %d %d\n", x, y, z ); - // if( DistSqr_to_CP(nbrs_cp[itr], system->atoms[atom1].x ) <= - // SQR(control->r_cut)) + // if( DistSqr_to_CP(nbrs_cp[itr], system->atoms[atom1].x ) <= + // SQR(control->r_cut)) nbr_atoms = g->atoms[x][y][z]; max = g->top[x][y][z]; fprintf( stderr, "\t\tmax: %d\n", max ); /* pick up another atom from the neighbor cell - - we have to compare atom1 with its own periodic images as well, + we have to compare atom1 with its own periodic images as well, that's why there is also equality in the if stmt below */ - for( m = 0, atom2=nbr_atoms[m]; m < max; ++m, atom2=nbr_atoms[m] ) - if( atom1 >= atom2 ) { + for ( m = 0, atom2 = nbr_atoms[m]; m < max; ++m, atom2 = nbr_atoms[m] ) + { + if ( atom1 >= atom2 ) + { Get_Far_Neighbors( system->atoms[atom1].x, - system->atoms[atom2].x, - &(system->box), control, new_nbrs, &count ); + system->atoms[atom2].x, + &(system->box), control, new_nbrs, &count ); fprintf( stderr, "\t\t\t%d count:%d\n", atom2, count ); - for( c = 0; c < count; ++c ) - if(atom1 != atom2 || (atom1 == atom2 && new_nbrs[c].d>=0.1)){ + for ( c = 0; c < count; ++c ) + if (atom1 != atom2 || (atom1 == atom2 && new_nbrs[c].d >= 0.1)) + { Set_Far_Neighbor(&(far_nbrs->select.far_nbr_list[num_far]), - atom2, new_nbrs[c].d, 1.0, - new_nbrs[c].dvec, new_nbrs[c].rel_box ); + atom2, new_nbrs[c].d, 1.0, + new_nbrs[c].dvec, new_nbrs[c].rel_box ); ++num_far; /*fprintf(stderr,"FARNBR:%6d%6d%8.3f[%8.3f%8.3f%8.3f]\n", - atom1, atom2, new_nbrs[c].d, - new_nbrs[c].dvec[0], new_nbrs[c].dvec[1], + atom1, atom2, new_nbrs[c].d, + new_nbrs[c].dvec[0], new_nbrs[c].dvec[1], new_nbrs[c].dvec[2] ); */ } } + } ++itr; } @@ -677,22 +713,26 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, Set_End_Index( atom1, num_far, far_nbrs ); } } + } + } - far_nbrs->num_intrs = num_far; + far_nbrs->num_intrs = num_far; fprintf( stderr, "nbrs done, num_far: %d\n", num_far ); #if defined(DEBUG) - for( i = 0; i < system->N; 
++i ) { - qsort( &(far_nbrs->select.far_nbr_list[ Start_Index(i, far_nbrs) ]), - Num_Entries(i, far_nbrs), sizeof(far_neighbor_data), - compare_far_nbrs ); + for ( i = 0; i < system->N; ++i ) + { + qsort( &(far_nbrs->select.far_nbr_list[ Start_Index(i, far_nbrs) ]), + Num_Entries(i, far_nbrs), sizeof(far_neighbor_data), + compare_far_nbrs ); } fprintf( stderr, "step%d: num of farnbrs=%6d\n", data->step, num_far ); - fprintf( stderr, "\tallocated farnbrs: %6d\n", - system->N * far_nbrs->intrs_per_unit ); + fprintf( stderr, "\tallocated farnbrs: %6d\n", + system->N * far_nbrs->intrs_per_unit ); #endif } + #endif diff --git a/PuReMD-GPU/src/neighbors.h b/PuReMD-GPU/src/neighbors.h index 64c14ad29d5194006aacb057a7d80ef54aeee8e4..8eb5cfc2696f4d354edcf3751dedfd315c6762a3 100644 --- a/PuReMD-GPU/src/neighbors.h +++ b/PuReMD-GPU/src/neighbors.h @@ -30,10 +30,8 @@ void Generate_Neighbor_Lists( reax_system*, control_params*, simulation_data*, int Estimate_NumNeighbors( reax_system*, control_params*, static_storage*, list** ); -int Are_Far_Neighbors( rvec, rvec, simulation_box*, real, far_neighbor_data* ); - -static inline HOST_DEVICE int index_grid_debug (int x, int y, int z, int blocksize) +static inline HOST_DEVICE int index_grid_debug( int x, int y, int z, int blocksize ) { return x * 8 * 8 * blocksize + y * 8 * blocksize + diff --git a/PuReMD-GPU/src/pdb_tools.c b/PuReMD-GPU/src/pdb_tools.c deleted file mode 100644 index a7102da2cf8d3023956539960f93f4e61c116a81..0000000000000000000000000000000000000000 --- a/PuReMD-GPU/src/pdb_tools.c +++ /dev/null @@ -1,628 +0,0 @@ -/*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator - - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu - Hasan Metin Aktulga, haktulga@cs.purdue.edu - Ananth Y Grama, ayg@cs.purdue.edu - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - 
published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details: - <http://www.gnu.org/licenses/>. - ----------------------------------------------------------------------*/ - -#include "pdb_tools.h" -#include "box.h" -#include "list.h" -#include "param.h" -#include "restart.h" -#include "ctype.h" - - -int is_Valid_Serial( static_storage *workspace, int serial ) -{ - if ( workspace->map_serials[ serial ] < 0 ) - { - fprintf( stderr, "CONECT line includes invalid pdb serial number %d.\n", - serial ); - fprintf( stderr, "Please correct the input file.Terminating...\n" ); - exit( INVALID_INPUT ); - } - - return 1; -} - - -int Check_Input_Range( int val, int lo, int hi, char *message ) -{ - if ( val < lo || val > hi ) - { - fprintf( stderr, "%s\nInput %d - Out of range %d-%d. 
Terminating...\n", - message, val, lo, hi ); - exit( INVALID_INPUT ); - } - - return 1; -} - - -void Trim_Spaces( char *element ) -{ - int i, j; - - for ( i = 0; element[i] == ' '; ++i ); // skip initial space chars - - for ( j = i; j < strlen(element) && element[j] != ' '; ++j ) - element[j - i] = toupper( element[j] ); // make uppercase, move to beginning - element[j - i] = 0; // finalize the string -} - - -char Read_PDB( char* pdb_file, reax_system* system, control_params *control, - simulation_data *data, static_storage *workspace ) -{ - - FILE *pdb; - char **tmp; - char *s, *s1; - char descriptor[9], serial[9]; - char atom_name[9], res_name[9], res_seq[9]; - char s_x[9], s_y[9], s_z[9]; - char occupancy[9], temp_factor[9]; - char seg_id[9], element[9], charge[9]; - char alt_loc, chain_id, icode; - char s_a[10], s_b[10], s_c[10], s_alpha[9], s_beta[9], s_gamma[9]; - char s_group[12], s_zValue[9]; - char *endptr = NULL; - int i, c, c1, pdb_serial, ratom = 0; - /* open pdb file */ - if ( (pdb = fopen(pdb_file, "r")) == NULL ) - { - fprintf( stderr, "Error opening the pdb file!\n" ); - exit( FILE_NOT_FOUND_ERR ); - } - - - /* allocate memory for tokenizing pdb lines */ - s = (char*) malloc( sizeof(char) * MAX_LINE ); - s1 = (char*) malloc( sizeof(char) * MAX_LINE ); - tmp = (char**) malloc( sizeof(char*) * MAX_TOKENS ); - for ( i = 0; i < MAX_TOKENS; i++ ) - tmp[i] = (char*) malloc( sizeof(char) * MAX_TOKEN_LEN ); - - - /* count number of atoms in the pdb file */ - system->N = 0; - while (!feof(pdb)) - { - s[0] = 0; - fgets( s, MAX_LINE, pdb ); - - tmp[0][0] = 0; - c = Tokenize( s, &tmp ); - - if ( strncmp( tmp[0], "ATOM", 4 ) == 0 || - strncmp( tmp[0], "HETATM", 6 ) == 0 ) - (system->N)++; - } - fclose(pdb); -#if defined(DEBUG_FOCUS) - fprintf( stderr, "system->N: %d\n", system->N ); -#endif - - /* memory allocations for atoms, atom maps, bond restrictions */ - system->atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) ); - - workspace->map_serials = 
(int*) calloc( MAX_ATOM_ID, sizeof(int) ); - for ( i = 0; i < MAX_ATOM_ID; ++i ) - workspace->map_serials[i] = -1; - - workspace->orig_id = (int*) calloc( system->N, sizeof(int) ); - workspace->restricted = (int*) calloc( system->N, sizeof(int) ); - workspace->restricted_list = (int*) calloc( system->N * MAX_RESTRICT, sizeof(int) ); - - //for( i = 0; i < system->N; ++i ) - // workspace->restricted_list[i] = (int*) calloc( MAX_RESTRICT, sizeof(int) ); - - - /* start reading and processing pdb file */ - pdb = fopen(pdb_file, "r"); - c = 0; - c1 = 0; - - while (!feof(pdb)) - { - /* clear previous input line */ - s[0] = 0; - for ( i = 0; i < c1; ++i ) - tmp[i][0] = 0; - - /* read new line and tokenize it */ - fgets( s, MAX_LINE, pdb ); - strncpy( s1, s, MAX_LINE - 1 ); - c1 = Tokenize( s, &tmp ); - - /* process new line */ - if ( strncmp(tmp[0], "ATOM", 4) == 0 || strncmp(tmp[0], "HETATM", 6) == 0 ) - { - if ( strncmp(tmp[0], "ATOM", 4) == 0 ) - { - strncpy( &descriptor[0], s1, 6 ); - descriptor[6] = 0; - strncpy( &serial[0], s1 + 6, 5 ); - serial[5] = 0; - strncpy( &atom_name[0], s1 + 12, 4 ); - atom_name[4] = 0; - alt_loc = s1[16]; - strncpy( &res_name[0], s1 + 17, 3 ); - res_name[3] = 0; - chain_id = s1[21]; - strncpy( &res_seq[0], s1 + 22, 4 ); - res_seq[4] = 0; - icode = s1[26]; - strncpy( &s_x[0], s1 + 30, 8 ); - s_x[8] = 0; - strncpy( &s_y[0], s1 + 38, 8 ); - s_y[8] = 0; - strncpy( &s_z[0], s1 + 46, 8 ); - s_z[8] = 0; - strncpy( &occupancy[0], s1 + 54, 6 ); - occupancy[6] = 0; - strncpy( &temp_factor[0], s1 + 60, 6 ); - temp_factor[6] = 0; - strncpy( &seg_id[0], s1 + 72, 4 ); - seg_id[4] = 0; - strncpy( &element[0], s1 + 76, 2 ); - element[2] = 0; - strncpy( &charge[0], s1 + 78, 2 ); - charge[2] = 0; - } - else if (strncmp(tmp[0], "HETATM", 6) == 0) - { - strncpy( &descriptor[0], s1, 6 ); - descriptor[6] = 0; - strncpy( &serial[0], s1 + 6, 5 ); - serial[5] = 0; - strncpy( &atom_name[0], s1 + 12, 4 ); - atom_name[4] = 0; - alt_loc = s1[16]; - strncpy( 
&res_name[0], s1 + 17, 3 ); - res_name[3] = 0; - chain_id = s1[21]; - strncpy( &res_seq[0], s1 + 22, 4 ); - res_seq[4] = 0; - icode = s1[26]; - strncpy( &s_x[0], s1 + 30, 8 ); - s_x[8] = 0; - strncpy( &s_y[0], s1 + 38, 8 ); - s_y[8] = 0; - strncpy( &s_z[0], s1 + 46, 8 ); - s_z[8] = 0; - strncpy( &occupancy[0], s1 + 54, 6 ); - occupancy[6] = 0; - strncpy( &temp_factor[0], s1 + 60, 6 ); - temp_factor[6] = 0; - //strncpy( &seg_id[0], s1+72, 4 ); seg_id[4] = 0; - strncpy( &element[0], s1 + 76, 2 ); - element[2] = 0; - strncpy( &charge[0], s1 + 78, 2 ); - charge[2] = 0; - } - - - /* add to mapping */ - pdb_serial = strtod( &serial[0], &endptr ); - Check_Input_Range( pdb_serial, 0, MAX_ATOM_ID, "Invalid pdb_serial" ); - workspace->map_serials[ pdb_serial ] = c; - workspace->orig_id[ c ] = pdb_serial; - // fprintf( stderr, "map %d --> %d\n", pdb_serial, c ); - - - /* copy atomic positions */ - system->atoms[c].x[0] = strtod( &s_x[0], &endptr ); - system->atoms[c].x[1] = strtod( &s_y[0], &endptr ); - system->atoms[c].x[2] = strtod( &s_z[0], &endptr ); - - /* atom name and type */ - strcpy( system->atoms[c].name, atom_name ); - Trim_Spaces( element ); - system->atoms[c].type = Get_Atom_Type( &(system->reaxprm), element ); - - /* fprintf( stderr, - "%d%8.3f%8.3f%8.3fq:%8.3f occ:%s temp:%s seg_id:%s element:%s\n", - system->atoms[c].type, - system->atoms[c].x[0], system->atoms[c].x[1], system->atoms[c].x[2], - system->atoms[c].q, occupancy, temp_factor, seg_id, element ); */ - c++; - } - else if (!strncmp( tmp[0], "CRYST1", 6 )) - { - sscanf( s1, PDB_CRYST1_FORMAT, - &descriptor[0], - &s_a[0], - &s_b[0], - &s_c[0], - &s_alpha[0], - &s_beta[0], - &s_gamma[0], - &s_group[0], - &s_zValue[0] ); - - /* Compute full volume tensor from the angles */ - Init_Box_From_CRYST( atof(s_a), atof(s_b), atof(s_c), - atof(s_alpha), atof(s_beta), atof(s_gamma), - &(system->box) ); - } - - /* IMPORTANT: We do not check for the soundness of restrictions here. 
- When atom2 is on atom1's restricted list, and there is a restriction on - atom2, then atom1 has to be on atom2's restricted list, too. However, - we do not check if this is the case in the input file, - this is upto the user. */ - else if (!strncmp( tmp[0], "CONECT", 6 )) - { - /* error check */ - //fprintf(stderr, "CONECT: %d\n", c1 ); - Check_Input_Range( c1 - 2, 0, MAX_RESTRICT, - "CONECT line exceeds max restrictions allowed.\n" ); - - /* read bond restrictions */ - if ( is_Valid_Serial( workspace, pdb_serial = atoi(tmp[1]) ) ) - ratom = workspace->map_serials[ pdb_serial ]; - - workspace->restricted[ ratom ] = c1 - 2; - for ( i = 2; i < c1; ++i ) - { - if ( is_Valid_Serial( workspace, pdb_serial = atoi(tmp[i]) ) ) - workspace->restricted_list[ (ratom * MAX_RESTRICT) + (i - 2) ] = - workspace->map_serials[ pdb_serial ]; - } - - /* fprintf( stderr, "restriction on %d:", ratom ); - for( i = 0; i < workspace->restricted[ ratom ]; ++i ) - fprintf( stderr, " %d", workspace->restricted_list[ratom][i] ); - fprintf( stderr, "\n" ); */ - } - } - - fclose(pdb); - -#if defined(DEBUG_FOCUS) - fprintf( stderr, "pdb file read\n" ); -#endif - - return 1; -} - - -char Write_PDB( reax_system* system, control_params *control, - simulation_data *data, static_storage *workspace, - list* bonds, output_controls *out_control ) -{ - int i, j, k, count; - int connect[4]; - char temp[MAX_STR], name[10]; - real bo; - real alpha, beta, gamma; - - - /* open output pdb file */ - sprintf( temp, "%s%d.pdb", control->sim_name, data->step ); - out_control->pdb = fopen( temp, "w" ); - - - /* Writing Box information */ - /* Write full volume tensor from the angles (as soon as possible) TODO_SOON */ - gamma = acos( (system->box.box[0][0] * system->box.box[1][0] + - system->box.box[0][1] * system->box.box[1][1] + - system->box.box[0][2] * system->box.box[1][2]) / - (system->box.box_norms[0] * system->box.box_norms[1])); - beta = acos( (system->box.box[0][0] * system->box.box[2][0] + - 
system->box.box[0][1] * system->box.box[2][1] + - system->box.box[0][2] * system->box.box[2][2]) / - (system->box.box_norms[0] * system->box.box_norms[2])); - alpha = acos( (system->box.box[2][0] * system->box.box[1][0] + - system->box.box[2][1] * system->box.box[1][1] + - system->box.box[2][2] * system->box.box[1][2]) / - (system->box.box_norms[2] * system->box.box_norms[1])); - - fprintf(out_control->pdb, PDB_CRYST1_FORMAT_O, - "CRYST1", - system->box.box_norms[0], - system->box.box_norms[1], - system->box.box_norms[2], - RAD2DEG(alpha), - RAD2DEG(beta), - RAD2DEG(gamma), - " ", - 0); - fprintf( out_control->log, "Box written\n" ); - fflush( out_control->log ); - - /* Writing atom information */ - for (i = 0; i < system->N; i++) - { - strncpy( name, system->reaxprm.sbp[system->atoms[i].type].name, 2 ); - name[2] = '\0'; - fprintf( out_control->pdb, PDB_ATOM_FORMAT_O, - "ATOM ", - workspace->orig_id[i], - name, - ' ', - "REX", - ' ', - 1, - ' ', - system->atoms[i].x[0], - system->atoms[i].x[1], - system->atoms[i].x[2], - 1.0, - 0.0, - "0", - name, - " " ); - } - - fprintf( out_control->log, "ATOM written\n" ); - fflush( out_control->log ); - - /* Writing connect information */ - for (i = 0; i < system->N; i++) - { - count = 0; - - for (j = Start_Index(i, bonds); j < End_Index(i, bonds); ++j) - { - bo = bonds->select.bond_list[j].bo_data.BO; - if (bo > 0.3) - { - connect[count] = workspace->orig_id[bonds->select.bond_list[j].nbr]; - count++; - } - } - - fprintf( out_control->pdb, "%6s%6d", "CONECT", workspace->orig_id[i] ); - for ( k = 0; k < count; k++ ) - fprintf( out_control->pdb, "%6d", connect[k] ); - fprintf( out_control->pdb, "\n" ); - } - - fprintf( out_control->pdb, "END\n" ); - - fclose( out_control->pdb ); - - return 1; -} - - -char Read_BGF( char* bgf_file, reax_system* system, control_params *control, - simulation_data *data, static_storage *workspace ) -{ - FILE *bgf; - char **tokens; - char *line, *backup; - char descriptor[10], serial[10]; - char 
atom_name[10], res_name[10], res_seq[10]; - char s_x[12], s_y[12], s_z[12]; - char occupancy[10], temp_factor[10]; - char element[10], charge[10]; - char chain_id; - char s_a[12], s_b[12], s_c[12], s_alpha[12], s_beta[12], s_gamma[12]; - char *endptr = NULL; - int i, atom_cnt, token_cnt, bgf_serial, ratom = 0; - - /* open biograf file */ - if ( (bgf = fopen( bgf_file, "r" )) == NULL ) - { - fprintf( stderr, "Error opening the bgf file!\n" ); - exit( FILE_NOT_FOUND_ERR ); - } - - - /* allocate memory for tokenizing biograf file lines */ - line = (char*) malloc( sizeof(char) * MAX_LINE ); - backup = (char*) malloc( sizeof(char) * MAX_LINE ); - tokens = (char**) malloc( sizeof(char*) * MAX_TOKENS ); - for ( i = 0; i < MAX_TOKENS; i++ ) - tokens[i] = (char*) malloc( sizeof(char) * MAX_TOKEN_LEN ); - - - /* count number of atoms in the pdb file */ - system->N = 0; - while ( !feof( bgf ) ) - { - line[0] = 0; - fgets( line, MAX_LINE, bgf ); - - tokens[0][0] = 0; - token_cnt = Tokenize( line, &tokens ); - - if ( !strcmp( tokens[0], "ATOM" ) || !strcmp( tokens[0], "HETATM" ) ) - (system->N)++; - } - //fprintf( stderr, "system->N: %d\n", system->N ); - fclose( bgf ); - - - /* memory allocations for atoms, atom maps, bond restrictions */ - system->atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) ); - - workspace->map_serials = (int*) calloc( MAX_ATOM_ID, sizeof(int) ); - for ( i = 0; i < MAX_ATOM_ID; ++i ) - workspace->map_serials[i] = -1; - - workspace->orig_id = (int*) calloc( system->N, sizeof(int) ); - workspace->restricted = (int*) calloc( system->N, sizeof(int) ); - workspace->restricted_list = (int*) calloc( system->N * MAX_RESTRICT, sizeof(int) ); - //for( i = 0; i < system->N; ++i ) - // workspace->restricted_list[i] = (int*) calloc( MAX_RESTRICT, sizeof(int) ); - - - /* start reading and processing pdb file */ - bgf = fopen( bgf_file, "r" ); - atom_cnt = 0; - token_cnt = 0; - - while ( !feof( bgf ) ) - { - /* clear previous input line */ - line[0] = 0; - 
for ( i = 0; i < token_cnt; ++i ) - tokens[i][0] = 0; - - /* read new line and tokenize it */ - fgets( line, MAX_LINE, bgf ); - strncpy( backup, line, MAX_LINE - 1 ); - token_cnt = Tokenize( line, &tokens ); - - /* process new line */ - if ( !strncmp(tokens[0], "ATOM", 4) || !strncmp(tokens[0], "HETATM", 6) ) - { - if ( !strncmp(tokens[0], "ATOM", 4) ) - { - strncpy( &descriptor[0], backup, 6 ); - descriptor[6] = 0; - strncpy( &serial[0], backup + 7, 5 ); - serial[5] = 0; - strncpy( &atom_name[0], backup + 13, 5 ); - atom_name[5] = 0; - strncpy( &res_name[0], backup + 19, 3 ); - res_name[3] = 0; - chain_id = backup[23]; - strncpy( &res_seq[0], backup + 25, 5 ); - res_seq[5] = 0; - strncpy( &s_x[0], backup + 30, 10 ); - s_x[10] = 0; - strncpy( &s_y[0], backup + 40, 10 ); - s_y[10] = 0; - strncpy( &s_z[0], backup + 50, 10 ); - s_z[10] = 0; - strncpy( &element[0], backup + 61, 5 ); - element[5] = 0; - strncpy( &occupancy[0], backup + 66, 3 ); - occupancy[3] = 0; - strncpy( &temp_factor[0], backup + 69, 2 ); - temp_factor[2] = 0; - strncpy( &charge[0], backup + 72, 8 ); - charge[8] = 0; - } - else if ( !strncmp(tokens[0], "HETATM", 6) ) - { - /* bgf hetatm: - (7x,i5,1x,a5,1x,a3,1x,a1,1x,a5,3f10.5,1x,a5,i3,i2,1x,f8.5) */ - strncpy( &descriptor[0], backup, 6 ); - descriptor[6] = 0; - strncpy( &serial[0], backup + 7, 5 ); - serial[5] = 0; - strncpy( &atom_name[0], backup + 13, 5 ); - atom_name[5] = 0; - strncpy( &res_name[0], backup + 19, 3 ); - res_name[3] = 0; - chain_id = backup[23]; - strncpy( &res_seq[0], backup + 25, 5 ); - res_seq[5] = 0; - strncpy( &s_x[0], backup + 30, 10 ); - s_x[10] = 0; - strncpy( &s_y[0], backup + 40, 10 ); - s_y[10] = 0; - strncpy( &s_z[0], backup + 50, 10 ); - s_z[10] = 0; - strncpy( &element[0], backup + 61, 5 ); - element[5] = 0; - strncpy( &occupancy[0], backup + 66, 3 ); - occupancy[3] = 0; - strncpy( &temp_factor[0], backup + 69, 2 ); - temp_factor[2] = 0; - strncpy( &charge[0], backup + 72, 8 ); - charge[8] = 0; - } - - - /* add to 
mapping */ - bgf_serial = strtod( &serial[0], &endptr ); - Check_Input_Range( bgf_serial, 0, MAX_ATOM_ID, "Invalid bgf serial" ); - workspace->map_serials[ bgf_serial ] = atom_cnt; - workspace->orig_id[ atom_cnt ] = bgf_serial; - // fprintf( stderr, "map %d --> %d\n", bgf_serial, atom_cnt ); - - - /* copy atomic positions */ - system->atoms[atom_cnt].x[0] = strtod( &s_x[0], &endptr ); - system->atoms[atom_cnt].x[1] = strtod( &s_y[0], &endptr ); - system->atoms[atom_cnt].x[2] = strtod( &s_z[0], &endptr ); - - - /* atom name and type */ - //BGF_FIX - atom_name[4] = 0; - //BGF_FIX - - strcpy( system->atoms[atom_cnt].name, atom_name ); - Trim_Spaces( element ); - system->atoms[atom_cnt].type = - Get_Atom_Type( &(system->reaxprm), element ); - - /* fprintf( stderr, - "a:%3d(%1d) c:%10.5f%10.5f%10.5f q:%10.5f occ:%s temp:%s seg_id:%s element:%s\n", - atom_cnt, system->atoms[ atom_cnt ].type, - system->atoms[ atom_cnt ].x[0], - system->atoms[ atom_cnt ].x[1], system->atoms[ atom_cnt ].x[2], - system->atoms[ atom_cnt ].q, occupancy, temp_factor, - seg_id, element ); */ - - atom_cnt++; - } - else if (!strncmp( tokens[0], "CRYSTX", 6 )) - { - sscanf( backup, BGF_CRYSTX_FORMAT, - &descriptor[0], - &s_a[0], - &s_b[0], - &s_c[0], - &s_alpha[0], - &s_beta[0], - &s_gamma[0] ); - - /* Compute full volume tensor from the angles */ - Init_Box_From_CRYST( atof(s_a), atof(s_b), atof(s_c), - atof(s_alpha), atof(s_beta), atof(s_gamma), - &(system->box) ); - } - else if (!strncmp( tokens[0], "CONECT", 6 )) - { - /* check number of restrictions */ - Check_Input_Range( token_cnt - 2, 0, MAX_RESTRICT, - "CONECT line exceeds max restrictions allowed.\n" ); - - /* read bond restrictions */ - if ( is_Valid_Serial( workspace, bgf_serial = atoi(tokens[1]) ) ) - ratom = workspace->map_serials[ bgf_serial ]; - - workspace->restricted[ ratom ] = token_cnt - 2; - for ( i = 2; i < token_cnt; ++i ) - if ( is_Valid_Serial( workspace, bgf_serial = atoi(tokens[i]) ) ) - workspace->restricted_list[ (ratom 
* MAX_RESTRICT) + (i - 2) ] = - workspace->map_serials[ bgf_serial ]; - - /* fprintf( stderr, "restriction on %d:", ratom ); - for( i = 0; i < workspace->restricted[ ratom ]; ++i ) - fprintf( stderr, " %d", workspace->restricted_list[ratom][i] ); - fprintf( stderr, "\n" ); */ - } - } - - fclose( bgf ); - -#if defined(DEBUG_FOCUS) - fprintf( stderr, "bgf file read\n" ); -#endif - - return 1; -} diff --git a/PuReMD-GPU/src/print_utils.c b/PuReMD-GPU/src/print_utils.c index 913ff617a23f9f395a300a0985dec7ad36c33fab..d0f0e1bad12720a793074fbbc1e4d194ebb8fd8e 100644 --- a/PuReMD-GPU/src/print_utils.c +++ b/PuReMD-GPU/src/print_utils.c @@ -19,9 +19,11 @@ ----------------------------------------------------------------------*/ #include "print_utils.h" + +#include "geo_tools.h" #include "list.h" -#include "pdb_tools.h" #include "system_props.h" +#include "tool_box.h" #include "vector.h" @@ -374,18 +376,6 @@ void Init_Force_Test_Functions( ) #endif -char *Get_Element( reax_system *system, int i ) -{ - return &( system->reaxprm.sbp[system->atoms[i].type].name[0] ); -} - - -char *Get_Atom_Name( reax_system *system, int i ) -{ - return &(system->atoms[i].name[0]); -} - - /* near nbrs contain both i-j, j-i nbrhood info */ void Print_Near_Neighbors( reax_system *system, control_params *control, static_storage *workspace, list **lists ) @@ -625,35 +615,48 @@ void Output_Results( reax_system *system, control_params *control, data->E_vdW, data->E_Ele, data->E_Pol ); #endif -#ifdef __PRINT_CPU_RESULTS__ +#ifndef HAVE_CUDA t_elapsed = Get_Timing_Info( data->timing.total ); if ( data->step == data->prev_steps ) f_update = 1; else f_update = out_control->energy_update_freq; - fprintf( out_control->log, "%6d%10.2f%10.2f%10.2f%10.2f%10.2f%10.2f%10.2f\n", + fprintf( out_control->log, "%6d %10.2f %10.2f %10.2f %10.2f %10.2f %10.4f %10.4f %10.2f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f\n", data->step, t_elapsed / f_update, data->timing.nbrs / f_update, data->timing.init_forces / f_update, 
data->timing.bonded / f_update, data->timing.nonb / f_update, data->timing.QEq / f_update, - (double)data->timing.matvecs / f_update ); + data->timing.QEq_sort_mat_rows / f_update, + (double)data->timing.solver_iters / f_update, + data->timing.pre_comp / f_update, + data->timing.pre_app / f_update, + data->timing.solver_spmv / f_update, + data->timing.solver_vector_ops / f_update, + data->timing.solver_orthog / f_update, + data->timing.solver_tri_solve / f_update ); #else t_elapsed = Get_Timing_Info( d_timing.total ); if ( data->step == data->prev_steps ) f_update = 1; else f_update = out_control->energy_update_freq; - fprintf( out_control->log, "%6d%10.2f%10.2f%10.2f%10.2f%10.2f%10.2f%10.2f\n", + fprintf( out_control->log, "%6d %10.2f %10.2f %10.2f %10.2f %10.2f %10.4f %10.4f %10.2f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f\n", data->step, t_elapsed / f_update, - d_timing.nbrs / f_update, - d_timing.init_forces / f_update, - d_timing.bonded / f_update, - d_timing.nonb / f_update, - d_timing.QEq / f_update, - (double)d_timing.matvecs / f_update ); - + d_timing->timing.nbrs / f_update, + d_timing->timing.init_forces / f_update, + d_timing->timing.bonded / f_update, + d_timing->timing.nonb / f_update, + d_timing->timing.QEq / f_update, + d_timing->timing.QEq_sort_mat_rows / f_update, + (double)d_timing->timing.solver_iters / f_update, + d_timing->timing.pre_comp / f_update, + d_timing->timing.pre_app / f_update, + d_timing->timing.solver_spmv / f_update, + d_timing->timing.solver_vector_ops / f_update, + d_timing->timing.solver_orthog / f_update, + d_timing->timing.solver_tri_solve / f_update ); #endif //fprintf (stderr, " total %10.5f \n", t_elapsed); @@ -673,16 +676,32 @@ void Output_Results( reax_system *system, control_params *control, data->timing.init_forces = 0; data->timing.bonded = 0; data->timing.nonb = 0; - data->timing.QEq = 0; - data->timing.matvecs = 0; - + data->timing.QEq = ZERO; + data->timing.QEq_sort_mat_rows = ZERO; + data->timing.pre_comp = ZERO; 
+ data->timing.pre_app = ZERO; + data->timing.solver_iters = 0; + data->timing.solver_spmv = ZERO; + data->timing.solver_vector_ops = ZERO; + data->timing.solver_orthog = ZERO; + data->timing.solver_tri_solve = ZERO; + +#ifdef HAVE_CUDA d_timing.total = Get_Time( ); d_timing.nbrs = 0; d_timing.init_forces = 0; d_timing.bonded = 0; d_timing.nonb = 0; - d_timing.QEq = 0; - d_timing.matvecs = 0; + d_timing->timing.QEq = ZERO; + d_timing->timing.QEq_sort_mat_rows = ZERO; + d_timing->timing.pre_comp = ZERO; + d_timing->timing.pre_app = ZERO; + d_timing->timing.solver_iters = 0; + d_timing->timing.solver_spmv = ZERO; + d_timing->timing.solver_vector_ops = ZERO; + d_timing->timing.solver_orthog = ZERO; + d_timing->timing.solver_tri_solve = ZERO; +#endif fflush( out_control->out ); fflush( out_control->pot ); @@ -716,16 +735,16 @@ void Output_Results( reax_system *system, control_params *control, if ( out_control->write_steps > 0 && data->step % out_control->write_steps == 0 ) { - // t_start = Get_Time( ); + //t_start = Get_Time( ); out_control->append_traj_frame( system, control, data, workspace, lists, out_control ); - //Write_PDB( system, control, data, workspace, *lists+BONDS, out_control ); - // t_elapsed = Get_Timing_Info( t_start ); - // fprintf(stdout, "append_frame took %.6f seconds\n", t_elapsed ); + //Write_PDB( system, *lists+BONDS, data, control, workspace, out_control ); + //t_elapsed = Get_Timing_Info( t_start ); + //fprintf(stdout, "append_frame took %.6f seconds\n", t_elapsed ); } - // fprintf( stderr, "output_results... done\n" ); + //fprintf( stderr, "output_results... 
done\n" ); } @@ -759,23 +778,46 @@ void Print_Linear_System( reax_system *system, control_params *control, sprintf( fname, "%s.H%d.out", control->sim_name, step ); out = fopen( fname, "w" ); - H = &workspace->H; + H = workspace->H; for ( i = 0; i < system->N; ++i ) { for ( j = H->start[i]; j < H->start[i + 1] - 1; ++j ) { fprintf( out, "%6d%6d %24.15e\n", - workspace->orig_id[i], workspace->orig_id[H->entries[j].j], - H->entries[j].val ); + workspace->orig_id[i], workspace->orig_id[H->j[j]], + H->val[j] ); fprintf( out, "%6d%6d %24.15e\n", - workspace->orig_id[H->entries[j].j], workspace->orig_id[i], - H->entries[j].val ); + workspace->orig_id[H->j[j]], workspace->orig_id[i], + H->val[j] ); } // the diagonal entry fprintf( out, "%6d%6d %24.15e\n", - workspace->orig_id[i], workspace->orig_id[i], H->entries[j].val ); + workspace->orig_id[i], workspace->orig_id[i], H->val[j] ); + } + + fclose( out ); + + sprintf( fname, "%s.H_sp%d.out", control->sim_name, step ); + out = fopen( fname, "w" ); + H = workspace->H_sp; + + for ( i = 0; i < system->N; ++i ) + { + for ( j = H->start[i]; j < H->start[i + 1] - 1; ++j ) + { + fprintf( out, "%6d%6d %24.15e\n", + workspace->orig_id[i], workspace->orig_id[H->j[j]], + H->val[j] ); + + fprintf( out, "%6d%6d %24.15e\n", + workspace->orig_id[H->j[j]], workspace->orig_id[i], + H->val[j] ); + } + // the diagonal entry + fprintf( out, "%6d%6d %24.15e\n", + workspace->orig_id[i], workspace->orig_id[i], H->val[j] ); } fclose( out ); @@ -834,11 +876,11 @@ void Print_Sparse_Matrix( sparse_matrix *A ) { int i, j; - for ( i = 0; i < 10; ++i ) + for ( i = 0; i < A->n; ++i ) { fprintf( stderr, "i:%d j(val):", i ); for ( j = A->start[i]; j < A->start[i + 1]; ++j ) - fprintf( stderr, "%d(%.4f) ", A->entries[j].j, A->entries[j].val ); + fprintf( stderr, "%d(%.4f) ", A->j[j], A->val[j] ); fprintf( stderr, "\n" ); } } @@ -850,8 +892,14 @@ void Print_Sparse_Matrix2( sparse_matrix *A, char *fname ) FILE *f = fopen( fname, "w" ); for ( i = 0; i < A->n; 
++i ) + { for ( j = A->start[i]; j < A->start[i + 1]; ++j ) - fprintf( f, "%d%d %.15e\n", A->entries[j].j, i, A->entries[j].val ); + { + //fprintf( f, "%d%d %.15e\n", A->entries[j].j, i, A->entries[j].val ); + //Convert 0-based to 1-based (for Matlab) + fprintf( f, "%6d%6d %24.15e\n", i+1, A->j[j]+1, A->val[j] ); + } + } fclose(f); } diff --git a/PuReMD-GPU/src/print_utils.h b/PuReMD-GPU/src/print_utils.h index 5f479bdc99fa30c518f69b5a23fa88b19af1a306..46d08516e00b002792d507b5effd7bd1ee5d551d 100644 --- a/PuReMD-GPU/src/print_utils.h +++ b/PuReMD-GPU/src/print_utils.h @@ -23,28 +23,25 @@ #include "mytypes.h" + typedef void (*print_interaction)(reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls*); -extern print_interaction Print_Interactions[NO_OF_INTERACTIONS]; + static_storage*, list**, output_controls*); -char *Get_Element( reax_system*, int ); +extern print_interaction Print_Interactions[NO_OF_INTERACTIONS]; -char *Get_Atom_Name( reax_system*, int ); -void Print_Near_Neighbors( reax_system*, control_params*, static_storage*, - list** ); +void Print_Near_Neighbors( reax_system*, control_params*, static_storage*, list** ); -void Print_Far_Neighbors( reax_system*, control_params*, static_storage*, - list** ); +void Print_Far_Neighbors( reax_system*, control_params*, static_storage*, list** ); void Print_Total_Force( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); + static_storage*, list**, output_controls* ); void Output_Results( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); + static_storage*, list**, output_controls* ); void Print_Bond_Orders( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); + static_storage*, list**, output_controls* ); void Print_Linear_System( reax_system*, control_params*, static_storage*, int ); @@ -61,23 +58,23 @@ void Print_Bond_List2( reax_system*, list*, 
char* ); #ifdef TEST_FORCES void Dummy_Printer( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); + static_storage*, list**, output_controls* ); void Print_Bond_Forces( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); + static_storage*, list**, output_controls* ); void Print_LonePair_Forces( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); -void Print_OverUnderCoor_Forces(reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls*); + static_storage*, list**, output_controls* ); +void Print_OverUnderCoor_Forces(reax_system*, control_params*, + simulation_data*, static_storage*, list**, output_controls*); void Print_Three_Body_Forces( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); -void Print_Hydrogen_Bond_Forces(reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls*); + static_storage*, list**, output_controls* ); +void Print_Hydrogen_Bond_Forces(reax_system*, control_params*, + simulation_data*, static_storage*, list**, output_controls*); void Print_Four_Body_Forces( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); + static_storage*, list**, output_controls* ); void Print_vdW_Coulomb_Forces( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); + static_storage*, list**, output_controls* ); void Compare_Total_Forces( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); + static_storage*, list**, output_controls* ); void Init_Force_Test_Functions( ); #endif diff --git a/PuReMD-GPU/src/qeq.c b/PuReMD-GPU/src/qeq.c new file mode 100644 index 0000000000000000000000000000000000000000..a319e89b872d34e3e3da7ca9121a8a8b63a4d83c --- /dev/null +++ b/PuReMD-GPU/src/qeq.c @@ -0,0 +1,1667 @@ 
+/*---------------------------------------------------------------------- + SerialReax - Reax Force Field Simulator + + Copyright (2010) Purdue University + Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#include "qeq.h" + +#include "allocate.h" +#include "index_utils.h" +#include "list.h" +#include "lin_alg.h" +#include "print_utils.h" +#include "tool_box.h" +#if defined(HAVE_SUPERLU_MT) +#include "slu_mt_ddefs.h" +#endif + + +#if defined(TEST_MAT) +static sparse_matrix * create_test_mat( void ) +{ + unsigned int i, n; + sparse_matrix *H_test; + + if ( Allocate_Matrix( &H_test, 3, 6 ) == FAILURE ) + { + fprintf( stderr, "not enough memory for test matrices. 
terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + //3x3, SPD, store lower half + i = 0; + n = 0; + H_test->start[n] = i; + H_test->j[i] = 0; + H_test->val[i] = 4.; + ++i; + ++n; + H_test->start[n] = i; + H_test->j[i] = 0; + H_test->val[i] = 12.; + ++i; + H_test->j[i] = 1; + H_test->val[i] = 37.; + ++i; + ++n; + H_test->start[n] = i; + H_test->j[i] = 0; + H_test->val[i] = -16.; + ++i; + H_test->j[i] = 1; + H_test->val[i] = -43.; + ++i; + H_test->j[i] = 2; + H_test->val[i] = 98.; + ++i; + ++n; + H_test->start[n] = i; + + return H_test; +} +#endif + + +/* Routine used with qsort for sorting nonzeros within a sparse matrix row + * + * v1/v2: pointers to column indices of nonzeros within a row (unsigned int) + */ +static int compare_matrix_entry(const void *v1, const void *v2) +{ + /* larger element has larger column index */ + return *(unsigned int *)v1 - *(unsigned int *)v2; +} + + +/* Routine used for sorting nonzeros within a sparse matrix row; + * internally, a combination of qsort and manual sorting is utilized + * (parallel calls to qsort when multithreading, rows mapped to threads) + * + * A: sparse matrix for which to sort nonzeros within a row, stored in CSR format + */ +static void Sort_Matrix_Rows( sparse_matrix * const A ) +{ + unsigned int i, j, k, si, ei, *temp_j; + real *temp_val; + + #pragma omp parallel default(none) private(i, j, k, si, ei, temp_j, temp_val) shared(stderr) + { + if ( ( temp_j = (unsigned int*) malloc( A->n * sizeof(unsigned int)) ) == NULL + || ( temp_val = (real*) malloc( A->n * sizeof(real)) ) == NULL ) + { + fprintf( stderr, "Not enough space for matrix row sort. 
Terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + /* sort each row of A using column indices */ + #pragma omp for schedule(guided) + for ( i = 0; i < A->n; ++i ) + { + si = A->start[i]; + ei = A->start[i + 1]; + memcpy( temp_j, A->j + si, sizeof(unsigned int) * (ei - si) ); + memcpy( temp_val, A->val + si, sizeof(real) * (ei - si) ); + + //TODO: consider implementing single custom one-pass sort instead of using qsort + manual sort + /* polymorphic sort in standard C library using column indices */ + qsort( temp_j, ei - si, sizeof(unsigned int), compare_matrix_entry ); + + /* manually sort vals */ + for ( j = 0; j < (ei - si); ++j ) + { + for ( k = 0; k < (ei - si); ++k ) + { + if ( A->j[si + j] == temp_j[k] ) + { + A->val[si + k] = temp_val[j]; + break; + } + + } + } + + /* copy sorted column indices */ + memcpy( A->j + si, temp_j, sizeof(unsigned int) * (ei - si) ); + } + + free( temp_val ); + free( temp_j ); + } +} + + +static void Calculate_Droptol( const sparse_matrix * const A, real * const droptol, + const real dtol ) +{ + int i, j, k; + real val; +#ifdef _OPENMP + static real *droptol_local; + unsigned int tid; +#endif + + #pragma omp parallel default(none) private(i, j, k, val, tid), shared(droptol_local, stderr) + { +#ifdef _OPENMP + tid = omp_get_thread_num(); + + #pragma omp master + { + /* keep b_local for program duration to avoid allocate/free + * overhead per Sparse_MatVec call*/ + if ( droptol_local == NULL ) + { + if ( (droptol_local = (real*) malloc( omp_get_num_threads() * A->n * sizeof(real))) == NULL ) + { + fprintf( stderr, "Not enough space for droptol. 
Terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + } + + #pragma omp barrier +#endif + + /* init droptol to 0 */ + for ( i = 0; i < A->n; ++i ) + { +#ifdef _OPENMP + droptol_local[tid * A->n + i] = 0.0; +#else + droptol[i] = 0.0; +#endif + } + + #pragma omp barrier + + /* calculate sqaure of the norm of each row */ + #pragma omp for schedule(static) + for ( i = 0; i < A->n; ++i ) + { + for ( k = A->start[i]; k < A->start[i + 1] - 1; ++k ) + { + j = A->j[k]; + val = A->val[k]; + +#ifdef _OPENMP + droptol_local[tid * A->n + i] += val * val; + droptol_local[tid * A->n + j] += val * val; +#else + droptol[i] += val * val; + droptol[j] += val * val; +#endif + } + + val = A->val[k]; // diagonal entry +#ifdef _OPENMP + droptol_local[tid * A->n + i] += val * val; +#else + droptol[i] += val * val; +#endif + } + + #pragma omp barrier + +#ifdef _OPENMP + #pragma omp for schedule(static) + for ( i = 0; i < A->n; ++i ) + { + droptol[i] = 0.0; + for ( k = 0; k < omp_get_num_threads(); ++k ) + { + droptol[i] += droptol_local[k * A->n + i]; + } + } +#endif + + #pragma omp barrier + + /* calculate local droptol for each row */ + //fprintf( stderr, "droptol: " ); + #pragma omp for schedule(static) + for ( i = 0; i < A->n; ++i ) + { + //fprintf( stderr, "%f-->", droptol[i] ); + droptol[i] = SQRT( droptol[i] ) * dtol; + //fprintf( stderr, "%f ", droptol[i] ); + } + //fprintf( stderr, "\n" ); + } +} + + +static int Estimate_LU_Fill( const sparse_matrix * const A, const real * const droptol ) +{ + int i, j, pj; + int fillin; + real val; + + fillin = 0; + + #pragma omp parallel for schedule(static) \ + default(none) private(i, j, pj, val) reduction(+: fillin) + for ( i = 0; i < A->n; ++i ) + { + for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj ) + { + j = A->j[pj]; + val = A->val[pj]; + + if ( FABS(val) > droptol[i] ) + { + ++fillin; + } + } + } + + return fillin + A->n; +} + + +#if defined(HAVE_SUPERLU_MT) +static real SuperLU_Factorize( const sparse_matrix * const A, + 
sparse_matrix * const L, sparse_matrix * const U ) +{ + unsigned int i, pj, count, *Ltop, *Utop, r; + sparse_matrix *A_t; + SuperMatrix A_S, AC_S, L_S, U_S; + NCformat *A_S_store; + SCPformat *L_S_store; + NCPformat *U_S_store; + superlumt_options_t superlumt_options; + pxgstrf_shared_t pxgstrf_shared; + pdgstrf_threadarg_t *pdgstrf_threadarg; + int_t nprocs; + fact_t fact; + trans_t trans; + yes_no_t refact, usepr; + real u, drop_tol; + real *a, *at; + int_t *asub, *atsub, *xa, *xat; + int_t *perm_c; /* column permutation vector */ + int_t *perm_r; /* row permutations from partial pivoting */ + void *work; + int_t info, lwork; + int_t permc_spec, panel_size, relax; + Gstat_t Gstat; + flops_t flopcnt; + + /* Default parameters to control factorization. */ +#ifdef _OPENMP + //TODO: set as global parameter and use + #pragma omp parallel \ + default(none) shared(nprocs) + { + #pragma omp master + { + /* SuperLU_MT spawns threads internally, so set and pass parameter */ + nprocs = omp_get_num_threads(); + } + } +#else + nprocs = 1; +#endif + +// fact = EQUILIBRATE; /* equilibrate A (i.e., scale rows & cols to have unit norm), then factorize */ + fact = DOFACT; /* factor from scratch */ + trans = NOTRANS; + refact = NO; /* first time factorization */ + //TODO: add to control file and use the value there to set these + panel_size = sp_ienv(1); /* # consec. 
cols treated as unit task */ + relax = sp_ienv(2); /* # cols grouped as relaxed supernode */ + u = 1.0; /* diagonal pivoting threshold */ + usepr = NO; + drop_tol = 0.0; + work = NULL; + lwork = 0; + +//#if defined(DEBUG) + fprintf( stderr, "nprocs = %d\n", nprocs ); + fprintf( stderr, "Panel size = %d\n", panel_size ); + fprintf( stderr, "Relax = %d\n", relax ); +//#endif + + if ( !(perm_r = intMalloc(A->n)) ) + { + SUPERLU_ABORT("Malloc fails for perm_r[]."); + } + if ( !(perm_c = intMalloc(A->n)) ) + { + SUPERLU_ABORT("Malloc fails for perm_c[]."); + } + if ( !(superlumt_options.etree = intMalloc(A->n)) ) + { + SUPERLU_ABORT("Malloc fails for etree[]."); + } + if ( !(superlumt_options.colcnt_h = intMalloc(A->n)) ) + { + SUPERLU_ABORT("Malloc fails for colcnt_h[]."); + } + if ( !(superlumt_options.part_super_h = intMalloc(A->n)) ) + { + SUPERLU_ABORT("Malloc fails for part_super__h[]."); + } + if ( ( (a = (real*) malloc( (2 * A->start[A->n] - A->n) * sizeof(real))) == NULL ) + || ( (asub = (int_t*) malloc( (2 * A->start[A->n] - A->n) * sizeof(int_t))) == NULL ) + || ( (xa = (int_t*) malloc( (A->n + 1) * sizeof(int_t))) == NULL ) + || ( (Ltop = (unsigned int*) malloc( (A->n + 1) * sizeof(unsigned int))) == NULL ) + || ( (Utop = (unsigned int*) malloc( (A->n + 1) * sizeof(unsigned int))) == NULL ) ) + { + fprintf( stderr, "Not enough space for SuperLU factorization. Terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + if ( Allocate_Matrix( &A_t, A->n, A->m ) == FAILURE ) + { + fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + /* Set up the sparse matrix data structure for A. 
*/ + Transpose( A, A_t ); + + count = 0; + for ( i = 0; i < A->n; ++i ) + { + xa[i] = count; + for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) + { + a[count] = A->entries[pj].val; + asub[count] = A->entries[pj].j; + ++count; + } + for ( pj = A_t->start[i] + 1; pj < A_t->start[i + 1]; ++pj ) + { + a[count] = A_t->entries[pj].val; + asub[count] = A_t->entries[pj].j; + ++count; + } + } + xa[i] = count; + + dCompRow_to_CompCol( A->n, A->n, 2 * A->start[A->n] - A->n, a, asub, xa, + &at, &atsub, &xat ); + + for ( i = 0; i < (2 * A->start[A->n] - A->n); ++i ) + fprintf( stderr, "%6d", asub[i] ); + fprintf( stderr, "\n" ); + for ( i = 0; i < (2 * A->start[A->n] - A->n); ++i ) + fprintf( stderr, "%6.1f", a[i] ); + fprintf( stderr, "\n" ); + for ( i = 0; i <= A->n; ++i ) + fprintf( stderr, "%6d", xa[i] ); + fprintf( stderr, "\n" ); + for ( i = 0; i < (2 * A->start[A->n] - A->n); ++i ) + fprintf( stderr, "%6d", atsub[i] ); + fprintf( stderr, "\n" ); + for ( i = 0; i < (2 * A->start[A->n] - A->n); ++i ) + fprintf( stderr, "%6.1f", at[i] ); + fprintf( stderr, "\n" ); + for ( i = 0; i <= A->n; ++i ) + fprintf( stderr, "%6d", xat[i] ); + fprintf( stderr, "\n" ); + + A_S.Stype = SLU_NC; /* column-wise, no supernode */ + A_S.Dtype = SLU_D; /* double-precision */ + A_S.Mtype = SLU_GE; /* full (general) matrix -- required for parallel factorization */ + A_S.nrow = A->n; + A_S.ncol = A->n; + A_S.Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) ); + A_S_store = (NCformat *) A_S.Store; + A_S_store->nnz = 2 * A->start[A->n] - A->n; + A_S_store->nzval = at; + A_S_store->rowind = atsub; + A_S_store->colptr = xat; + + /* ------------------------------------------------------------ + Allocate storage and initialize statistics variables. 
+ ------------------------------------------------------------*/ + StatAlloc( A->n, nprocs, panel_size, relax, &Gstat ); + StatInit( A->n, nprocs, &Gstat ); + + /* ------------------------------------------------------------ + Get column permutation vector perm_c[], according to permc_spec: + permc_spec = 0: natural ordering + permc_spec = 1: minimum degree ordering on structure of A'*A + permc_spec = 2: minimum degree ordering on structure of A'+A + permc_spec = 3: approximate minimum degree for unsymmetric matrices + ------------------------------------------------------------*/ + permc_spec = 0; + get_perm_c( permc_spec, &A_S, perm_c ); + + /* ------------------------------------------------------------ + Initialize the option structure superlumt_options using the + user-input parameters; + Apply perm_c to the columns of original A to form AC. + ------------------------------------------------------------*/ + pdgstrf_init( nprocs, fact, trans, refact, panel_size, relax, + u, usepr, drop_tol, perm_c, perm_r, + work, lwork, &A_S, &AC_S, &superlumt_options, &Gstat ); + + for ( i = 0; i < ((NCPformat*)AC_S.Store)->nnz; ++i ) + fprintf( stderr, "%6.1f", ((real*)(((NCPformat*)AC_S.Store)->nzval))[i] ); + fprintf( stderr, "\n" ); + + /* ------------------------------------------------------------ + Compute the LU factorization of A. + The following routine will create nprocs threads. 
+ ------------------------------------------------------------*/ + pdgstrf( &superlumt_options, &AC_S, perm_r, &L_S, &U_S, &Gstat, &info ); + + fprintf( stderr, "INFO: %d\n", info ); + + flopcnt = 0; + for (i = 0; i < nprocs; ++i) + { + flopcnt += Gstat.procstat[i].fcops; + } + Gstat.ops[FACT] = flopcnt; + +//#if defined(DEBUG) + printf("\n** Result of sparse LU **\n"); + L_S_store = (SCPformat *) L_S.Store; + U_S_store = (NCPformat *) U_S.Store; + printf( "No of nonzeros in factor L = " IFMT "\n", L_S_store->nnz ); + printf( "No of nonzeros in factor U = " IFMT "\n", U_S_store->nnz ); + fflush( stdout ); +//#endif + + /* convert L and R from SuperLU formats to CSR */ + memset( Ltop, 0, (A->n + 1) * sizeof(int) ); + memset( Utop, 0, (A->n + 1) * sizeof(int) ); + memset( L->start, 0, (A->n + 1) * sizeof(int) ); + memset( U->start, 0, (A->n + 1) * sizeof(int) ); + + for ( i = 0; i < 2 * L_S_store->nnz; ++i ) + fprintf( stderr, "%6.1f", ((real*)(L_S_store->nzval))[i] ); + fprintf( stderr, "\n" ); + for ( i = 0; i < 2 * U_S_store->nnz; ++i ) + fprintf( stderr, "%6.1f", ((real*)(U_S_store->nzval))[i] ); + fprintf( stderr, "\n" ); + + printf( "No of supernodes in factor L = " IFMT "\n", L_S_store->nsuper ); + for ( i = 0; i < A->n; ++i ) + { + fprintf( stderr, "nzval_col_beg[%5d] = %d\n", i, L_S_store->nzval_colbeg[i] ); + fprintf( stderr, "nzval_col_end[%5d] = %d\n", i, L_S_store->nzval_colend[i] ); + //TODO: correct for SCPformat for L? 
+ //for( pj = L_S_store->rowind_colbeg[i]; pj < L_S_store->rowind_colend[i]; ++pj ) +// for( pj = 0; pj < L_S_store->rowind_colend[i] - L_S_store->rowind_colbeg[i]; ++pj ) +// { +// ++Ltop[L_S_store->rowind[L_S_store->rowind_colbeg[i] + pj] + 1]; +// } + fprintf( stderr, "col_beg[%5d] = %d\n", i, U_S_store->colbeg[i] ); + fprintf( stderr, "col_end[%5d] = %d\n", i, U_S_store->colend[i] ); + for ( pj = U_S_store->colbeg[i]; pj < U_S_store->colend[i]; ++pj ) + { + ++Utop[U_S_store->rowind[pj] + 1]; + fprintf( stderr, "Utop[%5d] = %d\n", U_S_store->rowind[pj] + 1, Utop[U_S_store->rowind[pj] + 1] ); + } + } + for ( i = 1; i <= A->n; ++i ) + { +// Ltop[i] = L->start[i] = Ltop[i] + Ltop[i - 1]; + Utop[i] = U->start[i] = Utop[i] + Utop[i - 1]; +// fprintf( stderr, "Utop[%5d] = %d\n", i, Utop[i] ); +// fprintf( stderr, "U->start[%5d] = %d\n", i, U->start[i] ); + } + for ( i = 0; i < A->n; ++i ) + { +// for( pj = 0; pj < L_S_store->nzval_colend[i] - L_S_store->nzval_colbeg[i]; ++pj ) +// { +// r = L_S_store->rowind[L_S_store->rowind_colbeg[i] + pj]; +// L->entries[Ltop[r]].j = r; +// L->entries[Ltop[r]].val = ((real*)L_S_store->nzval)[L_S_store->nzval_colbeg[i] + pj]; +// ++Ltop[r]; +// } + for ( pj = U_S_store->colbeg[i]; pj < U_S_store->colend[i]; ++pj ) + { + r = U_S_store->rowind[pj]; + U->entries[Utop[r]].j = i; + U->entries[Utop[r]].val = ((real*)U_S_store->nzval)[pj]; + ++Utop[r]; + } + } + + /* ------------------------------------------------------------ + Deallocate storage after factorization. 
+ ------------------------------------------------------------*/ + pxgstrf_finalize( &superlumt_options, &AC_S ); + Deallocate_Matrix( A_t ); + free( xa ); + free( asub ); + free( a ); + SUPERLU_FREE( perm_r ); + SUPERLU_FREE( perm_c ); + SUPERLU_FREE( ((NCformat *)A_S.Store)->rowind ); + SUPERLU_FREE( ((NCformat *)A_S.Store)->colptr ); + SUPERLU_FREE( ((NCformat *)A_S.Store)->nzval ); + SUPERLU_FREE( A_S.Store ); + if ( lwork == 0 ) + { + Destroy_SuperNode_SCP(&L_S); + Destroy_CompCol_NCP(&U_S); + } + else if ( lwork > 0 ) + { + SUPERLU_FREE(work); + } + StatFree(&Gstat); + + free( Utop ); + free( Ltop ); + + //TODO: return iters + return 0.; +} +#endif + + +/* Diagonal (Jacobi) preconditioner computation */ +static real diag_pre_comp( const reax_system * const system, real * const Hdia_inv ) +{ + unsigned int i; + real start; + + start = Get_Time( ); + + #pragma omp parallel for schedule(static) \ + default(none) private(i) + for ( i = 0; i < system->N; ++i ) + { + Hdia_inv[i] = 1.0 / system->reaxprm.sbp[system->atoms[i].type].eta; + } + + return Get_Timing_Info( start ); +} + + +/* Incomplete Cholesky factorization with dual thresholding */ +static real ICHOLT( const sparse_matrix * const A, const real * const droptol, + sparse_matrix * const L, sparse_matrix * const U ) +{ + int *tmp_j; + real *tmp_val; + int i, j, pj, k1, k2, tmptop, Ltop; + real val, start; + int *Utop; + + start = Get_Time( ); + + if ( ( Utop = (int*) malloc((A->n + 1) * sizeof(int)) ) == NULL || + ( tmp_j = (int*) malloc(A->n * sizeof(int)) ) == NULL || + ( tmp_val = (real*) malloc(A->n * sizeof(real)) ) == NULL ) + { + fprintf( stderr, "not enough memory for ICHOLT preconditioning matrices. 
terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + // clear variables + Ltop = 0; + tmptop = 0; + memset( L->start, 0, (A->n + 1) * sizeof(unsigned int) ); + memset( U->start, 0, (A->n + 1) * sizeof(unsigned int) ); + memset( Utop, 0, A->n * sizeof(unsigned int) ); + + //fprintf( stderr, "n: %d\n", A->n ); + for ( i = 0; i < A->n; ++i ) + { + L->start[i] = Ltop; + tmptop = 0; + + for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj ) + { + j = A->j[pj]; + val = A->val[pj]; + //fprintf( stderr, "i: %d, j: %d", i, j ); + + if ( FABS(val) > droptol[i] ) + { + k1 = 0; + k2 = L->start[j]; + while ( k1 < tmptop && k2 < L->start[j + 1] ) + { + if ( tmp_j[k1] < L->j[k2] ) + { + ++k1; + } + else if ( tmp_j[k1] > L->j[k2] ) + { + ++k2; + } + else + { + val -= (tmp_val[k1++] * L->val[k2++]); + } + } + + // L matrix is lower triangular, + // so right before the start of next row comes jth diagonal + val /= L->val[L->start[j + 1] - 1]; + + tmp_j[tmptop] = j; + tmp_val[tmptop] = val; + ++tmptop; + } + //fprintf( stderr, " -- done\n" ); + } + + // sanity check + if ( A->j[pj] != i ) + { + fprintf( stderr, "i=%d, badly built A matrix!\n", i ); + exit( NUMERIC_BREAKDOWN ); + } + + // compute the ith diagonal in L + val = A->val[pj]; + for ( k1 = 0; k1 < tmptop; ++k1 ) + { + val -= (tmp_val[k1] * tmp_val[k1]); + } + + tmp_j[tmptop] = i; + tmp_val[tmptop] = SQRT(val); + + // apply the dropping rule once again + //fprintf( stderr, "row%d: tmptop: %d\n", i, tmptop ); + //for( k1 = 0; k1<= tmptop; ++k1 ) + // fprintf( stderr, "%d(%f) ", tmp[k1].j, tmp[k1].val ); + //fprintf( stderr, "\n" ); + //fprintf( stderr, "row(%d): droptol=%.4f\n", i+1, droptol[i] ); + for ( k1 = 0; k1 < tmptop; ++k1 ) + { + if ( FABS(tmp_val[k1]) > droptol[i] / tmp_val[tmptop] ) + { + L->j[Ltop] = tmp_j[k1]; + L->val[Ltop] = tmp_val[k1]; + U->start[tmp_j[k1] + 1]++; + ++Ltop; + //fprintf( stderr, "%d(%.4f) ", tmp[k1].j+1, tmp[k1].val ); + } + } + // keep the diagonal in any case + L->j[Ltop] = tmp_j[k1]; 
+ L->val[Ltop] = tmp_val[k1]; + ++Ltop; + //fprintf( stderr, "%d(%.4f)\n", tmp[k1].j+1, tmp[k1].val ); + } + + L->start[i] = Ltop; +// fprintf( stderr, "nnz(L): %d, max: %d\n", Ltop, L->n * 50 ); + + /* U = L^T (Cholesky factorization) */ + Transpose( L, U ); +// for ( i = 1; i <= U->n; ++i ) +// { +// Utop[i] = U->start[i] = U->start[i] + U->start[i - 1] + 1; +// } +// for ( i = 0; i < L->n; ++i ) +// { +// for ( pj = L->start[i]; pj < L->start[i + 1]; ++pj ) +// { +// j = L->j[pj]; +// U->j[Utop[j]] = i; +// U->val[Utop[j]] = L->val[pj]; +// Utop[j]++; +// } +// } + +// fprintf( stderr, "nnz(U): %d, max: %d\n", Utop[U->n], U->n * 50 ); + + free( tmp_val ); + free( tmp_j ); + free( Utop ); + + return Get_Timing_Info( start ); +} + + +/* Fine-grained (parallel) incomplete Cholesky factorization + * + * Reference: + * Edmond Chow and Aftab Patel + * Fine-Grained Parallel Incomplete LU Factorization + * SIAM J. Sci. Comp. */ +static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, + sparse_matrix * const U_t, sparse_matrix * const U ) +{ + unsigned int i, j, k, pj, x = 0, y = 0, ei_x, ei_y; + real *D, *D_inv, sum, start; + sparse_matrix *DAD; + int *Utop; + + start = Get_Time( ); + + if ( Allocate_Matrix( DAD, A->n, A->m ) == FAILURE || + ( D = (real*) malloc(A->n * sizeof(real)) ) == NULL || + ( D_inv = (real*) malloc(A->n * sizeof(real)) ) == NULL || + ( Utop = (int*) malloc((A->n + 1) * sizeof(int)) ) == NULL ) + { + fprintf( stderr, "not enough memory for ICHOL_PAR preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + #pragma omp parallel for schedule(static) \ + default(none) shared(D_inv, D) private(i) + for ( i = 0; i < A->n; ++i ) + { + D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] ); + D[i] = 1. 
/ D_inv[i]; + } + + memset( U->start, 0, sizeof(unsigned int) * (A->n + 1) ); + memset( Utop, 0, sizeof(unsigned int) * (A->n + 1) ); + + /* to get convergence, A must have unit diagonal, so apply + * transformation DAD, where D = D(1./sqrt(D(A))) */ + memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) ); + #pragma omp parallel for schedule(guided) \ + default(none) shared(DAD, D_inv, D) private(i, pj) + for ( i = 0; i < A->n; ++i ) + { + /* non-diagonals */ + for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj ) + { + DAD->j[pj] = A->j[pj]; + DAD->val[pj] = A->val[pj] * D[i] * D[A->j[pj]]; + } + /* diagonal */ + DAD->j[pj] = A->j[pj]; + DAD->val[pj] = 1.; + } + + /* initial guesses for U^T, + * assume: A and DAD symmetric and stored lower triangular */ + memcpy( U_t->start, DAD->start, sizeof(int) * (DAD->n + 1) ); + memcpy( U_t->j, DAD->j, sizeof(int) * (DAD->m) ); + memcpy( U_t->val, DAD->val, sizeof(real) * (DAD->m) ); + + for ( i = 0; i < sweeps; ++i ) + { + /* for each nonzero */ + #pragma omp parallel for schedule(static) \ + default(none) shared(DAD, stderr) private(sum, ei_x, ei_y, k) firstprivate(x, y) + for ( j = 0; j < A->start[A->n]; ++j ) + { + sum = ZERO; + + /* determine row bounds of current nonzero */ + x = 0; + ei_x = 0; + for ( k = 0; k <= A->n; ++k ) + { + if ( U_t->start[k] > j ) + { + x = U_t->start[k - 1]; + ei_x = U_t->start[k]; + break; + } + } + /* column bounds of current nonzero */ + y = U_t->start[U_t->j[j]]; + ei_y = U_t->start[U_t->j[j] + 1]; + + /* sparse dot product: dot( U^T(i,1:j-1), U^T(j,1:j-1) ) */ + while ( U_t->j[x] < U_t->j[j] && + U_t->j[y] < U_t->j[j] && + x < ei_x && y < ei_y ) + { + if ( U_t->j[x] == U_t->j[y] ) + { + sum += (U_t->val[x] * U_t->val[y]); + ++x; + ++y; + } + else if ( U_t->j[x] < U_t->j[y] ) + { + ++x; + } + else + { + ++y; + } + } + + sum = DAD->val[j] - sum; + + /* diagonal entries */ + if ( (k - 1) == U_t->j[j] ) + { + /* sanity check */ + if ( sum < ZERO ) + { + fprintf( stderr, "Numeric 
breakdown in ICHOL Terminating.\n"); +#if defined(DEBUG_FOCUS) + fprintf( stderr, "A(%5d,%5d) = %10.3f\n", + k - 1, A->entries[j].j, A->entries[j].val ); + fprintf( stderr, "sum = %10.3f\n", sum); +#endif + exit(NUMERIC_BREAKDOWN); + } + + U_t->val[j] = SQRT( sum ); + } + /* non-diagonal entries */ + else + { + U_t->val[j] = sum / U_t->val[ei_y - 1]; + } + } + } + + /* apply inverse transformation D^{-1}U^{T}, + * since DAD \approx U^{T}U, so + * D^{-1}DADD^{-1} = A \approx D^{-1}U^{T}UD^{-1} */ + #pragma omp parallel for schedule(guided) \ + default(none) shared(D_inv) private(i, pj) + for ( i = 0; i < A->n; ++i ) + { + for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) + { + U_t->val[pj] *= D_inv[i]; + } + } + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "nnz(L): %d, max: %d\n", U_t->start[U_t->n], U_t->n * 50 ); +#endif + + /* transpose U^{T} and copy into U */ + Transpose( U_t, U ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "nnz(U): %d, max: %d\n", Utop[U->n], U->n * 50 ); +#endif + + Deallocate_Matrix( DAD ); + free(D_inv); + free(D); + free(Utop); + + return Get_Timing_Info( start ); +} + + +/* Fine-grained (parallel) incomplete LU factorization + * + * Reference: + * Edmond Chow and Aftab Patel + * Fine-Grained Parallel Incomplete LU Factorization + * SIAM J. Sci. Comp. 
+ * + * A: symmetric, half-stored (lower triangular), CSR format + * sweeps: number of loops over non-zeros for computation + * L / U: factorized triangular matrices (A \approx LU), CSR format */ +static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, + sparse_matrix * const L, sparse_matrix * const U ) +{ + unsigned int i, j, k, pj, x, y, ei_x, ei_y; + real *D, *D_inv, sum, start; + sparse_matrix *DAD; + + start = Get_Time( ); + + if ( Allocate_Matrix( DAD, A->n, A->m ) == FAILURE || + ( D = (real*) malloc(A->n * sizeof(real)) ) == NULL || + ( D_inv = (real*) malloc(A->n * sizeof(real)) ) == NULL ) + { + fprintf( stderr, "not enough memory for ILU_PAR preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + #pragma omp parallel for schedule(static) \ + default(none) shared(D, D_inv) private(i) + for ( i = 0; i < A->n; ++i ) + { + D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] ); + D[i] = 1.0 / D_inv[i]; + } + + /* to get convergence, A must have unit diagonal, so apply + * transformation DAD, where D = D(1./sqrt(D(A))) */ + memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) ); + #pragma omp parallel for schedule(static) \ + default(none) shared(DAD, D) private(i, pj) + for ( i = 0; i < A->n; ++i ) + { + /* non-diagonals */ + for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj ) + { + DAD->j[pj] = A->j[pj]; + DAD->val[pj] = D[i] * A->val[pj] * D[A->j[pj]]; + } + /* diagonal */ + DAD->j[pj] = A->j[pj]; + DAD->val[pj] = 1.0; + } + + /* initial guesses for L and U, + * assume: A and DAD symmetric and stored lower triangular */ + memcpy( L->start, DAD->start, sizeof(int) * (DAD->n + 1) ); + memcpy( L->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) ); + memcpy( L->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) ); + /* store U^T in CSR for row-wise access and tranpose later */ + memcpy( U->start, DAD->start, sizeof(int) * (DAD->n + 1) ); + memcpy( U->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) ); + memcpy( 
U->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) ); + + /* L has unit diagonal, by convention */ + #pragma omp parallel for schedule(static) default(none) private(i) + for ( i = 0; i < A->n; ++i ) + { + L->val[L->start[i + 1] - 1] = 1.0; + } + + for ( i = 0; i < sweeps; ++i ) + { + /* for each nonzero in L */ + #pragma omp parallel for schedule(static) \ + default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum) + for ( j = 0; j < DAD->start[DAD->n]; ++j ) + { + sum = ZERO; + + /* determine row bounds of current nonzero */ + x = 0; + ei_x = 0; + for ( k = 1; k <= DAD->n; ++k ) + { + if ( DAD->start[k] > j ) + { + x = DAD->start[k - 1]; + ei_x = DAD->start[k]; + break; + } + } + /* determine column bounds of current nonzero */ + y = DAD->start[DAD->j[j]]; + ei_y = DAD->start[DAD->j[j] + 1]; + + /* sparse dot product: + * dot( L(i,1:j-1), U(1:j-1,j) ) */ + while ( L->j[x] < L->j[j] && + L->j[y] < L->j[j] && + x < ei_x && y < ei_y ) + { + if ( L->j[x] == L->j[y] ) + { + sum += (L->val[x] * U->val[y]); + ++x; + ++y; + } + else if ( L->j[x] < L->j[y] ) + { + ++x; + } + else + { + ++y; + } + } + + if ( j != ei_x - 1 ) + { + L->val[j] = ( DAD->val[j] - sum ) / U->val[ei_y - 1]; + } + } + + #pragma omp parallel for schedule(static) \ + default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum) + for ( j = 0; j < DAD->start[DAD->n]; ++j ) + { + sum = ZERO; + + /* determine row bounds of current nonzero */ + x = 0; + ei_x = 0; + for ( k = 1; k <= DAD->n; ++k ) + { + if ( DAD->start[k] > j ) + { + x = DAD->start[k - 1]; + ei_x = DAD->start[k]; + break; + } + } + /* determine column bounds of current nonzero */ + y = DAD->start[DAD->j[j]]; + ei_y = DAD->start[DAD->j[j] + 1]; + + /* sparse dot product: + * dot( L(i,1:i-1), U(1:i-1,j) ) */ + while ( U->j[x] < U->j[j] && + U->j[y] < U->j[j] && + x < ei_x && y < ei_y ) + { + if ( U->j[x] == U->j[y] ) + { + sum += (L->val[y] * U->val[x]); + ++x; + ++y; + } + else if ( U->j[x] < U->j[y] ) + { + ++x; + } + else + { + 
++y; + } + } + + U->val[j] = DAD->val[j] - sum; + } + } + + /* apply inverse transformation: + * since DAD \approx LU, then + * D^{-1}DADD^{-1} = A \approx D^{-1}LUD^{-1} */ + #pragma omp parallel for schedule(static) \ + default(none) shared(DAD, D_inv) private(i, pj) + for ( i = 0; i < DAD->n; ++i ) + { + for ( pj = DAD->start[i]; pj < DAD->start[i + 1]; ++pj ) + { + L->val[pj] = D_inv[i] * L->val[pj]; + /* currently storing U^T, so use row index instead of column index */ + U->val[pj] = U->val[pj] * D_inv[i]; + } + } + + Transpose_I( U ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "nnz(L): %d, max: %d\n", L->start[L->n], L->n * 50 ); + fprintf( stderr, "nnz(U): %d, max: %d\n", Utop[U->n], U->n * 50 ); +#endif + + Deallocate_Matrix( DAD ); + free( D_inv ); + free( D ); + + return Get_Timing_Info( start ); +} + + +/* Fine-grained (parallel) incomplete LU factorization with thresholding + * + * Reference: + * Edmond Chow and Aftab Patel + * Fine-Grained Parallel Incomplete LU Factorization + * SIAM J. Sci. Comp. + * + * A: symmetric, half-stored (lower triangular), CSR format + * droptol: row-wise tolerances used for dropping + * sweeps: number of loops over non-zeros for computation + * L / U: factorized triangular matrices (A \approx LU), CSR format */ +static real ILUT_PAR( const sparse_matrix * const A, const real * droptol, + const unsigned int sweeps, sparse_matrix * const L, sparse_matrix * const U ) +{ + unsigned int i, j, k, pj, x, y, ei_x, ei_y, Ltop, Utop; + real *D, *D_inv, sum, start; + sparse_matrix *DAD, *L_temp, *U_temp; + + start = Get_Time( ); + + if ( Allocate_Matrix( DAD, A->n, A->m ) == FAILURE || + Allocate_Matrix( L_temp, A->n, A->m ) == FAILURE || + Allocate_Matrix( U_temp, A->n, A->m ) == FAILURE ) + { + fprintf( stderr, "not enough memory for ILUT_PAR preconditioning matrices. 
terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + if ( ( D = (real*) malloc(A->n * sizeof(real)) ) == NULL || + ( D_inv = (real*) malloc(A->n * sizeof(real)) ) == NULL ) + { + fprintf( stderr, "not enough memory for ILUT_PAR preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + #pragma omp parallel for schedule(static) \ + default(none) shared(D, D_inv) private(i) + for ( i = 0; i < A->n; ++i ) + { + D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] ); + D[i] = 1.0 / D_inv[i]; + } + + /* to get convergence, A must have unit diagonal, so apply + * transformation DAD, where D = D(1./sqrt(D(A))) */ + memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) ); + #pragma omp parallel for schedule(static) \ + default(none) shared(DAD, D) private(i, pj) + for ( i = 0; i < A->n; ++i ) + { + /* non-diagonals */ + for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj ) + { + DAD->j[pj] = A->j[pj]; + DAD->val[pj] = D[i] * A->val[pj] * D[A->j[pj]]; + } + /* diagonal */ + DAD->j[pj] = A->j[pj]; + DAD->val[pj] = 1.0; + } + + /* initial guesses for L and U, + * assume: A and DAD symmetric and stored lower triangular */ + memcpy( L_temp->start, DAD->start, sizeof(int) * (DAD->n + 1) ); + memcpy( L_temp->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) ); + memcpy( L_temp->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) ); + /* store U^T in CSR for row-wise access and tranpose later */ + memcpy( U_temp->start, DAD->start, sizeof(int) * (DAD->n + 1) ); + memcpy( U_temp->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) ); + memcpy( U_temp->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) ); + + /* L has unit diagonal, by convention */ + #pragma omp parallel for schedule(static) \ + default(none) private(i) shared(L_temp) + for ( i = 0; i < A->n; ++i ) + { + L_temp->val[L_temp->start[i + 1] - 1] = 1.0; + } + + for ( i = 0; i < sweeps; ++i ) + { + /* for each nonzero in L */ + #pragma omp parallel for schedule(static) \ + default(none) shared(DAD, L_temp, 
U_temp) private(j, k, x, y, ei_x, ei_y, sum) + for ( j = 0; j < DAD->start[DAD->n]; ++j ) + { + sum = ZERO; + + /* determine row bounds of current nonzero */ + x = 0; + ei_x = 0; + for ( k = 1; k <= DAD->n; ++k ) + { + if ( DAD->start[k] > j ) + { + x = DAD->start[k - 1]; + ei_x = DAD->start[k]; + break; + } + } + /* determine column bounds of current nonzero */ + y = DAD->start[DAD->j[j]]; + ei_y = DAD->start[DAD->j[j] + 1]; + + /* sparse dot product: + * dot( L(i,1:j-1), U(1:j-1,j) ) */ + while ( L_temp->j[x] < L_temp->j[j] && + L_temp->j[y] < L_temp->j[j] && + x < ei_x && y < ei_y ) + { + if ( L_temp->j[x] == L_temp->j[y] ) + { + sum += (L_temp->val[x] * U_temp->val[y]); + ++x; + ++y; + } + else if ( L_temp->j[x] < L_temp->j[y] ) + { + ++x; + } + else + { + ++y; + } + } + + if ( j != ei_x - 1 ) + { + L_temp->val[j] = ( DAD->val[j] - sum ) / U_temp->val[ei_y - 1]; + } + } + + #pragma omp parallel for schedule(static) \ + default(none) shared(DAD, L_temp, U_temp) private(j, k, x, y, ei_x, ei_y, sum) + for ( j = 0; j < DAD->start[DAD->n]; ++j ) + { + sum = ZERO; + + /* determine row bounds of current nonzero */ + x = 0; + ei_x = 0; + for ( k = 1; k <= DAD->n; ++k ) + { + if ( DAD->start[k] > j ) + { + x = DAD->start[k - 1]; + ei_x = DAD->start[k]; + break; + } + } + /* determine column bounds of current nonzero */ + y = DAD->start[DAD->j[j]]; + ei_y = DAD->start[DAD->j[j] + 1]; + + /* sparse dot product: + * dot( L(i,1:i-1), U(1:i-1,j) ) */ + while ( U_temp->j[x] < U_temp->j[j] && + U_temp->j[y] < U_temp->j[j] && + x < ei_x && y < ei_y ) + { + if ( U_temp->j[x] == U_temp->j[y] ) + { + sum += (L_temp->val[y] * U_temp->val[x]); + ++x; + ++y; + } + else if ( U_temp->j[x] < U_temp->j[y] ) + { + ++x; + } + else + { + ++y; + } + } + + U_temp->val[j] = DAD->val[j] - sum; + } + } + + /* apply inverse transformation: + * since DAD \approx LU, then + * D^{-1}DADD^{-1} = A \approx D^{-1}LUD^{-1} */ + #pragma omp parallel for schedule(static) \ + default(none) shared(DAD, 
L_temp, U_temp, D_inv) private(i, pj) + for ( i = 0; i < DAD->n; ++i ) + { + for ( pj = DAD->start[i]; pj < DAD->start[i + 1]; ++pj ) + { + L_temp->val[pj] = D_inv[i] * L_temp->val[pj]; + /* currently storing U^T, so use row index instead of column index */ + U_temp->val[pj] = U_temp->val[pj] * D_inv[i]; + } + } + + /* apply the dropping rule */ + Ltop = 0; + Utop = 0; + for ( i = 0; i < DAD->n; ++i ) + { + L->start[i] = Ltop; + U->start[i] = Utop; + + for ( pj = L_temp->start[i]; pj < L_temp->start[i + 1] - 1; ++pj ) + { + if ( FABS( L_temp->val[pj] ) > FABS( droptol[i] / L_temp->val[L_temp->start[i + 1] - 1] ) ) + { + L->j[Ltop] = L_temp->j[pj]; + L->val[Ltop] = L_temp->val[pj]; + ++Ltop; + } + } + + /* diagonal */ + L->j[Ltop] = L_temp->j[pj]; + L->val[Ltop] = L_temp->val[pj]; + ++Ltop; + + for ( pj = U_temp->start[i]; pj < U_temp->start[i + 1] - 1; ++pj ) + { + if ( FABS( U_temp->val[pj] ) > FABS( droptol[i] / U_temp->val[U_temp->start[i + 1] - 1] ) ) + { + U->j[Utop] = U_temp->j[pj]; + U->val[Utop] = U_temp->val[pj]; + ++Utop; + } + } + + /* diagonal */ + U->j[Utop] = U_temp->j[pj]; + U->val[Utop] = U_temp->val[pj]; + ++Utop; + } + + L->start[i] = Ltop; + U->start[i] = Utop; + + Transpose_I( U ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "nnz(L): %d\n", L->start[L->n] ); + fprintf( stderr, "nnz(U): %d\n", U->start[U->n] ); +#endif + + Deallocate_Matrix( U_temp ); + Deallocate_Matrix( L_temp ); + Deallocate_Matrix( DAD ); + free( D_inv ); + free( D ); + + return Get_Timing_Info( start ); +} + + +/* Setup routine which performs the following: + * 1) init storage for QEq matrices and other dependent routines + * 2) compute preconditioner (if sim. 
step matches refactor step)
+ *  3) extrapolate fictitious charges s and t
+ */
+static void Init_MatVec( const reax_system * const system, const control_params * const control,
+        simulation_data * const data, static_storage * const workspace, const list * const far_nbrs )
+{
+    int i, fillin;
+    real s_tmp, t_tmp, time;
+    sparse_matrix *Hptr;
+//    char fname[100]; // NOTE: the DEBUG_FOCUS block below uses fname; re-enable this when building with DEBUG_FOCUS
+
+    if (control->qeq_domain_sparsify_enabled)
+    {
+        Hptr = workspace->H_sp;
+    }
+    else
+    {
+        Hptr = workspace->H;
+    }
+
+#if defined(TEST_MAT)
+    Hptr = create_test_mat( );
+#endif
+
+    if (control->pre_comp_refactor > 0 &&
+            ((data->step - data->prev_steps) % control->pre_comp_refactor == 0 || workspace->L == NULL)) /* refactor every pre_comp_refactor steps, or while L is still unallocated */
+    {
+        //Print_Linear_System( system, control, workspace, data->step );
+
+        time = Get_Time( );
+        if ( control->pre_comp_type != DIAG_PC )
+        {
+            Sort_Matrix_Rows( workspace->H );
+            if ( control->qeq_domain_sparsify_enabled == TRUE )
+            {
+                Sort_Matrix_Rows( workspace->H_sp );
+            }
+
+            if ( control->pre_app_type == TRI_SOLVE_GC_PA )
+            {
+                if ( control->qeq_domain_sparsify_enabled == TRUE )
+                {
+                    Hptr = setup_graph_coloring( workspace->H_sp );
+                }
+                else
+                {
+                    Hptr = setup_graph_coloring( workspace->H );
+                }
+
+                Sort_Matrix_Rows( Hptr );
+            }
+        }
+        data->timing.QEq_sort_mat_rows += Get_Timing_Info( time );
+
+#if defined(DEBUG)
+        fprintf( stderr, "H matrix sorted\n" );
+#endif
+
+        switch ( control->pre_comp_type )
+        {
+            case DIAG_PC:
+                if ( workspace->Hdia_inv == NULL )
+                {
+                    if ( ( workspace->Hdia_inv = (real *) calloc( system->N, sizeof( real ) ) ) == NULL )
+                    {
+                        fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
+                        exit( INSUFFICIENT_MEMORY );
+                    }
+                }
+                data->timing.pre_comp += diag_pre_comp( system, workspace->Hdia_inv );
+                break;
+
+            case ICHOLT_PC:
+                Calculate_Droptol( Hptr, workspace->droptol, control->pre_comp_droptol );
+
+#if defined(DEBUG_FOCUS)
+                fprintf( stderr, "drop tolerances calculated\n" );
+#endif
+
+                if ( workspace->L == NULL )
+                {
+                    fillin = Estimate_LU_Fill( Hptr, workspace->droptol );
+                    if ( Allocate_Matrix( workspace->L, far_nbrs->n, fillin ) == FAILURE ||
+                            Allocate_Matrix( workspace->U, far_nbrs->n, fillin ) == FAILURE )
+                    {
+                        fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
+                        exit( INSUFFICIENT_MEMORY );
+                    }
+
+#if defined(DEBUG)
+                    fprintf( stderr, "fillin = %d\n", fillin );
+                    fprintf( stderr, "allocated memory: L = U = %zuMB\n", /* the argument below has type size_t, hence %zu */
+                             fillin * sizeof(sparse_matrix_entry) / (1024 * 1024) );
+#endif
+                }
+
+                data->timing.pre_comp += ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U );
+                break;
+
+            case ILU_PAR_PC:
+                if ( workspace->L == NULL )
+                {
+                    /* factors have sparsity pattern as H */
+                    if ( Allocate_Matrix( workspace->L, Hptr->n, Hptr->m ) == FAILURE ||
+                            Allocate_Matrix( workspace->U, Hptr->n, Hptr->m ) == FAILURE )
+                    {
+                        fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
+                        exit( INSUFFICIENT_MEMORY );
+                    }
+                }
+
+                data->timing.pre_comp += ILU_PAR( Hptr, control->pre_comp_sweeps, workspace->L, workspace->U );
+                break;
+
+            case ILUT_PAR_PC:
+                Calculate_Droptol( Hptr, workspace->droptol, control->pre_comp_droptol );
+#if defined(DEBUG_FOCUS)
+                fprintf( stderr, "drop tolerances calculated\n" );
+#endif
+
+                if ( workspace->L == NULL )
+                {
+                    /* TODO: safest storage estimate is ILU(0) (same as lower triangular portion of H), could improve later */
+                    if ( Allocate_Matrix( workspace->L, Hptr->n, Hptr->m ) == FAILURE ||
+                            Allocate_Matrix( workspace->U, Hptr->n, Hptr->m ) == FAILURE )
+                    {
+                        fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
+                        exit( INSUFFICIENT_MEMORY );
+                    }
+                }
+
+                data->timing.pre_comp += ILUT_PAR( Hptr, workspace->droptol, control->pre_comp_sweeps,
+                                                   workspace->L, workspace->U );
+                break;
+
+            case ILU_SUPERLU_MT_PC:
+                if ( workspace->L == NULL )
+                {
+                    /* factors have sparsity pattern as H */
+                    if ( Allocate_Matrix( workspace->L, Hptr->n, Hptr->m ) == FAILURE ||
+                            Allocate_Matrix( workspace->U, Hptr->n, Hptr->m ) == FAILURE )
+                    {
+                        fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
+                        exit( INSUFFICIENT_MEMORY );
+                    }
+                }
+
+#if defined(HAVE_SUPERLU_MT)
+                data->timing.pre_comp += SuperLU_Factorize( Hptr, workspace->L, workspace->U );
+#else
+                fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" );
+                exit( INVALID_INPUT );
+#endif
+                break;
+
+            default:
+                fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" );
+                exit( INVALID_INPUT );
+                break;
+        }
+
+#if defined(DEBUG)
+        fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) );
+#endif
+
+#if defined(DEBUG_FOCUS)
+        sprintf( fname, "%s.L%d.out", control->sim_name, data->step );
+        Print_Sparse_Matrix2( workspace->L, fname );
+        sprintf( fname, "%s.U%d.out", control->sim_name, data->step );
+        Print_Sparse_Matrix2( workspace->U, fname );
+
+        fprintf( stderr, "icholt-" );
+        //sprintf( fname, "%s.L%d.out", control->sim_name, data->step );
+        //Print_Sparse_Matrix2( workspace->L, fname );
+        //Print_Sparse_Matrix( U );
+#endif
+    }
+
+    /* extrapolation for s & t (default(shared): system and workspace are const-qualified pointers, which default(none) treats differently across OpenMP versions) */
+    //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer
+    #pragma omp parallel for schedule(static) \
+        default(shared) private(i, s_tmp, t_tmp)
+    for ( i = 0; i < system->N; ++i )
+    {
+        // no extrapolation
+        //s_tmp = workspace->s[index_wkspace_sys(0,i,system->N)];
+        //t_tmp = workspace->t[index_wkspace_sys(0,i,system->N)];
+
+        // linear
+        //s_tmp = 2 * workspace->s[index_wkspace_sys(0,i,system->N)] - workspace->s[index_wkspace_sys(1,i,system->N)];
+        //t_tmp = 2 * workspace->t[index_wkspace_sys(0,i,system->N)] - workspace->t[index_wkspace_sys(1,i,system->N)];
+
+        // quadratic
+//        s_tmp = workspace->s[index_wkspace_sys(2,i,system->N)] +
+//                3 * (workspace->s[index_wkspace_sys(0,i,system->N)]-workspace->s[index_wkspace_sys(1,i,system->N)]);
+        t_tmp = workspace->t[index_wkspace_sys(2,i,system->N)] +
+                3 * (workspace->t[index_wkspace_sys(0,i,system->N)] -workspace->t[index_wkspace_sys(1,i,system->N)]);
+
+        // cubic
+        s_tmp = 4 * (workspace->s[index_wkspace_sys(0,i,system->N)] + workspace->s[index_wkspace_sys(2,i,system->N)]) -
+                (6 * workspace->s[index_wkspace_sys(1,i,system->N)] + workspace->s[index_wkspace_sys(3,i,system->N)]);
+        //t_tmp = 4 * (workspace->t[index_wkspace_sys(0,i,system->N)] + workspace->t[index_wkspace_sys(2,i,system->N)]) -
+        //  (6 * workspace->t[index_wkspace_sys(1,i,system->N)] + workspace->t[index_wkspace_sys(3,i,system->N)] );
+
+        // 4th order
+//        s_tmp = 5 * (workspace->s[index_wkspace_sys(0,i,system->N)] -
+//                workspace->s[index_wkspace_sys(3,i,system->N)]) + 10 *
+//                (-workspace->s[index_wkspace_sys(1,i,system->N)] +
+//                workspace->s[index_wkspace_sys(2,i,system->N)] ) +
+//                workspace->s[index_wkspace_sys(4,i,system->N)];
+//        t_tmp = 5 * (workspace->t[index_wkspace_sys(0,i,system->N)] -
+//                workspace->t[index_wkspace_sys(3,i,system->N)]) + 10 *
+//                (-workspace->t[index_wkspace_sys(1,i,system->N)] +
+//                workspace->t[index_wkspace_sys(2,i,system->N)] ) +
+//                workspace->t[index_wkspace_sys(4,i,system->N)];
+
+        workspace->s[index_wkspace_sys(4,i,system->N)] = workspace->s[index_wkspace_sys(3,i,system->N)]; /* shift history: slot k takes slot k-1 */
+        workspace->s[index_wkspace_sys(3,i,system->N)] = workspace->s[index_wkspace_sys(2,i,system->N)];
+        workspace->s[index_wkspace_sys(2,i,system->N)] = workspace->s[index_wkspace_sys(1,i,system->N)];
+        workspace->s[index_wkspace_sys(1,i,system->N)] = workspace->s[index_wkspace_sys(0,i,system->N)];
+        workspace->s[index_wkspace_sys(0,i,system->N)] = s_tmp; /* slot 0 receives the extrapolated value */
+
+        workspace->t[index_wkspace_sys(4,i,system->N)] = workspace->t[index_wkspace_sys(3,i,system->N)];
+        workspace->t[index_wkspace_sys(3,i,system->N)] = workspace->t[index_wkspace_sys(2,i,system->N)];
+        workspace->t[index_wkspace_sys(2,i,system->N)] = workspace->t[index_wkspace_sys(1,i,system->N)];
+        workspace->t[index_wkspace_sys(1,i,system->N)] = workspace->t[index_wkspace_sys(0,i,system->N)];
+        workspace->t[index_wkspace_sys(0,i,system->N)] = t_tmp;
+    }
+}
+
+
+/* Combine fictitious charges s and t to get atomic charge q:
+ * q[i] = s[i] - u * t[i] with u = sum(s) / sum(t), so the resulting charges sum to zero */
+static void Calculate_Charges( const reax_system * const system, static_storage * const workspace )
+{
+    int i;
+    real u, s_sum, t_sum;
+
+    s_sum = t_sum = 0.;
+    for ( i = 0; i < system->N; ++i )
+    {
+        s_sum += workspace->s[index_wkspace_sys(0,i,system->N)];
+        t_sum += workspace->t[index_wkspace_sys(0,i,system->N)];
+    }
+
+    u = s_sum / t_sum;
+    for ( i = 0; i < system->N; ++i )
+    {
+        system->atoms[i].q = workspace->s[index_wkspace_sys(0,i,system->N)]
+            - u * workspace->t[index_wkspace_sys(0,i,system->N)];
+    }
+}
+
+
+/* Main driver method for QEq kernel
+ *
+ * Rough outline:
+ *  1) init / setup routines (Init_MatVec)
+ *  2) perform 2 linear solves (one for s, one for t) with the solver selected by control->qeq_solver_type
+ *  3) compute atomic charges based on output of 2) (Calculate_Charges); solver iterations are added to data->timing.solver_iters
+ */
+void QEq( reax_system * const system, control_params * const control, simulation_data * const data,
+        static_storage * const workspace, const list * const far_nbrs,
+        const output_controls * const out_control )
+{
+    int iters;
+
+    Init_MatVec( system, control, data, workspace, far_nbrs );
+
+    switch ( control->qeq_solver_type )
+    {
+    case GMRES_S:
+        iters = GMRES( workspace, control, data, workspace->H, workspace->b_s, control->qeq_solver_q_err,
+                &workspace->s[index_wkspace_sys(0,0,system->N)], out_control->log,
+                (control->pre_comp_refactor > 0 && (data->step - data->prev_steps) % control->pre_comp_refactor == 0) ? TRUE : FALSE ); /* > 0 guard: a refactor period of 0 would be a modulo by zero */
+        iters += GMRES( workspace, control, data, workspace->H, workspace->b_t, control->qeq_solver_q_err,
+                &workspace->t[index_wkspace_sys(0,0,system->N)], out_control->log, FALSE );
+        break;
+    case GMRES_H_S:
+        iters = GMRES_HouseHolder( workspace, control, data, workspace->H, workspace->b_s, control->qeq_solver_q_err,
+                &workspace->s[index_wkspace_sys(0,0,system->N)], out_control->log, control->pre_comp_refactor > 0 && (data->step - data->prev_steps) % control->pre_comp_refactor == 0 ); /* same > 0 guard as in the GMRES_S case */
+        iters += GMRES_HouseHolder( workspace, control, data, workspace->H, workspace->b_t, control->qeq_solver_q_err,
+                &workspace->t[index_wkspace_sys(0,0,system->N)], out_control->log, 0 );
+        break;
+    case CG_S:
+        iters = CG( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err,
+                &workspace->s[index_wkspace_sys(0,0,system->N)], out_control->log ) + 1;
+        iters += CG( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err,
+                &workspace->t[index_wkspace_sys(0,0,system->N)], out_control->log ) + 1;
+        break;
+    case SDM_S:
+        iters = SDM( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err,
+                &workspace->s[index_wkspace_sys(0,0,system->N)], out_control->log ) + 1;
+        iters += SDM( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err,
+                &workspace->t[index_wkspace_sys(0,0,system->N)], out_control->log ) + 1;
+        break;
+    default:
+        fprintf( stderr, "Unrecognized QEq solver selection. Terminating...\n" );
+        exit( INVALID_INPUT );
+        break;
+    }
+
+    data->timing.solver_iters += iters;
+
+#if defined(DEBUG_FOCUS)
+    fprintf( stderr, "linsolve-" );
+#endif
+
+    Calculate_Charges( system, workspace );
+}
diff --git a/PuReMD-GPU/src/qeq.h b/PuReMD-GPU/src/qeq.h
new file mode 100644
index 0000000000000000000000000000000000000000..4c6c7ea2ce396f5f3cd9a538b801f2658199b7d9
--- /dev/null
+++ b/PuReMD-GPU/src/qeq.h
@@ -0,0 +1,73 @@
+/*----------------------------------------------------------------------
+  PuReMD-GPU - Reax Force Field Simulator
+
+  Copyright (2014) Purdue University
+  Sudhir Kylasa, skylasa@purdue.edu
+  Hasan Metin Aktulga, haktulga@cs.purdue.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+ ----------------------------------------------------------------------*/ + +#ifndef __QEq_H_ +#define __QEq_H_ + +#include "mytypes.h" + + +void QEq( reax_system* const, control_params* const, simulation_data* const, + static_storage* const, const list* const, + const output_controls* const ); + + +//static inline HOST_DEVICE void swap( sparse_matrix_entry *array, +// int index1, int index2 ) +//{ +// sparse_matrix_entry temp = array[index1]; +// array[index1] = array[index2]; +// array[index2] = temp; +//} +// +// +//static inline HOST_DEVICE void quick_sort( sparse_matrix_entry *array, +// int start, int end ) +//{ +// int i = start; +// int k = end; +// +// if (end - start >= 1) +// { +// int pivot = array[start].j; +// +// while (k > i) +// { +// while ((array[i].j <= pivot) && (i <= end) && (k > i)) +// { +// i++; +// } +// while ((array[k].j > pivot) && (k >= start) && (k >= i)) +// { +// k--; +// } +// if (k > i) +// { +// swap( array, i, k ); +// } +// } +// swap( array, start, k ); +// quick_sort( array, start, k - 1 ); +// quick_sort( array, k + 1, end ); +// } +//} + + +#endif diff --git a/PuReMD-GPU/src/random.h b/PuReMD-GPU/src/random.h index b19bc58e3dcef04a324b108be718bfbff3e5c06c..a936477278d06a989d50ab0faeafb8a737a4e5fd 100644 --- a/PuReMD-GPU/src/random.h +++ b/PuReMD-GPU/src/random.h @@ -58,7 +58,7 @@ static inline HOST_DEVICE double GRandom(double mean, double sigma) rsq = v1 * v1 + v2 * v2; } - return mean + v1 * sigma * sqrt(-2.0 * log(rsq) / rsq); + return mean + v1 * sigma * SQRT(-2.0 * LOG(rsq) / rsq); } diff --git a/PuReMD-GPU/src/reset_utils.c b/PuReMD-GPU/src/reset_utils.c index f79596aa9d29a65f673448d18a28c73c00444e43..ecb921bb00255081bec5470baaab070df8cb80ef 100644 --- a/PuReMD-GPU/src/reset_utils.c +++ b/PuReMD-GPU/src/reset_utils.c @@ -1,19 +1,20 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue 
University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ @@ -28,18 +29,20 @@ void Reset_Atoms( reax_system* system ) { int i; - for( i = 0; i < system->N; ++i ) - memset( system->atoms[i].f, 0.0, RVEC_SIZE ); + for ( i = 0; i < system->N; ++i ) + { + memset( system->atoms[i].f, 0.0, sizeof(rvec) ); + } } void Reset_Pressures( simulation_data *data ) { - rtensor_MakeZero( data->flex_bar.P ); + rtensor_MakeZero( data->flex_bar.P ); data->iso_bar.P = 0; rvec_MakeZero( data->int_press ); rvec_MakeZero( data->ext_press ); - /* fprintf( stderr, "reset: ext_press (%12.6f %12.6f %12.6f)\n", + /* fprintf( stderr, "reset: ext_press (%12.6f %12.6f %12.6f)\n", data->ext_press[0], data->ext_press[1], data->ext_press[2] ); */ } @@ -97,49 +100,57 @@ void Reset_Workspace( reax_system *system, static_storage *workspace ) } -void Reset_Neighbor_Lists( reax_system *system, control_params *control, +void Reset_Neighbor_Lists( reax_system *system, control_params *control, static_storage *workspace, list **lists ) { int i, tmp; list *bonds = (*lists) + BONDS; list *hbonds = (*lists) + HBONDS; - for( i = 0; i < system->N; ++i 
) { + for ( i = 0; i < system->N; ++i ) + { tmp = Start_Index( i, bonds ); Set_End_Index( i, tmp, bonds ); } - //TODO check if this is needed - memset (bonds->select.bond_list, 0, BOND_DATA_SIZE * bonds->num_intrs ); + //TODO: added for GPU, verify if correct + memset( bonds->select.bond_list, 0, BOND_DATA_SIZE * bonds->num_intrs ); - if( control->hb_cut > 0 ) - for( i = 0; i < system->N; ++i ) - if( system->reaxprm.sbp[system->atoms[i].type].p_hbond == 1) { + if ( control->hb_cut > 0 ) + { + for ( i = 0; i < system->N; ++i ) + { + if ( system->reaxprm.sbp[system->atoms[i].type].p_hbond == 1) + { tmp = Start_Index( workspace->hbond_index[i], hbonds ); Set_End_Index( workspace->hbond_index[i], tmp, hbonds ); - /* fprintf( stderr, "i:%d, hbond: %d-%d\n", - i, Start_Index( workspace->hbond_index[i], hbonds ), + /* fprintf( stderr, "i:%d, hbond: %d-%d\n", + i, Start_Index( workspace->hbond_index[i], hbonds ), End_Index( workspace->hbond_index[i], hbonds ) );*/ } + } + } } -void Reset( reax_system *system, control_params *control, +void Reset( reax_system *system, control_params *control, simulation_data *data, static_storage *workspace, list **lists ) { Reset_Atoms( system ); Reset_Simulation_Data( data ); - if( control->ensemble == NPT || control->ensemble == sNPT || + if ( control->ensemble == NPT || control->ensemble == sNPT || control->ensemble == iNPT ) + { Reset_Pressures( data ); + } - Reset_Workspace( system, workspace ); + Reset_Workspace( system, workspace ); Reset_Neighbor_Lists( system, control, workspace, lists ); -#if defined(DEBUG_FOCUS) +#if defined(DEBUG_FOCUS) fprintf( stderr, "reset - "); #endif } @@ -147,16 +158,18 @@ void Reset( reax_system *system, control_params *control, void Reset_Grid( grid *g ) { - memset (g->top, 0, INT_SIZE * g->ncell[0]*g->ncell[1]*g->ncell[2]); + memset( g->top, 0, INT_SIZE * g->ncell[0]*g->ncell[1]*g->ncell[2] ); } + void Reset_Marks( grid *g, ivec *grid_stack, int grid_top ) { int i; - for( i = 0; i < grid_top; ++i ) - 
g->mark[grid_stack[i][0] * g->ncell[1]*g->ncell[2] + - grid_stack[i][1] * g->ncell[2] + - grid_stack[i][2]] = 0; + for ( i = 0; i < grid_top; ++i ) + { + g->mark[grid_stack[i][0] * g->ncell[1]*g->ncell[2] + + grid_stack[i][1] * g->ncell[2] + grid_stack[i][2]] = 0; + } } diff --git a/PuReMD-GPU/src/restart.c b/PuReMD-GPU/src/restart.c index b6ccb014d91ad33cd337d7688345d06811b2c681..13abdecc8142c5f40b942b79c2e886246372576c 100644 --- a/PuReMD-GPU/src/restart.c +++ b/PuReMD-GPU/src/restart.c @@ -1,9 +1,10 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or @@ -19,11 +20,12 @@ ----------------------------------------------------------------------*/ #include "restart.h" + #include "box.h" #include "vector.h" void Write_Binary_Restart( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace ) + simulation_data *data, static_storage *workspace ) { int i; char fname[MAX_STR]; @@ -65,8 +67,8 @@ void Write_Binary_Restart( reax_system *system, control_params *control, void Read_Binary_Restart( char *fname, reax_system *system, - control_params *control, simulation_data *data, - static_storage *workspace ) + control_params *control, simulation_data *data, + static_storage *workspace ) { int i; FILE *fres; @@ -103,14 +105,13 @@ void Read_Binary_Restart( char *fname, reax_system *system, workspace->map_serials = (int*) calloc( MAX_ATOM_ID, sizeof(int) ); for ( i = 0; i < MAX_ATOM_ID; ++i ) + { workspace->map_serials[i] = -1; + } workspace->orig_id = (int*) calloc( system->N, sizeof(int) ); workspace->restricted = 
(int*) calloc( system->N, sizeof(int) ); workspace->restricted_list = (int*) calloc( system->N * MAX_RESTRICT, sizeof(int) ); - //CHANGE - //for( i = 0; i < system->N; ++i ) - // workspace->restricted_list[i] = (int*) calloc( MAX_RESTRICT, sizeof(int) ); for ( i = 0; i < system->N; ++i ) { @@ -175,8 +176,7 @@ void Write_ASCII_Restart( reax_system *system, control_params *control, void Read_ASCII_Restart( char *fname, reax_system *system, - control_params *control, simulation_data *data, - static_storage *workspace ) + control_params *control, simulation_data *data, static_storage *workspace ) { int i; FILE *fres; @@ -185,8 +185,7 @@ void Read_ASCII_Restart( char *fname, reax_system *system, fres = fopen( fname, "r" ); /* header */ - //fscanf( fres, READ_RESTART_HEADER, - fscanf( fres, RESTART_HEADER, + fscanf( fres, READ_RESTART_HEADER, &data->prev_steps, &system->N, &data->therm.T, &data->therm.xi, &data->therm.v_xi, &data->therm.v_xi_old, &data->therm.G_xi, &system->box.box[0][0], &system->box.box[0][1], &system->box.box[0][2], @@ -194,7 +193,7 @@ void Read_ASCII_Restart( char *fname, reax_system *system, &system->box.box[2][0], &system->box.box[2][1], &system->box.box[2][2]); Make_Consistent( &(system->box) ); -//#if defined(DEBUG_FOCUS) +#if defined(DEBUG_FOCUS) fprintf( stderr, "restart step: %d\n", data->prev_steps ); fprintf( stderr, "restart thermostat: %10.6f %10.6f %10.6f %10.6f %10.6f\n", data->therm.T, data->therm.xi, @@ -204,22 +203,20 @@ void Read_ASCII_Restart( char *fname, reax_system *system, system->box.box[0][0], system->box.box[0][1], system->box.box[0][2], system->box.box[1][0], system->box.box[1][1], system->box.box[1][2], system->box.box[2][0], system->box.box[2][1], system->box.box[2][2] ); - fprintf ( stderr, "Total Atoms read: %d \n", system->N); -//#endif +#endif /* memory allocations for atoms, atom maps, bond restrictions */ system->atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) ); workspace->map_serials = (int*) calloc( 
MAX_ATOM_ID, sizeof(int) ); for ( i = 0; i < MAX_ATOM_ID; ++i ) + { workspace->map_serials[i] = -1; + } workspace->orig_id = (int*) calloc( system->N, sizeof(int) ); workspace->restricted = (int*) calloc( system->N, sizeof(int) ); workspace->restricted_list = (int*) calloc( system->N * MAX_RESTRICT, sizeof(int) ); - //CHANGE - //for( i = 0; i < system->N; ++i ) - // workspace->restricted_list[i] = (int*) calloc( MAX_RESTRICT, sizeof(int) ); for ( i = 0; i < system->N; ++i ) { @@ -240,11 +237,15 @@ void Read_ASCII_Restart( char *fname, reax_system *system, void Write_Restart( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - output_controls *out_control ) + simulation_data *data, static_storage *workspace, output_controls + *out_control ) { if ( out_control->restart_format == WRITE_ASCII ) + { Write_ASCII_Restart( system, control, data, workspace ); + } else if ( out_control->restart_format == WRITE_BINARY ) + { Write_Binary_Restart( system, control, data, workspace ); + } } diff --git a/PuReMD-GPU/src/single_body_interactions.c b/PuReMD-GPU/src/single_body_interactions.c index b26f493e703819f066389991a4845acab113b326..4c5824dd9862770863aa3e3299ca4f1f691c561e 100644 --- a/PuReMD-GPU/src/single_body_interactions.c +++ b/PuReMD-GPU/src/single_body_interactions.c @@ -1,19 +1,20 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; 
either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ @@ -27,11 +28,8 @@ #include "vector.h" -void LonePair_OverUnder_Coordination_Energy( reax_system *system, - control_params *control, - simulation_data *data, - static_storage *workspace, - list **lists, +void LonePair_OverUnder_Coordination_Energy( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, list **lists, output_controls *out_control ) { int i, j, pj, type_i, type_j; @@ -49,7 +47,7 @@ void LonePair_OverUnder_Coordination_Energy( reax_system *system, single_body_parameters *sbp_i, *sbp_j; two_body_parameters *twbp; bond_data *pbond; - bond_order_data *bo_ij; + bond_order_data *bo_ij; list *bonds = (*lists) + BONDS; /* Initialize parameters */ @@ -61,64 +59,71 @@ void LonePair_OverUnder_Coordination_Energy( reax_system *system, p_ovun7 = system->reaxprm.gp.l[8]; p_ovun8 = system->reaxprm.gp.l[9]; - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { /* set the parameter pointer */ type_i = system->atoms[i].type; sbp_i = &(system->reaxprm.sbp[ type_i ]); /* lone-pair Energy */ - p_lp2 = sbp_i->p_lp2; + p_lp2 = sbp_i->p_lp2; expvd2 = EXP( -75 * workspace->Delta_lp[i] ); inv_expvd2 = 1. / (1. 
+ expvd2 ); /* calculate the energy */ - data->E_Lp += e_lp = - p_lp2 * workspace->Delta_lp[i] * inv_expvd2; + data->E_Lp += e_lp = p_lp2 * workspace->Delta_lp[i] * inv_expvd2; - dElp = p_lp2 * inv_expvd2 + - 75 * p_lp2 * workspace->Delta_lp[i] * expvd2 * SQR(inv_expvd2); + dElp = p_lp2 * inv_expvd2 + + 75 * p_lp2 * workspace->Delta_lp[i] * expvd2 * SQR(inv_expvd2); CElp = dElp * workspace->dDelta_lp[i]; workspace->CdDelta[i] += CElp; // lp - 1st term #ifdef TEST_ENERGY - fprintf( out_control->elp, "%23.15e%23.15e%23.15e%23.15e\n", - p_lp2, workspace->Delta_lp_temp[i], expvd2, dElp ); + fprintf( out_control->elp, "%23.15e%23.15e%23.15e%23.15e\n", + p_lp2, workspace->Delta_lp_temp[i], expvd2, dElp ); fprintf( out_control->elp, "%6d%23.15e%23.15e%23.15e\n", - workspace->orig_id[i]+1, workspace->nlp[i], e_lp, data->E_Lp ); + workspace->orig_id[i] + 1, workspace->nlp[i], e_lp, data->E_Lp ); #endif + #ifdef TEST_FORCES Add_dDelta( system, lists, i, CElp, workspace->f_lp ); // lp - 1st term #endif /* correction for C2 */ - if( system->reaxprm.gp.l[5] > 0.001 && + if ( system->reaxprm.gp.l[5] > 0.001 && !strcmp( system->reaxprm.sbp[type_i].name, "C" ) ) - for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) - if( i < bonds->select.bond_list[pj].nbr ) { + { + for ( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) + { + if ( i < bonds->select.bond_list[pj].nbr ) + { j = bonds->select.bond_list[pj].nbr; type_j = system->atoms[j].type; - if( !strcmp( system->reaxprm.sbp[type_j].name, "C" ) ) { + if ( !strcmp( system->reaxprm.sbp[type_j].name, "C" ) ) + { twbp = &( system->reaxprm.tbp[ index_tbp(type_i,type_j,system->reaxprm.num_atom_types) ]); bo_ij = &( bonds->select.bond_list[pj].bo_data ); Di = workspace->Delta[i]; - vov3 = bo_ij->BO - Di - 0.040*POW(Di, 4.); + vov3 = bo_ij->BO - Di - 0.040 * POW(Di, 4.); - if( vov3 > 3. ) { - data->E_Lp += e_lph = p_lp3 * SQR(vov3-3.0); + if ( vov3 > 3. 
) + { + data->E_Lp += e_lph = p_lp3 * SQR(vov3 - 3.0); //estrain(i) += e_lph; - deahu2dbo = 2.*p_lp3*(vov3 - 3.); - deahu2dsbo = 2.*p_lp3*(vov3 - 3.)*(-1. - 0.16*POW(Di, 3.)); + deahu2dbo = 2.*p_lp3 * (vov3 - 3.); + deahu2dsbo = 2.*p_lp3 * (vov3 - 3.) * (-1. - 0.16 * POW(Di, 3.)); bo_ij->Cdbo += deahu2dbo; workspace->CdDelta[i] += deahu2dsbo; #ifdef TEST_ENERGY - fprintf(out_control->elp,"C2cor%6d%6d%23.15e%23.15e%23.15e\n", + fprintf(out_control->elp, "C2cor%6d%6d%23.15e%23.15e%23.15e\n", // workspace->orig_id[i], workspace->orig_id[j], - i+1, j+1, e_lph, deahu2dbo, deahu2dsbo ); + i + 1, j + 1, e_lph, deahu2dbo, deahu2dsbo ); #endif + #ifdef TEST_FORCES Add_dBO(system, lists, i, pj, deahu2dbo, workspace->f_lp); Add_dDelta(system, lists, i, deahu2dsbo, workspace->f_lp); @@ -127,44 +132,52 @@ void LonePair_OverUnder_Coordination_Energy( reax_system *system, } } + } + } } - - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { type_i = system->atoms[i].type; sbp_i = &(system->reaxprm.sbp[ type_i ]); /* over-coordination energy */ - if( sbp_i->mass > 21.0 ) + if ( sbp_i->mass > 21.0 ) + { dfvl = 0.0; - else dfvl = 1.0; // only for 1st-row elements + } + else + { + dfvl = 1.0; // only for 1st-row elements + } p_ovun2 = sbp_i->p_ovun2; sum_ovun1 = 0; sum_ovun2 = 0; - for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) { + for ( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) + { j = bonds->select.bond_list[pj].nbr; - type_j = system->atoms[j].type; + type_j = system->atoms[j].type; bo_ij = &(bonds->select.bond_list[pj].bo_data); sbp_j = &(system->reaxprm.sbp[ type_j ]); twbp = &(system->reaxprm.tbp[ index_tbp(type_i,type_j,system->reaxprm.num_atom_types) ]); sum_ovun1 += twbp->p_ovun1 * twbp->De_s * bo_ij->BO; - sum_ovun2 += (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j])* - ( bo_ij->BO_pi + bo_ij->BO_pi2 ); - - /*fprintf( stdout, "%4d%4d%23.15e%23.15e%23.15e\n", - i+1, j+1, - dfvl * 
workspace->Delta_lp_temp[j], - sbp_j->nlp_opt, - workspace->nlp_temp[j] );*/ + sum_ovun2 += (workspace->Delta[j] - dfvl * workspace->Delta_lp_temp[j]) * + ( bo_ij->BO_pi + bo_ij->BO_pi2 ); + + /*fprintf( stdout, "%4d%4d%23.15e%23.15e%23.15e\n", + i+1, j+1, + dfvl * workspace->Delta_lp_temp[j], + sbp_j->nlp_opt, + workspace->nlp_temp[j] );*/ } exp_ovun1 = p_ovun3 * EXP( p_ovun4 * sum_ovun2 ); inv_exp_ovun1 = 1.0 / (1 + exp_ovun1); - Delta_lpcorr = workspace->Delta[i] - - (dfvl*workspace->Delta_lp_temp[i]) * inv_exp_ovun1; + Delta_lpcorr = workspace->Delta[i] - + (dfvl * workspace->Delta_lp_temp[i]) * inv_exp_ovun1; exp_ovun2 = EXP( p_ovun2 * Delta_lpcorr ); inv_exp_ovun2 = 1.0 / (1.0 + exp_ovun2); @@ -175,11 +188,11 @@ void LonePair_OverUnder_Coordination_Energy( reax_system *system, data->E_Ov += e_ov = sum_ovun1 * CEover1; CEover2 = sum_ovun1 * DlpVi * inv_exp_ovun2 * - ( 1.0 - Delta_lpcorr*( DlpVi + p_ovun2 * exp_ovun2 * inv_exp_ovun2 ) ); + ( 1.0 - Delta_lpcorr * ( DlpVi + p_ovun2 * exp_ovun2 * inv_exp_ovun2 ) ); - CEover3 = CEover2 * (1.0 - dfvl*workspace->dDelta_lp[i]*inv_exp_ovun1 ); + CEover3 = CEover2 * (1.0 - dfvl * workspace->dDelta_lp[i] * inv_exp_ovun1 ); - CEover4 = CEover2 * (dfvl*workspace->Delta_lp_temp[i]) * + CEover4 = CEover2 * (dfvl * workspace->Delta_lp_temp[i]) * p_ovun4 * exp_ovun1 * SQR(inv_exp_ovun1); @@ -193,14 +206,13 @@ void LonePair_OverUnder_Coordination_Energy( reax_system *system, inv_exp_ovun2n = 1.0 / (1.0 + exp_ovun2n); inv_exp_ovun8 = 1.0 / (1.0 + exp_ovun8); - data->E_Un += e_un = - -p_ovun5 * (1.0 - exp_ovun6) * inv_exp_ovun2n * inv_exp_ovun8; + data->E_Un += e_un = -p_ovun5 * (1.0 - exp_ovun6) * inv_exp_ovun2n * inv_exp_ovun8; - CEunder1 = inv_exp_ovun2n * ( p_ovun5*p_ovun6*exp_ovun6*inv_exp_ovun8 + + CEunder1 = inv_exp_ovun2n * ( p_ovun5 * p_ovun6 * exp_ovun6 * inv_exp_ovun8 + p_ovun2 * e_un * exp_ovun2n); CEunder2 = -e_un * p_ovun8 * exp_ovun8 * inv_exp_ovun8; - CEunder3 = CEunder1 * (1.0 - 
dfvl*workspace->dDelta_lp[i]*inv_exp_ovun1); - CEunder4 = CEunder1 * (dfvl*workspace->Delta_lp_temp[i]) * + CEunder3 = CEunder1 * (1.0 - dfvl * workspace->dDelta_lp[i] * inv_exp_ovun1); + CEunder4 = CEunder1 * (dfvl * workspace->Delta_lp_temp[i]) * p_ovun4 * exp_ovun1 * SQR(inv_exp_ovun1) + CEunder2; //fprintf( stdout, "%6d%23.15e%23.15e%23.15e\n", @@ -215,100 +227,98 @@ void LonePair_OverUnder_Coordination_Energy( reax_system *system, Add_dDelta( system, lists, i, CEunder3, workspace->f_un ); // UnCoor - 1st #endif - - for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ){ + for ( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) + { pbond = &(bonds->select.bond_list[pj]); j = pbond->nbr; type_j = system->atoms[j].type; bo_ij = &(pbond->bo_data); twbp = &(system->reaxprm.tbp[ index_tbp(type_i,type_j,system->reaxprm.num_atom_types) ]); - - bo_ij->Cdbo += CEover1 * twbp->p_ovun1 * twbp->De_s; // OvCoor - 1st - workspace->CdDelta[j] += CEover4*(1.0 - dfvl*workspace->dDelta_lp[j])* - (bo_ij->BO_pi + bo_ij->BO_pi2); // OvCoor - 3a - bo_ij->Cdbopi += CEover4 * - (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]);//OvCoor-3b - bo_ij->Cdbopi2 += CEover4 * - (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]);//OvCoor-3b + bo_ij->Cdbo += CEover1 * twbp->p_ovun1 * twbp->De_s; // OvCoor - 1st + workspace->CdDelta[j] += CEover4 * (1.0 - dfvl * workspace->dDelta_lp[j]) * + (bo_ij->BO_pi + bo_ij->BO_pi2); // OvCoor - 3a + bo_ij->Cdbopi += CEover4 * + (workspace->Delta[j] - dfvl * workspace->Delta_lp_temp[j]); //OvCoor-3b + bo_ij->Cdbopi2 += CEover4 * + (workspace->Delta[j] - dfvl * workspace->Delta_lp_temp[j]); //OvCoor-3b - workspace->CdDelta[j] += CEunder4*(1.0-dfvl*workspace->dDelta_lp[j]) * + workspace->CdDelta[j] += CEunder4 * (1.0 - dfvl * workspace->dDelta_lp[j]) * (bo_ij->BO_pi + bo_ij->BO_pi2); // UnCoor - 2a - bo_ij->Cdbopi += CEunder4 * - (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]);//UnCoor-2b - bo_ij->Cdbopi2 += 
CEunder4 * - (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]);//UnCoor-2b - + bo_ij->Cdbopi += CEunder4 * + (workspace->Delta[j] - dfvl * workspace->Delta_lp_temp[j]); //UnCoor-2b + bo_ij->Cdbopi2 += CEunder4 * + (workspace->Delta[j] - dfvl * workspace->Delta_lp_temp[j]); //UnCoor-2b #ifdef TEST_ENERGY /* fprintf( out_control->eov, "%6d%23.15e%23.15e" - workspace->orig_id[j]+1, - //twbp->p_ovun1,twbp->De_s,Delta_lpcorr*DlpVi*inv_exp_ovun2, - CEover1*twbp->p_ovun1*twbp->De_s, CEover3 ); */ - - /*fprintf( out_control->eov, "%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n", - workspace->orig_id[j]+1, - CEover4, - CEover4* - (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]), - CEover4 * (bo_ij->BO_pi + bo_ij->BO_pi2), - (1.0 - dfvl*workspace->dDelta_lp[j]), - CEover4 * (1.0 - dfvl*workspace->dDelta_lp[j]) * - (bo_ij->BO_pi + bo_ij->BO_pi2) );*/ + workspace->orig_id[j]+1, + //twbp->p_ovun1,twbp->De_s,Delta_lpcorr*DlpVi*inv_exp_ovun2, + CEover1*twbp->p_ovun1*twbp->De_s, CEover3 ); */ + + /*fprintf( out_control->eov, "%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n", + workspace->orig_id[j]+1, + CEover4, + CEover4* + (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]), + CEover4 * (bo_ij->BO_pi + bo_ij->BO_pi2), + (1.0 - dfvl*workspace->dDelta_lp[j]), + CEover4 * (1.0 - dfvl*workspace->dDelta_lp[j]) * + (bo_ij->BO_pi + bo_ij->BO_pi2) );*/ /* fprintf( out_control->eun, "%6d%23.15e\n", - workspace->orig_id[j]+1, CEunder3 ); */ + workspace->orig_id[j]+1, CEunder3 ); */ /*fprintf( out_control->eun, "%6d%23.15e%23.15e%23.15e%23.15e\n", - workspace->orig_id[j]+1, - CEunder4, - (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]), - CEunder4* - (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]), - CEunder4*(1.0 - dfvl*workspace->dDelta_lp[j])* - (bo_ij->BO_pi + bo_ij->BO_pi2) );*/ + workspace->orig_id[j]+1, + CEunder4, + (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]), + CEunder4* + (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]), + 
CEunder4*(1.0 - dfvl*workspace->dDelta_lp[j])* + (bo_ij->BO_pi + bo_ij->BO_pi2) );*/ #endif #ifdef TEST_FORCES - Add_dBO( system, lists, i, pj, CEover1 * twbp->p_ovun1 * twbp->De_s, - workspace->f_ov ); // OvCoor - 1st term + Add_dBO( system, lists, i, pj, CEover1 * twbp->p_ovun1 * twbp->De_s, + workspace->f_ov ); // OvCoor - 1st term Add_dDelta( system, lists, j, - CEover4 * (1.0 - dfvl*workspace->dDelta_lp[j]) * - (bo_ij->BO_pi+bo_ij->BO_pi2), workspace->f_ov );//OvCoor3a + CEover4 * (1.0 - dfvl * workspace->dDelta_lp[j]) * + (bo_ij->BO_pi + bo_ij->BO_pi2), workspace->f_ov ); //OvCoor3a - Add_dBOpinpi2( system, lists, i, pj, - CEover4 * (workspace->Delta[j] - - dfvl * workspace->Delta_lp_temp[j]), - CEover4 * (workspace->Delta[j] - - dfvl * workspace->Delta_lp_temp[j]), - workspace->f_ov, workspace->f_ov ); // OvCoor - 3b + Add_dBOpinpi2( system, lists, i, pj, + CEover4 * (workspace->Delta[j] - + dfvl * workspace->Delta_lp_temp[j]), + CEover4 * (workspace->Delta[j] - + dfvl * workspace->Delta_lp_temp[j]), + workspace->f_ov, workspace->f_ov ); // OvCoor - 3b Add_dDelta( system, lists, j, - CEunder4 * (1.0 - dfvl*workspace->dDelta_lp[j]) * - (bo_ij->BO_pi + bo_ij->BO_pi2), - workspace->f_un ); // UnCoor - 2a - - Add_dBOpinpi2( system, lists, i, pj, - CEunder4 * (workspace->Delta[j] - - dfvl * workspace->Delta_lp_temp[j]), - CEunder4 * (workspace->Delta[j] - - dfvl * workspace->Delta_lp_temp[j]), - workspace->f_un, workspace->f_un ); // UnCoor - 2b + CEunder4 * (1.0 - dfvl * workspace->dDelta_lp[j]) * + (bo_ij->BO_pi + bo_ij->BO_pi2), + workspace->f_un ); // UnCoor - 2a + + Add_dBOpinpi2( system, lists, i, pj, + CEunder4 * (workspace->Delta[j] - + dfvl * workspace->Delta_lp_temp[j]), + CEunder4 * (workspace->Delta[j] - + dfvl * workspace->Delta_lp_temp[j]), + workspace->f_un, workspace->f_un ); // UnCoor - 2b #endif } -#ifdef TEST_ENERGY +#ifdef TEST_ENERGY - fprintf( out_control->eov, "%6d%15.8f%15.8f%15.8f\n", - i+1, DlpVi, Delta_lpcorr, sbp_i->valency ); + 
fprintf( out_control->eov, "%6d%15.8f%15.8f%15.8f\n", + i + 1, DlpVi, Delta_lpcorr, sbp_i->valency ); - fprintf( out_control->eov, "%6d%15.8f%15.8f\n", - i+1/*workspace->orig_id[i]+1*/, e_ov, data->E_Ov + data->E_Un ); + fprintf( out_control->eov, "%6d%15.8f%15.8f\n", + i + 1/*workspace->orig_id[i]+1*/, e_ov, data->E_Ov + data->E_Un ); - fprintf( out_control->eov, "%6d%15.8f%15.8f\n", - i+1/*workspace->orig_id[i]+1*/, e_un, data->E_Ov + data->E_Un ); + fprintf( out_control->eov, "%6d%15.8f%15.8f\n", + i + 1/*workspace->orig_id[i]+1*/, e_un, data->E_Ov + data->E_Un ); #endif } } diff --git a/PuReMD-GPU/src/system_props.c b/PuReMD-GPU/src/system_props.c index 0126b86b776dce8fd30aea0c228731b95104b216..fc93a474cf378f1a382d0ae017cf15a9b23eb17a 100644 --- a/PuReMD-GPU/src/system_props.c +++ b/PuReMD-GPU/src/system_props.c @@ -1,67 +1,50 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. 
----------------------------------------------------------------------*/ #include "system_props.h" - -#include "box.h" +#include "tool_box.h" #include "vector.h" -HOST real Get_Time( ) -{ - struct timeval tim; - - gettimeofday(&tim, NULL ); - return( tim.tv_sec + (tim.tv_usec / 1000000.0) ); -} - - -HOST real Get_Timing_Info( real t_start ) -{ - struct timeval tim; - real t_end; - - gettimeofday(&tim, NULL ); - t_end = tim.tv_sec + (tim.tv_usec / 1000000.0); - return (t_end - t_start); -} - - -void Temperature_Control( control_params *control, simulation_data *data, - output_controls *out_control ) +void Temperature_Control( control_params *control, simulation_data *data, + output_controls *out_control ) { real tmp; - if( control->T_mode == 1 ) { // step-wise temperature control - if( (data->step - data->prev_steps) % - ((int)(control->T_freq / control->dt)) == 0 ) { - if( fabs( control->T - control->T_final ) >= fabs( control->T_rate ) ) + if ( control->T_mode == 1 ) // step-wise temperature control + { + if ( (data->step - data->prev_steps) % + ((int)(control->T_freq / control->dt)) == 0 ) + { + if ( fabs( control->T - control->T_final ) >= fabs( control->T_rate ) ) control->T += control->T_rate; - else control->T = control->T_final; + else control->T = control->T_final; } } - else if( control->T_mode == 2 ) { // constant slope control + else if ( control->T_mode == 2 ) // constant slope control + { tmp = control->T_rate * control->dt / control->T_freq; - if( fabs( control->T - control->T_final ) >= fabs( tmp ) ) - control->T += tmp; + if ( fabs( control->T - control->T_final ) >= fabs( tmp ) ) + control->T += tmp; } } @@ -69,39 +52,34 @@ void Temperature_Control( control_params *control, simulation_data *data, void Compute_Total_Mass( reax_system *system, simulation_data *data ) { int i; - int blocks; - int block_size; - real *partial_sums = 0; data->M = 0; - for( i = 0; i < system->N; i++ ) - data->M += system->reaxprm.sbp[ system->atoms[i].type ].mass; + for ( 
i = 0; i < system->N; i++ ) + data->M += system->reaxprm.sbp[ system->atoms[i].type ].mass; - data->inv_M = 1. / data->M; + //fprintf ( stderr, "Compute_total_Mass -->%f<-- \n", data->M ); + data->inv_M = 1. / data->M; } -void Compute_Center_of_Mass( reax_system *system, simulation_data *data, - FILE *fout ) +void Compute_Center_of_Mass( reax_system *system, simulation_data *data, + FILE *fout ) { int i; real m, xx, xy, xz, yy, yz, zz, det; rvec tvec, diff; rtensor mat, inv; - int blocks; - int block_size; - rvec *l_xcm, *l_vcm, *l_amcm; - real t_start, t_end; - rvec_MakeZero( data->xcm ); // position of CoM rvec_MakeZero( data->vcm ); // velocity of CoM rvec_MakeZero( data->amcm ); // angular momentum of CoM rvec_MakeZero( data->avcm ); // angular velocity of CoM + /* Compute the position, velocity and angular momentum about the CoM */ - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { m = system->reaxprm.sbp[ system->atoms[i].type ].mass; rvec_ScaledAdd( data->xcm, m, system->atoms[i].x ); @@ -111,11 +89,13 @@ void Compute_Center_of_Mass( reax_system *system, simulation_data *data, rvec_ScaledAdd( data->amcm, m, tvec ); /*fprintf( fout,"%3d %g %g %g\n", - i+1, + i+1, system->atoms[i].v[0], system->atoms[i].v[1], system->atoms[i].v[2] ); - fprintf( fout, "vcm: %g %g %g\n", - data->vcm[0], data->vcm[1], data->vcm[2] ); - */ + fprintf( fout, "vcm: %g %g %g\n", + data->vcm[0], data->vcm[1], data->vcm[2] ); + */ + /* fprintf( stderr, "amcm: %12.6f %12.6f %12.6f\n", + data->amcm[0], data->amcm[1], data->amcm[2] ); */ } rvec_Scale( data->xcm, data->inv_M, data->xcm ); @@ -129,7 +109,8 @@ void Compute_Center_of_Mass( reax_system *system, simulation_data *data, /* Calculate and then invert the inertial tensor */ xx = xy = xz = yy = yz = zz = 0; - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { m = system->reaxprm.sbp[ system->atoms[i].type ].mass; rvec_ScaledSum( diff, 1., system->atoms[i].x, -1., data->xcm ); @@ 
-138,19 +119,10 @@ void Compute_Center_of_Mass( reax_system *system, simulation_data *data, xz += diff[0] * diff[2] * m; yy += diff[1] * diff[1] * m; yz += diff[1] * diff[2] * m; - zz += diff[2] * diff[2] * m; + zz += diff[2] * diff[2] * m; } -#ifdef __DEBUG_CUDA__ - fprintf (stderr, " xx: %f \n", xx); - fprintf (stderr, " xy: %f \n", xy); - fprintf (stderr, " xz: %f \n", xz); - fprintf (stderr, " yy: %f \n", yy); - fprintf (stderr, " yz: %f \n", yz); - fprintf (stderr, " zz: %f \n", zz); -#endif - - mat[0][0] = yy + zz; + mat[0][0] = yy + zz; mat[0][1] = mat[1][0] = -xy; mat[0][2] = mat[2][0] = -xz; mat[1][1] = xx + zz; @@ -158,12 +130,12 @@ void Compute_Center_of_Mass( reax_system *system, simulation_data *data, mat[2][2] = xx + yy; /* invert the inertial tensor */ - det = ( mat[0][0] * mat[1][1] * mat[2][2] + - mat[0][1] * mat[1][2] * mat[2][0] + + det = ( mat[0][0] * mat[1][1] * mat[2][2] + + mat[0][1] * mat[1][2] * mat[2][0] + mat[0][2] * mat[1][0] * mat[2][1] ) - - ( mat[0][0] * mat[1][2] * mat[2][1] + - mat[0][1] * mat[1][0] * mat[2][2] + - mat[0][2] * mat[1][1] * mat[2][0] ); + ( mat[0][0] * mat[1][2] * mat[2][1] + + mat[0][1] * mat[1][0] * mat[2][2] + + mat[0][2] * mat[1][1] * mat[2][0] ); inv[0][0] = mat[1][1] * mat[2][2] - mat[1][2] * mat[2][1]; inv[0][1] = mat[0][2] * mat[2][1] - mat[0][1] * mat[2][2]; @@ -175,33 +147,33 @@ void Compute_Center_of_Mass( reax_system *system, simulation_data *data, inv[2][1] = mat[2][0] * mat[0][1] - mat[0][0] * mat[2][1]; inv[2][2] = mat[0][0] * mat[1][1] - mat[1][0] * mat[0][1]; - if( fabs(det) > ALMOST_ZERO ) - rtensor_Scale( inv, 1./det, inv ); - else + if ( fabs(det) > ALMOST_ZERO ) + rtensor_Scale( inv, 1. 
/ det, inv ); + else rtensor_MakeZero( inv ); /* Compute the angular velocity about the centre of mass */ - rtensor_MatVec( data->avcm, inv, data->amcm ); + rtensor_MatVec( data->avcm, inv, data->amcm ); data->erot_cm = 0.5 * E_CONV * rvec_Dot( data->avcm, data->amcm ); #if defined(DEBUG) - fprintf( stderr, "xcm: %24.15e %24.15e %24.15e\n", - data->xcm[0], data->xcm[1], data->xcm[2] ); - fprintf( stderr, "vcm: %24.15e %24.15e %24.15e\n", - data->vcm[0], data->vcm[1], data->vcm[2] ); - fprintf( stderr, "amcm: %24.15e %24.15e %24.15e\n", - data->amcm[0], data->amcm[1], data->amcm[2] ); + fprintf( stderr, "xcm: %24.15e %24.15e %24.15e\n", + data->xcm[0], data->xcm[1], data->xcm[2] ); + fprintf( stderr, "vcm: %24.15e %24.15e %24.15e\n", + data->vcm[0], data->vcm[1], data->vcm[2] ); + fprintf( stderr, "amcm: %24.15e %24.15e %24.15e\n", + data->amcm[0], data->amcm[1], data->amcm[2] ); /* fprintf( fout, "mat: %f %f %f\n %f %f %f\n %f %f %f\n", - mat[0][0], mat[0][1], mat[0][2], - mat[1][0], mat[1][1], mat[1][2], + mat[0][0], mat[0][1], mat[0][2], + mat[1][0], mat[1][1], mat[1][2], mat[2][0], mat[2][1], mat[2][2] ); fprintf( fout, "inv: %g %g %g\n %g %g %g\n %g %g %g\n", - inv[0][0], inv[0][1], inv[0][2], - inv[1][0], inv[1][1], inv[1][2], + inv[0][0], inv[0][1], inv[0][2], + inv[1][0], inv[1][1], inv[1][2], inv[2][0], inv[2][1], inv[2][2] ); fflush( fout ); */ - fprintf( stderr, "avcm: %24.15e %24.15e %24.15e\n", - data->avcm[0], data->avcm[1], data->avcm[2] ); + fprintf( stderr, "avcm: %24.15e %24.15e %24.15e\n", + data->avcm[0], data->avcm[1], data->avcm[2] ); #endif } @@ -214,7 +186,8 @@ void Compute_Kinetic_Energy( reax_system* system, simulation_data* data ) data->E_Kin = 0.0; - for (i=0; i < system->N; i++) { + for (i = 0; i < system->N; i++) + { m = system->reaxprm.sbp[system->atoms[i].type].mass; rvec_Scale( p, m, system->atoms[i].v ); @@ -232,17 +205,17 @@ void Compute_Kinetic_Energy( reax_system* system, simulation_data* data ) } -/* IMPORTANT: This function 
assumes that current kinetic energy and - * the center of mass of the system is already computed before. +/* IMPORTANT: This function assumes that current kinetic energy and + * the center of mass of the system is already computed before. * - * IMPORTANT: In Klein's paper, it is stated that a dU/dV term needs - * to be added when there are long-range interactions or long-range + * IMPORTANT: In Klein's paper, it is stated that a dU/dV term needs + * to be added when there are long-range interactions or long-range * corrections to short-range interactions present. - * We may want to add that for more accuracy. + * We may want to add that for more accuracy. */ -void Compute_Pressure_Isotropic( reax_system* system, control_params *control, - simulation_data* data, - output_controls *out_control ) +void Compute_Pressure_Isotropic( reax_system* system, control_params *control, + simulation_data* data, + output_controls *out_control ) { int i; reax_atom *p_atom; @@ -254,8 +227,10 @@ void Compute_Pressure_Isotropic( reax_system* system, control_params *control, rvec_MakeZero( data->int_press ); // 0: both int and ext, 1: ext only, 2: int only - if( control->press_mode == 0 || control->press_mode == 2 ) { - for( i = 0; i < system->N; ++i ) { + if ( control->press_mode == 0 || control->press_mode == 2 ) + { + for ( i = 0; i < system->N; ++i ) + { p_atom = &( system->atoms[i] ); /* transform x into unitbox coordinates */ @@ -265,13 +240,14 @@ void Compute_Pressure_Isotropic( reax_system* system, control_params *control, rvec_Multiply( tmp, p_atom->f, tx ); rvec_Add( data->int_press, tmp ); - if( out_control->debug_level > 0 ) { - fprintf( out_control->prs, "%-8d%8.2f%8.2f%8.2f", - i+1, p_atom->x[0], p_atom->x[1], p_atom->x[2] ); - fprintf( out_control->prs, "%8.2f%8.2f%8.2f", - p_atom->f[0], p_atom->f[1], p_atom->f[2] ); - fprintf( out_control->prs, "%8.2f%8.2f%8.2f\n", - data->int_press[0],data->int_press[1],data->int_press[2]); + if ( out_control->debug_level > 0 ) + { + 
fprintf( out_control->prs, "%-8d%8.2f%8.2f%8.2f", + i + 1, p_atom->x[0], p_atom->x[1], p_atom->x[2] ); + fprintf( out_control->prs, "%8.2f%8.2f%8.2f", + p_atom->f[0], p_atom->f[1], p_atom->f[2] ); + fprintf( out_control->prs, "%8.2f%8.2f%8.2f\n", + data->int_press[0], data->int_press[1], data->int_press[2]); } } } @@ -279,53 +255,53 @@ void Compute_Pressure_Isotropic( reax_system* system, control_params *control, /* kinetic contribution */ data->kin_press = 2. * (E_CONV * data->E_Kin) / ( 3. * box->volume * P_CONV ); - /* Calculate total pressure in each direction */ - data->tot_press[0] = data->kin_press - - ((data->int_press[0] + data->ext_press[0]) / - (box->box_norms[1] * box->box_norms[2] * P_CONV)); + /* Calculate total pressure in each direction */ + data->tot_press[0] = data->kin_press - + ((data->int_press[0] + data->ext_press[0]) / + (box->box_norms[1] * box->box_norms[2] * P_CONV)); - data->tot_press[1] = data->kin_press - - ((data->int_press[1] + data->ext_press[1])/ - (box->box_norms[0] * box->box_norms[2] * P_CONV)); + data->tot_press[1] = data->kin_press - + ((data->int_press[1] + data->ext_press[1]) / + (box->box_norms[0] * box->box_norms[2] * P_CONV)); - data->tot_press[2] = data->kin_press - - ((data->int_press[2] + data->ext_press[2])/ - (box->box_norms[0] * box->box_norms[1] * P_CONV)); + data->tot_press[2] = data->kin_press - + ((data->int_press[2] + data->ext_press[2]) / + (box->box_norms[0] * box->box_norms[1] * P_CONV)); /* Average pressure for the whole box */ - data->iso_bar.P=(data->tot_press[0]+data->tot_press[1]+data->tot_press[2])/3; + data->iso_bar.P = (data->tot_press[0] + data->tot_press[1] + data->tot_press[2]) / 3; } -void Compute_Pressure_Isotropic_Klein( reax_system* system, - simulation_data* data ) +void Compute_Pressure_Isotropic_Klein( reax_system* system, + simulation_data* data ) { int i; reax_atom *p_atom; rvec dx; - // IMPORTANT: This function assumes that current kinetic energy and + // IMPORTANT: This function assumes 
that current kinetic energy and // the center of mass of the system is already computed before. data->iso_bar.P = 2.0 * data->E_Kin; - for( i = 0; i < system->N; ++i ) + for ( i = 0; i < system->N; ++i ) { p_atom = &( system->atoms[i] ); - rvec_ScaledSum(dx,1.0,p_atom->x,-1.0,data->xcm); + rvec_ScaledSum(dx, 1.0, p_atom->x, -1.0, data->xcm); data->iso_bar.P += ( -F_CONV * rvec_Dot(p_atom->f, dx) ); } data->iso_bar.P /= (3.0 * system->box.volume); - // IMPORTANT: In Klein's paper, it is stated that a dU/dV term needs - // to be added when there are long-range interactions or long-range + // IMPORTANT: In Klein's paper, it is stated that a dU/dV term needs + // to be added when there are long-range interactions or long-range // corrections to short-range interactions present. // We may want to add that for more accuracy. } -void Compute_Pressure( reax_system* system, simulation_data* data, - static_storage *workspace ) +void Compute_Pressure( reax_system* system, simulation_data* data, + static_storage *workspace ) { int i; reax_atom *p_atom; @@ -333,13 +309,14 @@ void Compute_Pressure( reax_system* system, simulation_data* data, rtensor_MakeZero( data->flex_bar.P ); - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { p_atom = &( system->atoms[i] ); // Distance_on_T3_Gen( data->rcm, p_atom->x, &(system->box), &dx ); rvec_OuterProduct( temp, p_atom->v, p_atom->v ); - rtensor_ScaledAdd( data->flex_bar.P, - system->reaxprm.sbp[ p_atom->type ].mass, temp ); - // rvec_OuterProduct(temp, workspace->virial_forces[i], p_atom->x ); + rtensor_ScaledAdd( data->flex_bar.P, + system->reaxprm.sbp[ p_atom->type ].mass, temp ); + // rvec_OuterProduct(temp, workspace->virial_forces[i], p_atom->x ); rtensor_ScaledAdd( data->flex_bar.P, -F_CONV, temp ); } diff --git a/PuReMD-GPU/src/system_props.h b/PuReMD-GPU/src/system_props.h index 874132451d02b2d62d87c82065874f04a35b2d37..e2cc98350167a763ac6acdac5807d710403210bd 100644 --- a/PuReMD-GPU/src/system_props.h 
+++ b/PuReMD-GPU/src/system_props.h @@ -28,10 +28,6 @@ extern "C" { #endif -real Get_Time( ); - -real Get_Timing_Info( real ); - void Temperature_Control( control_params*, simulation_data*, output_controls* ); void Compute_Total_Mass( reax_system*, simulation_data* ); diff --git a/PuReMD-GPU/src/testmd.c b/PuReMD-GPU/src/testmd.c index 57d8859df4645f982b93803415cb408c57a564f7..b5204950ee733f30cd69c13d28c5cfd6b2200246 100644 --- a/PuReMD-GPU/src/testmd.c +++ b/PuReMD-GPU/src/testmd.c @@ -22,20 +22,23 @@ #include "analyze.h" #include "box.h" +#include "control.h" +#include "ffield.h" #include "forces.h" +#include "geo_tools.h" #include "grid.h" #include "init_md.h" #include "integrate.h" #include "neighbors.h" -#include "param.h" -#include "pdb_tools.h" #include "print_utils.h" #include "reset_utils.h" #include "restart.h" #include "system_props.h" #include "traj.h" +#include "tool_box.h" #include "vector.h" +#ifdef HAVE_CUDA #include "cuda_environment.h" #include "cuda_forces.h" #include "cuda_init_md.h" @@ -43,9 +46,9 @@ #include "cuda_post_evolve.h" #include "cuda_reset_utils.h" #include "cuda_system_props.h" - #ifdef __BUILD_DEBUG__ - #include "validation.h" + #include "cuda_validation.h" +#endif #endif @@ -69,9 +72,12 @@ int BLOCKS, BLOCKS_POW_2, BLOCK_SIZE; int MATVEC_BLOCKS; -void Post_Evolve( reax_system* system, control_params* control, - simulation_data* data, static_storage* workspace, - list** lists, output_controls *out_control ) +static void Post_Evolve( reax_system * const system, + control_params * const control, + simulation_data * const data, + static_storage * const workspace, + list ** const lists, + output_controls * const out_control ) { int i; rvec diff, cross; @@ -110,15 +116,27 @@ void Post_Evolve( reax_system* system, control_params* control, } -void Read_System( char *geof, char *ff, char *ctrlf, - reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - output_controls *out_control ) +void static 
Read_System( char * const geo_file, + char * const ffield_file, + char * const control_file, + reax_system * const system, + control_params * const control, + simulation_data * const data, + static_storage * const workspace, + output_controls * const out_control ) { FILE *ffield, *ctrl; - ffield = fopen( ff, "r" ); - ctrl = fopen( ctrlf, "r" ); + if ( (ffield = fopen( ffield_file, "r" )) == NULL ) + { + fprintf( stderr, "Error opening the ffield file!\n" ); + exit( FILE_NOT_FOUND ); + } + if ( (ctrl = fopen( control_file, "r" )) == NULL ) + { + fprintf( stderr, "Error opening the control file!\n" ); + exit( FILE_NOT_FOUND ); + } /* ffield file */ Read_Force_Field( ffield, &(system->reaxprm) ); @@ -127,32 +145,31 @@ void Read_System( char *geof, char *ff, char *ctrlf, Read_Control_File( ctrl, system, control, out_control ); /* geo file */ - if( control->geo_format == XYZ ) + if( control->geo_format == CUSTOM ) { - fprintf( stderr, "xyz input is not implemented yet\n" ); - exit( 1 ); + Read_Geo( geo_file, system, control, data, workspace ); } else if( control->geo_format == PDB ) { - Read_PDB( geof, system, control, data, workspace ); + Read_PDB( geo_file, system, control, data, workspace ); } else if( control->geo_format == BGF ) { - Read_BGF( geof, system, control, data, workspace ); + Read_BGF( geo_file, system, control, data, workspace ); } else if( control->geo_format == ASCII_RESTART ) { - Read_ASCII_Restart( geof, system, control, data, workspace ); + Read_ASCII_Restart( geo_file, system, control, data, workspace ); control->restart = 1; } else if( control->geo_format == BINARY_RESTART ) { - Read_Binary_Restart( geof, system, control, data, workspace ); + Read_Binary_Restart( geo_file, system, control, data, workspace ); control->restart = 1; } else { fprintf( stderr, "unknown geo file format. 
terminating!\n" ); - exit( 1 ); + exit( INVALID_GEO ); } #if defined(DEBUG_FOCUS) @@ -172,7 +189,14 @@ void Init_Data_Structures( simulation_data *data ) } -int main( int argc, char* argv[] ) +static void usage(char* argv[]) +{ + fprintf(stderr, "usage: ./%s geometry ffield control\n", argv[0]); +} + + +#ifdef HAVE_CUDA +static void gpu_main( int argc, char* argv[] ) { reax_system system; control_params control; @@ -183,7 +207,6 @@ int main( int argc, char* argv[] ) evolve_function Evolve; evolve_function Cuda_Evolve; int steps; - real t_start, t_elapsed; real *results = NULL; @@ -259,8 +282,8 @@ int main( int argc, char* argv[] ) #ifdef __BUILD_DEBUG__ if( !validate_device (&system, &data, &workspace, &lists) ) { - fprintf (stderr, " Results does not match between Device and host @ step --> %d \n", data.step); - exit (1); + fprintf( stderr, " Results does not match between Device and host @ step --> %d \n", data.step ); + exit( 1 ); } #endif @@ -331,6 +354,92 @@ int main( int argc, char* argv[] ) fprintf( out_control.log, "total: %.2f secs\n", data.timing.elapsed ); Cleanup_Cuda_Environment( ); +} + + +#else +static void cpu_main( int argc, char* argv[] ) +{ + reax_system system; + control_params control; + simulation_data data; + static_storage workspace; + list *lists; + output_controls out_control; + evolve_function Evolve; + int steps; + + if ( argc != 4 ) + { + usage(argv); + exit( INVALID_INPUT ); + } + + lists = (list*) malloc( sizeof(list) * LIST_N ); + + Read_System( argv[1], argv[2], argv[3], &system, &control, + &data, &workspace, &out_control ); + + Initialize( &system, &control, &data, &workspace, &lists, + &out_control, &Evolve ); + + /* compute f_0 */ + //if( control.restart == 0 ) { + Reset( &system, &control, &data, &workspace, &lists ); + Generate_Neighbor_Lists( &system, &control, &data, &workspace, + &lists, &out_control ); + + //fprintf( stderr, "total: %.2f secs\n", data.timing.nbrs); + Compute_Forces(&system, &control, &data, &workspace, 
&lists, &out_control); + Compute_Kinetic_Energy( &system, &data ); + Output_Results(&system, &control, &data, &workspace, &lists, &out_control); + ++data.step; + //} + + + for ( ; data.step <= control.nsteps; data.step++ ) + { + if ( control.T_mode ) + { + Temperature_Control( &control, &data, &out_control ); + } + Evolve( &system, &control, &data, &workspace, &lists, &out_control ); + Post_Evolve( &system, &control, &data, &workspace, &lists, &out_control ); + Output_Results(&system, &control, &data, &workspace, &lists, &out_control); + Analysis( &system, &control, &data, &workspace, &lists, &out_control ); + + steps = data.step - data.prev_steps; + if ( steps && out_control.restart_freq && + steps % out_control.restart_freq == 0 ) + Write_Restart( &system, &control, &data, &workspace, &out_control ); + } + + if ( out_control.write_steps > 0 ) + { + fclose( out_control.trj ); + Write_PDB( &system, &(lists[BONDS]), &data, &control, &workspace, &out_control ); + } + + data.timing.end = Get_Time( ); + data.timing.elapsed = Get_Timing_Info( data.timing.start ); + fprintf( out_control.log, "total: %.2f secs\n", data.timing.elapsed ); +} +#endif + + +int main( int argc, char* argv[] ) +{ + if ( argc != 4 ) + { + usage(argv); + exit( INVALID_INPUT ); + } + +#ifdef HAVE_CUDA + gpu_main( argc, argv ); +#else + cpu_main( argc, argv ); +#endif - return 0; + return SUCCESS; } diff --git a/PuReMD-GPU/src/three_body_interactions.c b/PuReMD-GPU/src/three_body_interactions.c index 7ac96e057c6c799ba88204f3f6339fe54b3c61da..f128d2a2749ead3d5b9a08e47f45c0538255caac 100644 --- a/PuReMD-GPU/src/three_body_interactions.c +++ b/PuReMD-GPU/src/three_body_interactions.c @@ -1,19 +1,20 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, 
haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ @@ -21,59 +22,63 @@ #include "three_body_interactions.h" #include "bond_orders.h" +#include "index_utils.h" #include "list.h" #include "lookup.h" #include "vector.h" -#include "index_utils.h" /* calculates the theta angle between i-j-k */ -void Calculate_Theta( rvec dvec_ji, real d_ji, rvec dvec_jk, real d_jk, +void Calculate_Theta( rvec dvec_ji, real d_ji, rvec dvec_jk, real d_jk, real *theta, real *cos_theta ) { - (*cos_theta) = Dot( dvec_ji, dvec_jk, 3 ) / ( d_ji * d_jk ); - if( *cos_theta > 1. ) *cos_theta = 1.0; - if( *cos_theta < -1. ) *cos_theta = -1.0; + (*cos_theta) = rvec_Dot( dvec_ji, dvec_jk ) / ( d_ji * d_jk ); + if ( *cos_theta > 1. ) + { + *cos_theta = 1.0; + } + if ( *cos_theta < -1. 
) + { + *cos_theta = -1.0; + } (*theta) = ACOS( *cos_theta ); } /* calculates the derivative of the cosine of the angle between i-j-k */ -void Calculate_dCos_Theta( rvec dvec_ji, real d_ji, rvec dvec_jk, real d_jk, - rvec* dcos_theta_di, rvec* dcos_theta_dj, - rvec* dcos_theta_dk ) +void Calculate_dCos_Theta( rvec dvec_ji, real d_ji, rvec dvec_jk, real d_jk, + rvec* dcos_theta_di, rvec* dcos_theta_dj, rvec* dcos_theta_dk ) { int t; real sqr_d_ji = SQR(d_ji); real sqr_d_jk = SQR(d_jk); real inv_dists = 1.0 / (d_ji * d_jk); real inv_dists3 = POW( inv_dists, 3 ); - real dot_dvecs = Dot( dvec_ji, dvec_jk, 3 ); + real dot_dvecs = rvec_Dot( dvec_ji, dvec_jk ); real Cdot_inv3 = dot_dvecs * inv_dists3; - for( t = 0; t < 3; ++t ) { - (*dcos_theta_di)[t] = dvec_jk[t] * inv_dists - - Cdot_inv3 * sqr_d_jk * dvec_ji[t]; + for ( t = 0; t < 3; ++t ) + { + (*dcos_theta_di)[t] = dvec_jk[t] * inv_dists - Cdot_inv3 * sqr_d_jk * dvec_ji[t]; - (*dcos_theta_dj)[t] = -(dvec_jk[t] + dvec_ji[t]) * inv_dists + - Cdot_inv3 * ( sqr_d_jk * dvec_ji[t] + sqr_d_ji * dvec_jk[t] ); + (*dcos_theta_dj)[t] = -(dvec_jk[t] + dvec_ji[t]) * inv_dists + + Cdot_inv3 * ( sqr_d_jk * dvec_ji[t] + sqr_d_ji * dvec_jk[t] ); - (*dcos_theta_dk)[t] = dvec_ji[t] * inv_dists - - Cdot_inv3 * sqr_d_ji * dvec_jk[t]; + (*dcos_theta_dk)[t] = dvec_ji[t] * inv_dists - Cdot_inv3 * sqr_d_ji * dvec_jk[t]; } - /*fprintf( stderr, + /*fprintf( stderr, "%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e\n", dvec_jk[t] * inv_dists*/ } -/* this is a 3-body interaction in which the main role is +/* this is a 3-body interaction in which the main role is played by j which sits in the middle of the other two. 
*/ -void Three_Body_Interactions( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - list **lists, output_controls *out_control ) +void Three_Body_Interactions( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, list **lists, + output_controls *out_control ) { int i, j, pi, k, pk, t; int type_i, type_j, type_k; @@ -123,7 +128,8 @@ void Three_Body_Interactions( reax_system *system, control_params *control, p_val10 = system->reaxprm.gp.l[17]; num_thb_intrs = 0; - for( j = 0; j < system->N; ++j ) { + for ( j = 0; j < system->N; ++j ) + { // fprintf( out_control->eval, "j: %d\n", j ); type_j = system->atoms[j].type; start_j = Start_Index(j, bonds); @@ -133,21 +139,24 @@ void Three_Body_Interactions( reax_system *system, control_params *control, p_val5 = system->reaxprm.sbp[ type_j ].p_val5; SBOp = 0, prod_SBO = 1; - for( t = start_j; t < end_j; ++t ) { + for ( t = start_j; t < end_j; ++t ) + { bo_jt = &(bond_list[t].bo_data); SBOp += (bo_jt->BO_pi + bo_jt->BO_pi2); temp = SQR( bo_jt->BO ); - temp *= temp; + temp *= temp; temp *= temp; prod_SBO *= EXP( -temp ); } /* modifications to match Adri's code - 09/01/09 */ - if( workspace->vlpex[j] >= 0 ){ + if ( workspace->vlpex[j] >= 0 ) + { vlpadj = 0; dSBO2 = prod_SBO - 1; } - else{ + else + { vlpadj = workspace->nlp[j]; dSBO2 = (prod_SBO - 1) * (1 - p_val8 * workspace->dDelta_lp[j]); } @@ -155,65 +164,67 @@ void Three_Body_Interactions( reax_system *system, control_params *control, SBO = SBOp + (1 - prod_SBO) * (-workspace->Delta_boc[j] - p_val8 * vlpadj); dSBO1 = -8 * prod_SBO * ( workspace->Delta_boc[j] + p_val8 * vlpadj ); - if( SBO <= 0 ) + if ( SBO <= 0 ) SBO2 = 0, CSBO2 = 0; - else if( SBO > 0 && SBO <= 1 ) { + else if ( SBO > 0 && SBO <= 1 ) + { SBO2 = POW( SBO, p_val9 ); CSBO2 = p_val9 * POW( SBO, p_val9 - 1 ); } - else if( SBO > 1 && SBO < 2 ) { - SBO2 = 2 - POW( 2-SBO, p_val9 ); + else if ( SBO > 1 && SBO < 2 ) + { + 
SBO2 = 2 - POW( 2 - SBO, p_val9 ); CSBO2 = p_val9 * POW( 2 - SBO, p_val9 - 1 ); } - else - SBO2 = 2, CSBO2 = 0; + else + SBO2 = 2, CSBO2 = 0; expval6 = EXP( p_val6 * workspace->Delta_boc[j] ); - /* unlike 2-body intrs where we enforce i<j, we cannot put any such - restrictions here. such a restriction would prevent us from producing + /* unlike 2-body intrs where we enforce i<j, we cannot put any such + restrictions here. such a restriction would prevent us from producing all 4-body intrs correctly */ - for( pi = start_j; pi < end_j; ++pi ) { + for ( pi = start_j; pi < end_j; ++pi ) + { Set_Start_Index( pi, num_thb_intrs, thb_intrs ); - pbond_ij = &(bond_list[pi]); bo_ij = &(pbond_ij->bo_data); BOA_ij = bo_ij->BO - control->thb_cut; - if( BOA_ij/*bo_ij->BO*/ > (real) 0.0 ) { + if ( BOA_ij/*bo_ij->BO*/ > 0.0 ) + { i = pbond_ij->nbr; - r_ij = pbond_ij->d; + r_ij = pbond_ij->d; type_i = system->atoms[i].type; // fprintf( out_control->eval, "i: %d\n", i ); /* first copy 3-body intrs from previously computed ones where i>k. - IMPORTANT: if it is less costly to compute theta and its - derivative, we should definitely re-compute them, + IMPORTANT: if it is less costly to compute theta and its + derivative, we should definitely re-compute them, instead of copying! 
- in the second for-loop below, we compute only new 3-body intrs + in the second for-loop below, we compute only new 3-body intrs where i < k */ - for( pk = start_j; pk < pi; ++pk ) { + for ( pk = start_j; pk < pi; ++pk ) + { // fprintf( out_control->eval, "pk: %d\n", pk ); start_pk = Start_Index( pk, thb_intrs ); end_pk = End_Index( pk, thb_intrs ); - for( t = start_pk; t < end_pk; ++t ) - if( thb_list[t].thb == i ) { + for ( t = start_pk; t < end_pk; ++t ) + if ( thb_list[t].thb == i ) + { p_ijk = &(thb_list[num_thb_intrs]); p_kji = &(thb_list[t]); p_ijk->thb = bond_list[pk].nbr; p_ijk->pthb = pk; - p_ijk->theta = p_kji->theta; + p_ijk->theta = p_kji->theta; rvec_Copy( p_ijk->dcos_di, p_kji->dcos_dk ); rvec_Copy( p_ijk->dcos_dj, p_kji->dcos_dj ); rvec_Copy( p_ijk->dcos_dk, p_kji->dcos_di ); - //if (j == 12) - //fprintf (stderr, "Adding one for matched atom %d \n", i); - ++num_thb_intrs; break; } @@ -221,7 +232,8 @@ void Three_Body_Interactions( reax_system *system, control_params *control, /* and this is the second for loop mentioned above */ - for( pk = pi+1; pk < end_j; ++pk ) { + for ( pk = pi + 1; pk < end_j; ++pk ) + { pbond_jk = &(bond_list[pk]); bo_jk = &(pbond_jk->bo_data); BOA_jk = bo_jk->BO - control->thb_cut; @@ -229,53 +241,55 @@ void Three_Body_Interactions( reax_system *system, control_params *control, type_k = system->atoms[k].type; p_ijk = &( thb_list[num_thb_intrs] ); - //TODO - CHANGE ORIGINAL + //CHANGE ORIGINAL if (BOA_jk <= 0) continue; + //CHANGE ORIGINAL - Calculate_Theta( pbond_ij->dvec, pbond_ij->d, - pbond_jk->dvec, pbond_jk->d, - &theta, &cos_theta ); - Calculate_dCos_Theta( pbond_ij->dvec, pbond_ij->d, - pbond_jk->dvec, pbond_jk->d, - &(p_ijk->dcos_di), &(p_ijk->dcos_dj), - &(p_ijk->dcos_dk) ); + Calculate_Theta( pbond_ij->dvec, pbond_ij->d, + pbond_jk->dvec, pbond_jk->d, + &theta, &cos_theta ); + + Calculate_dCos_Theta( pbond_ij->dvec, pbond_ij->d, + pbond_jk->dvec, pbond_jk->d, + &(p_ijk->dcos_di), &(p_ijk->dcos_dj), + 
&(p_ijk->dcos_dk) ); p_ijk->thb = k; p_ijk->pthb = pk; p_ijk->theta = theta; - //if (j == 12) - //fprintf (stderr, "Adding one for the rest %d \n", k); - sin_theta = SIN( theta ); - if( sin_theta < 1.0e-5 ) + if ( sin_theta < 1.0e-5 ) sin_theta = 1.0e-5; ++num_thb_intrs; - if( BOA_jk > 0.0 && - (bo_ij->BO * bo_jk->BO) > SQR(control->thb_cut)/*0*/) { - r_jk = pbond_jk->d; + if ( BOA_jk > 0.0 && + (bo_ij->BO * bo_jk->BO) > SQR(control->thb_cut)/*0*/) + { + r_jk = pbond_jk->d; thbh = &( system->reaxprm.thbp[ index_thbp(type_i,type_j,type_k,system->reaxprm.num_atom_types) ] ); flag = 0; /* if( workspace->orig_id[i] < workspace->orig_id[k] ) - fprintf( stdout, "%6d %6d %6d %7.3f %7.3f %7.3f\n", + fprintf( stdout, "%6d %6d %6d %7.3f %7.3f %7.3f\n", workspace->orig_id[i], workspace->orig_id[j], workspace->orig_id[k], bo_ij->BO, bo_jk->BO, p_ijk->theta ); - else - fprintf( stdout, "%6d %6d %6d %7.3f %7.3f %7.3f\n", + else + fprintf( stdout, "%6d %6d %6d %7.3f %7.3f %7.3f\n", workspace->orig_id[k], workspace->orig_id[j], workspace->orig_id[i], bo_jk->BO, bo_ij->BO, p_ijk->theta ); */ - for( cnt = 0; cnt < thbh->cnt; ++cnt ) { - // fprintf( out_control->eval, + for ( cnt = 0; cnt < thbh->cnt; ++cnt ) + { + // fprintf( out_control->eval, // "%6d%6d%6d -- exists in thbp\n", i+1, j+1, k+1 ); - if( fabs(thbh->prm[cnt].p_val1) > 0.001 ) { + if ( fabs(thbh->prm[cnt].p_val1) > 0.001 ) + { thbp = &( thbh->prm[cnt] ); /* ANGLE ENERGY */ @@ -287,27 +301,27 @@ void Three_Body_Interactions( reax_system *system, control_params *control, exp3ij = EXP( -p_val3 * POW( BOA_ij, p_val4 ) ); f7_ij = 1.0 - exp3ij; - Cf7ij = p_val3 * p_val4 * - POW( BOA_ij, p_val4 - 1.0 ) * exp3ij; + Cf7ij = p_val3 * p_val4 * + POW( BOA_ij, p_val4 - 1.0 ) * exp3ij; exp3jk = EXP( -p_val3 * POW( BOA_jk, p_val4 ) ); f7_jk = 1.0 - exp3jk; - Cf7jk = p_val3 * p_val4 * - POW( BOA_jk, p_val4 - 1.0 ) * exp3jk; + Cf7jk = p_val3 * p_val4 * + POW( BOA_jk, p_val4 - 1.0 ) * exp3jk; expval7 = EXP( -p_val7 * 
workspace->Delta_boc[j] ); trm8 = 1.0 + expval6 + expval7; f8_Dj = p_val5 - ( (p_val5 - 1.0) * (2.0 + expval6) / trm8 ); Cf8j = ( (1.0 - p_val5) / SQR(trm8) ) * - (p_val6 * expval6 * trm8 - - (2.0 + expval6) * ( p_val6 * expval6 - p_val7 * expval7 )); + (p_val6 * expval6 * trm8 - + (2.0 + expval6) * ( p_val6 * expval6 - p_val7 * expval7 )); - theta_0 = 180.0 - - theta_00 * (1.0 - EXP(-p_val10 * (2.0 - SBO2))); - theta_0 = DEG2RAD( theta_0 ); + theta_0 = 180.0 - + theta_00 * (1.0 - EXP(-p_val10 * (2.0 - SBO2))); + theta_0 = DEG2RAD( theta_0 ); - expval2theta = EXP(-p_val2 * SQR(theta_0-theta)); - if( p_val1 >= 0 ) + expval2theta = EXP(-p_val2 * SQR(theta_0 - theta)); + if ( p_val1 >= 0 ) expval12theta = p_val1 * (1.0 - expval2theta); else // To avoid linear Me-H-Me angles (6/6/06) expval12theta = p_val1 * -expval2theta; @@ -315,11 +329,11 @@ void Three_Body_Interactions( reax_system *system, control_params *control, CEval1 = Cf7ij * f7_jk * f8_Dj * expval12theta; CEval2 = Cf7jk * f7_ij * f8_Dj * expval12theta; CEval3 = Cf8j * f7_ij * f7_jk * expval12theta; - CEval4 = -2.0 * p_val1 * p_val2 * f7_ij * f7_jk * f8_Dj * - expval2theta * (theta_0 - theta); + CEval4 = -2.0 * p_val1 * p_val2 * f7_ij * f7_jk * f8_Dj * + expval2theta * (theta_0 - theta); - Ctheta_0 = p_val10 * DEG2RAD(theta_00) * - exp( -p_val10 * (2.0 - SBO2) ); + Ctheta_0 = p_val10 * DEG2RAD(theta_00) * + exp( -p_val10 * (2.0 - SBO2) ); CEval5 = -CEval4 * Ctheta_0 * CSBO2; CEval6 = CEval5 * dSBO1; @@ -342,13 +356,13 @@ void Three_Body_Interactions( reax_system *system, control_params *control, exp_pen4 = EXP( p_pen4 * workspace->Delta[j] ); trm_pen34 = 1.0 + exp_pen3 + exp_pen4; f9_Dj = ( 2.0 + exp_pen3 ) / trm_pen34; - Cf9j = (-p_pen3 * exp_pen3 * trm_pen34 - + Cf9j = (-p_pen3 * exp_pen3 * trm_pen34 - (2.0 + exp_pen3) * ( -p_pen3 * exp_pen3 + - p_pen4 * exp_pen4 )) / - SQR( trm_pen34 ); + p_pen4 * exp_pen4 )) / + SQR( trm_pen34 ); - data->E_Pen += e_pen = - p_pen1 * f9_Dj * exp_pen2ij * exp_pen2jk; + 
data->E_Pen += e_pen = + p_pen1 * f9_Dj * exp_pen2ij * exp_pen2jk; CEpen1 = e_pen * Cf9j / f9_Dj; temp = -2.0 * p_pen2 * e_pen; @@ -364,66 +378,64 @@ void Three_Body_Interactions( reax_system *system, control_params *control, p_coa4 = system->reaxprm.gp.l[30]; exp_coa2 = EXP( p_coa2 * workspace->Delta_boc[j] ); - data->E_Coa += e_coa = - p_coa1 / (1. + exp_coa2) * - EXP( -p_coa3 * SQR(total_bo[i] - BOA_ij) ) * - EXP( -p_coa3 * SQR(total_bo[k] - BOA_jk) ) * - EXP( -p_coa4 * SQR(BOA_ij - 1.5) ) * - EXP( -p_coa4 * SQR(BOA_jk - 1.5) ); + data->E_Coa += e_coa = + p_coa1 / (1. + exp_coa2) * + EXP( -p_coa3 * SQR(total_bo[i] - BOA_ij) ) * + EXP( -p_coa3 * SQR(total_bo[k] - BOA_jk) ) * + EXP( -p_coa4 * SQR(BOA_ij - 1.5) ) * + EXP( -p_coa4 * SQR(BOA_jk - 1.5) ); CEcoa1 = -2 * p_coa4 * (BOA_ij - 1.5) * e_coa; CEcoa2 = -2 * p_coa4 * (BOA_jk - 1.5) * e_coa; - CEcoa3 = -p_coa2 * exp_coa2 * e_coa / (1+exp_coa2); - CEcoa4 = -2*p_coa3 * (total_bo[i]-BOA_ij) * e_coa; - CEcoa5 = -2*p_coa3 * (total_bo[k]-BOA_jk) * e_coa; + CEcoa3 = -p_coa2 * exp_coa2 * e_coa / (1 + exp_coa2); + CEcoa4 = -2 * p_coa3 * (total_bo[i] - BOA_ij) * e_coa; + CEcoa5 = -2 * p_coa3 * (total_bo[k] - BOA_jk) * e_coa; /* END COALITION ENERGY */ /* FORCES */ - bo_ij->Cdbo += (CEval1 + CEpen2 + (CEcoa1-CEcoa4)); - bo_jk->Cdbo += (CEval2 + CEpen3 + (CEcoa2-CEcoa5)); - workspace->CdDelta[j] += ((CEval3 + CEval7) + - CEpen1 + CEcoa3); + bo_ij->Cdbo += (CEval1 + CEpen2 + (CEcoa1 - CEcoa4)); + bo_jk->Cdbo += (CEval2 + CEpen3 + (CEcoa2 - CEcoa5)); + workspace->CdDelta[j] += ((CEval3 + CEval7) + + CEpen1 + CEcoa3); workspace->CdDelta[i] += CEcoa4; - workspace->CdDelta[k] += CEcoa5; + workspace->CdDelta[k] += CEcoa5; - for( t = start_j; t < end_j; ++t ) { + for ( t = start_j; t < end_j; ++t ) + { pbond_jt = &( bond_list[t] ); bo_jt = &(pbond_jt->bo_data); temp_bo_jt = bo_jt->BO; temp = CUBE( temp_bo_jt ); - pBOjt7 = temp * temp * temp_bo_jt; + pBOjt7 = temp * temp * temp_bo_jt; - // fprintf( out_control->eval, "%6d%12.8f\n", 
- // workspace->orig_id[ bond_list[t].nbr ], + // fprintf( out_control->eval, "%6d%12.8f\n", + // workspace->orig_id[ bond_list[t].nbr ], // (CEval6 * pBOjt7) ); bo_jt->Cdbo += (CEval6 * pBOjt7); bo_jt->Cdbopi += CEval5; bo_jt->Cdbopi2 += CEval5; - } - + } - if( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT) { + if ( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT) + { rvec_ScaledAdd( system->atoms[i].f, CEval8, p_ijk->dcos_di ); rvec_ScaledAdd( system->atoms[j].f, CEval8, p_ijk->dcos_dj ); rvec_ScaledAdd( system->atoms[k].f, CEval8, p_ijk->dcos_dk ); - - /* - if (i == 0) fprintf (stderr, " atom %d adding to i (j) = 0\n", j); - if (k == 0) fprintf (stderr, " atom %d adding to i (k) = 0\n", j); - */ } - else { + else + { /* terms not related to bond order derivatives - are added directly into + are added directly into forces and pressure vector/tensor */ rvec_Scale( force, CEval8, p_ijk->dcos_di ); rvec_Add( system->atoms[i].f, force ); rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); rvec_Add( data->ext_press, ext_press ); - rvec_ScaledAdd( system->atoms[j].f, CEval8, p_ijk->dcos_dj ); + rvec_ScaledAdd( system->atoms[j].f, + CEval8, p_ijk->dcos_dj ); rvec_Scale( force, CEval8, p_ijk->dcos_dk ); rvec_Add( system->atoms[k].f, force ); @@ -432,96 +444,97 @@ void Three_Body_Interactions( reax_system *system, control_params *control, /* This part is for a fully-flexible box */ - /* rvec_OuterProduct( temp_rtensor, + /* rvec_OuterProduct( temp_rtensor, p_ijk->dcos_di, system->atoms[i].x ); rtensor_Scale( total_rtensor, +CEval8, temp_rtensor ); - rvec_OuterProduct( temp_rtensor, + rvec_OuterProduct( temp_rtensor, p_ijk->dcos_dj, system->atoms[j].x ); rtensor_ScaledAdd(total_rtensor, CEval8, temp_rtensor); - rvec_OuterProduct( temp_rtensor, + rvec_OuterProduct( temp_rtensor, p_ijk->dcos_dk, system->atoms[k].x ); rtensor_ScaledAdd(total_rtensor, CEval8, temp_rtensor); if( pbond_ij->imaginary || 
pbond_jk->imaginary ) - rtensor_ScaledAdd( data->flex_bar.P, + rtensor_ScaledAdd( data->flex_bar.P, -1.0, total_rtensor ); else rtensor_Add( data->flex_bar.P, total_rtensor ); */ } #ifdef TEST_ENERGY - fprintf( out_control->eval, - //"%6d%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e", - "%6d%6d%6d%23.15e%23.15e%23.15e\n", - i+1, j+1, k+1, - //workspace->orig_id[i]+1, - //workspace->orig_id[j]+1, - //workspace->orig_id[k]+1, - //workspace->Delta_boc[j], - RAD2DEG(theta), /*BOA_ij, BOA_jk, */ - e_ang, data->E_Ang ); - - /*fprintf( out_control->eval, + fprintf( out_control->eval, + //"%6d%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e", + "%6d%6d%6d%23.15e%23.15e%23.15e\n", + i + 1, j + 1, k + 1, + //workspace->orig_id[i]+1, + //workspace->orig_id[j]+1, + //workspace->orig_id[k]+1, + //workspace->Delta_boc[j], + RAD2DEG(theta), /*BOA_ij, BOA_jk, */ + e_ang, data->E_Ang ); + + /*fprintf( out_control->eval, "%23.15e%23.15e%23.15e%23.15e", p_val3, p_val4, BOA_ij, BOA_jk ); - fprintf( out_control->eval, + fprintf( out_control->eval, "%23.15e%23.15e%23.15e%23.15e", f7_ij, f7_jk, f8_Dj, expval12theta ); - fprintf( out_control->eval, + fprintf( out_control->eval, "%23.15e%23.15e%23.15e%23.15e%23.15e\n", CEval1, CEval2, CEval3, CEval4, CEval5 - //CEval6, CEval7, CEval8 );*/ + //CEval6, CEval7, CEval8 );*/ - /*fprintf( out_control->eval, + /*fprintf( out_control->eval, "%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e%23.15e\n", - -p_ijk->dcos_di[0]/sin_theta, - -p_ijk->dcos_di[1]/sin_theta, - -p_ijk->dcos_di[2]/sin_theta, - -p_ijk->dcos_dj[0]/sin_theta, - -p_ijk->dcos_dj[1]/sin_theta, - -p_ijk->dcos_dj[2]/sin_theta, - -p_ijk->dcos_dk[0]/sin_theta, - -p_ijk->dcos_dk[1]/sin_theta, + -p_ijk->dcos_di[0]/sin_theta, + -p_ijk->dcos_di[1]/sin_theta, + -p_ijk->dcos_di[2]/sin_theta, + -p_ijk->dcos_dj[0]/sin_theta, + -p_ijk->dcos_dj[1]/sin_theta, + -p_ijk->dcos_dj[2]/sin_theta, + -p_ijk->dcos_dk[0]/sin_theta, + -p_ijk->dcos_dk[1]/sin_theta, -p_ijk->dcos_dk[2]/sin_theta );*/ 
- /* fprintf( out_control->epen, - "%23.15e%23.15e%23.15e\n", + /* fprintf( out_control->epen, + "%23.15e%23.15e%23.15e\n", CEpen1, CEpen2, CEpen3 ); - fprintf( out_control->epen, + fprintf( out_control->epen, "%6d%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n", workspace->orig_id[i], workspace->orig_id[j], - workspace->orig_id[k], RAD2DEG(theta), + workspace->orig_id[k], RAD2DEG(theta), BOA_ij, BOA_jk, e_pen, data->E_Pen ); */ - fprintf( out_control->ecoa, - "%6d%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n", - workspace->orig_id[i], - workspace->orig_id[j], - workspace->orig_id[k], - RAD2DEG(theta), BOA_ij, BOA_jk, - e_coa, data->E_Coa ); + fprintf( out_control->ecoa, + "%6d%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n", + workspace->orig_id[i], + workspace->orig_id[j], + workspace->orig_id[k], + RAD2DEG(theta), BOA_ij, BOA_jk, + e_coa, data->E_Coa ); #endif #ifdef TEST_FORCES /* angle forces */ Add_dBO( system, lists, j, pi, CEval1, workspace->f_ang ); Add_dBO( system, lists, j, pk, CEval2, workspace->f_ang ); - Add_dDelta( system, lists, - j, CEval3 + CEval7, workspace->f_ang ); + Add_dDelta( system, lists, + j, CEval3 + CEval7, workspace->f_ang ); - for( t = start_j; t < end_j; ++t ) { + for ( t = start_j; t < end_j; ++t ) + { pbond_jt = &( bond_list[t] ); bo_jt = &(pbond_jt->bo_data); temp_bo_jt = bo_jt->BO; temp = CUBE( temp_bo_jt ); - pBOjt7 = temp * temp * temp_bo_jt; + pBOjt7 = temp * temp * temp_bo_jt; Add_dBO( system, lists, j, t, pBOjt7 * CEval6, - workspace->f_ang ); - Add_dBOpinpi2( system, lists, j, t, - CEval5, CEval5, - workspace->f_ang, workspace->f_ang ); + workspace->f_ang ); + Add_dBOpinpi2( system, lists, j, t, + CEval5, CEval5, + workspace->f_ang, workspace->f_ang ); } rvec_ScaledAdd( workspace->f_ang[i], CEval8, p_ijk->dcos_di ); @@ -536,10 +549,10 @@ void Three_Body_Interactions( reax_system *system, control_params *control, /* end penalty forces */ /* coalition forces */ - Add_dBO( system, lists, - j, pi, CEcoa1-CEcoa4, workspace->f_coa ); - 
Add_dBO( system, lists, - j, pk, CEcoa2-CEcoa5, workspace->f_coa ); + Add_dBO( system, lists, + j, pi, CEcoa1 - CEcoa4, workspace->f_coa ); + Add_dBO( system, lists, + j, pk, CEcoa2 - CEcoa5, workspace->f_coa ); Add_dDelta( system, lists, j, CEcoa3, workspace->f_coa ); Add_dDelta( system, lists, i, CEcoa4, workspace->f_coa ); Add_dDelta( system, lists, k, CEcoa5, workspace->f_coa ); @@ -555,32 +568,36 @@ void Three_Body_Interactions( reax_system *system, control_params *control, } } - if( num_thb_intrs >= thb_intrs->num_intrs * DANGER_ZONE ) { + + if ( num_thb_intrs >= thb_intrs->num_intrs * DANGER_ZONE ) + { workspace->realloc.num_3body = num_thb_intrs; - if( num_thb_intrs > thb_intrs->num_intrs ) { + if ( num_thb_intrs > thb_intrs->num_intrs ) + { fprintf( stderr, "step%d-ran out of space on angle_list: top=%d, max=%d", - data->step, num_thb_intrs, thb_intrs->num_intrs ); - exit( INSUFFICIENT_SPACE ); + data->step, num_thb_intrs, thb_intrs->num_intrs ); + exit( INSUFFICIENT_MEMORY ); } } - //fprintf( stderr,"%d: Number of angle interactions: %d\n", + //fprintf( stderr,"%d: Number of angle interactions: %d\n", // data->step, num_thb_intrs ); #ifdef TEST_ENERGY - fprintf( stderr,"Number of angle interactions: %d\n", num_thb_intrs ); + fprintf( stderr, "Number of angle interactions: %d\n", num_thb_intrs ); - fprintf( stderr,"Angle Energy:%g\t Penalty Energy:%g\t Coalition Energy:%g\n", - data->E_Ang, data->E_Pen, data->E_Coa ); + fprintf( stderr, "Angle Energy:%g\t Penalty Energy:%g\t Coalition Energy:%g\n", + data->E_Ang, data->E_Pen, data->E_Coa ); - fprintf( stderr,"3body: ext_press (%23.15e %23.15e %23.15e)\n", - data->ext_press[0], data->ext_press[1], data->ext_press[2] ); + fprintf( stderr, "3body: ext_press (%23.15e %23.15e %23.15e)\n", + data->ext_press[0], data->ext_press[1], data->ext_press[2] ); #endif } -void Hydrogen_Bonds( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - list **lists, output_controls 
*out_control ) + +void Hydrogen_Bonds( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, + list **lists, output_controls *out_control ) { int i, j, k, pi, pk, itr, top; int type_i, type_j, type_k; @@ -610,10 +627,11 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, /* loops below discover the Hydrogen bonds between i-j-k triplets. here j is H atom and there has to be some bond between i and j. Hydrogen bond is between j and k. - so in this function i->X, j->H, k->Z when we map + so in this function i->X, j->H, k->Z when we map variables onto the ones in the handout.*/ - for( j = 0; j < system->N; ++j ) - if( system->reaxprm.sbp[system->atoms[j].type].p_hbond==1 ) {// j must be H + for ( j = 0; j < system->N; ++j ) + if ( system->reaxprm.sbp[system->atoms[j].type].p_hbond == 1 ) // j must be H + { /*set j's variables */ type_j = system->atoms[j].type; start_j = Start_Index(j, bonds); @@ -622,21 +640,23 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, hb_end_j = End_Index ( workspace->hbond_index[j], hbonds ); top = 0; - for( pi = start_j; pi < end_j; ++pi ) { + for ( pi = start_j; pi < end_j; ++pi ) + { pbond_ij = &( bond_list[pi] ); i = pbond_ij->nbr; bo_ij = &(pbond_ij->bo_data); type_i = system->atoms[i].type; - if( system->reaxprm.sbp[type_i].p_hbond == 2 && + if ( system->reaxprm.sbp[type_i].p_hbond == 2 && bo_ij->BO >= HB_THRESHOLD ) hblist[top++] = pi; } - // fprintf( stderr, "j: %d, top: %d, hb_start_j: %d, hb_end_j:%d\n", + // fprintf( stderr, "j: %d, top: %d, hb_start_j: %d, hb_end_j:%d\n", // j, top, hb_start_j, hb_end_j ); - for( pk = hb_start_j; pk < hb_end_j; ++pk ) { + for ( pk = hb_start_j; pk < hb_end_j; ++pk ) + { /* set k's varibles */ k = hbond_list[pk].nbr; type_k = system->atoms[k].type; @@ -644,69 +664,59 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, r_jk = nbr_jk->d; rvec_Scale( dvec_jk, hbond_list[pk].scl, nbr_jk->dvec ); - for( 
itr=0; itr < top; ++itr ) { + for ( itr = 0; itr < top; ++itr ) + { pi = hblist[itr]; pbond_ij = &( bond_list[pi] ); i = pbond_ij->nbr; - if( i != k ) { + if ( i != k ) + { bo_ij = &(pbond_ij->bo_data); type_i = system->atoms[i].type; - r_ij = pbond_ij->d; + r_ij = pbond_ij->d; hbp = &(system->reaxprm.hbp[ index_hbp(type_i, type_j, type_k, system->reaxprm.num_atom_types) ]); ++num_hb_intrs; - Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, - &theta, &cos_theta ); + Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, &theta, &cos_theta ); /* the derivative of cos(theta) */ Calculate_dCos_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, - &dcos_theta_di, &dcos_theta_dj, - &dcos_theta_dk ); + &dcos_theta_di, &dcos_theta_dj, &dcos_theta_dk ); /* hydrogen bond energy*/ - sin_theta2 = SIN( theta/2.0 ); + sin_theta2 = SIN( theta / 2.0 ); sin_xhz4 = SQR(sin_theta2); sin_xhz4 *= sin_xhz4; cos_xhz1 = ( 1.0 - cos_theta ); exp_hb2 = EXP( -hbp->p_hb2 * bo_ij->BO ); - exp_hb3 = EXP( -hbp->p_hb3 * ( hbp->r0_hb / r_jk + - r_jk / hbp->r0_hb - 2.0 ) ); + exp_hb3 = EXP( -hbp->p_hb3 * ( hbp->r0_hb / r_jk + + r_jk / hbp->r0_hb - 2.0 ) ); - data->E_HB += e_hb = - hbp->p_hb1 * (1.0 - exp_hb2) * exp_hb3 * sin_xhz4; + data->E_HB += e_hb = hbp->p_hb1 * (1.0 - exp_hb2) * exp_hb3 * sin_xhz4; - CEhb1 = hbp->p_hb1*hbp->p_hb2 * exp_hb2*exp_hb3 * sin_xhz4; - CEhb2 = -hbp->p_hb1/2.0*(1.0 - exp_hb2) * exp_hb3 * cos_xhz1; - CEhb3 = -hbp->p_hb3 * e_hb * (-hbp->r0_hb / SQR(r_jk) + - 1.0 / hbp->r0_hb); + CEhb1 = hbp->p_hb1 * hbp->p_hb2 * exp_hb2 * exp_hb3 * sin_xhz4; + CEhb2 = -hbp->p_hb1 / 2.0 * (1.0 - exp_hb2) * exp_hb3 * cos_xhz1; + CEhb3 = -hbp->p_hb3 * e_hb * (-hbp->r0_hb / SQR(r_jk) + 1.0 / hbp->r0_hb); /* hydrogen bond forces */ bo_ij->Cdbo += CEhb1; // dbo term - if( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT ) { - rvec_ScaledAdd( system->atoms[i].f, - +CEhb2, dcos_theta_di ); //dcos terms - rvec_ScaledAdd( system->atoms[j].f, - 
+CEhb2, dcos_theta_dj ); - - - - - //TODO - rvec_ScaledAdd( system->atoms[k].f, - +CEhb2, dcos_theta_dk ); - + if ( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT) + { + rvec_ScaledAdd( system->atoms[i].f, + +CEhb2, dcos_theta_di ); //dcos terms + rvec_ScaledAdd( system->atoms[j].f, + +CEhb2, dcos_theta_dj ); + rvec_ScaledAdd( system->atoms[k].f, + +CEhb2, dcos_theta_dk ); //dr terms - rvec_ScaledAdd( system->atoms[j].f, -CEhb3/r_jk, dvec_jk ); - - - //TODO - rvec_ScaledAdd( system->atoms[k].f, +CEhb3/r_jk, dvec_jk ); + rvec_ScaledAdd( system->atoms[j].f, -CEhb3 / r_jk, dvec_jk ); + rvec_ScaledAdd( system->atoms[k].f, +CEhb3 / r_jk, dvec_jk ); } else { - /* for pressure coupling, terms that are not related - to bond order derivatives are added directly into + /* for pressure coupling, terms that are not related + to bond order derivatives are added directly into pressure vector/tensor */ rvec_Scale( force, +CEhb2, dcos_theta_di ); // dcos terms rvec_Add( system->atoms[i].f, force ); @@ -717,39 +727,32 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, ivec_Scale( rel_jk, hbond_list[pk].scl, nbr_jk->rel_box ); rvec_Scale( force, +CEhb2, dcos_theta_dk ); - - - - //TODO rvec_Add( system->atoms[k].f, force ); - - - rvec_iMultiply( ext_press, rel_jk, force ); rvec_ScaledAdd( data->ext_press, 1.0, ext_press ); //dr terms - rvec_ScaledAdd( system->atoms[j].f, -CEhb3/r_jk, dvec_jk ); + rvec_ScaledAdd( system->atoms[j].f, -CEhb3 / r_jk, dvec_jk ); - rvec_Scale( force, CEhb3/r_jk, dvec_jk ); + rvec_Scale( force, CEhb3 / r_jk, dvec_jk ); rvec_Add( system->atoms[k].f, force ); rvec_iMultiply( ext_press, rel_jk, force ); rvec_ScaledAdd( data->ext_press, 1.0, ext_press ); /* This part is intended for a fully-flexible box */ - /* rvec_OuterProduct( temp_rtensor, + /* rvec_OuterProduct( temp_rtensor, dcos_theta_di, system->atoms[i].x ); rtensor_Scale( total_rtensor, -CEhb2, temp_rtensor ); rvec_ScaledSum( temp_rvec, -CEhb2, 
dcos_theta_dj, -CEhb3/r_jk, pbond_jk->dvec ); - rvec_OuterProduct( temp_rtensor, + rvec_OuterProduct( temp_rtensor, temp_rvec, system->atoms[j].x ); rtensor_Add( total_rtensor, temp_rtensor ); rvec_ScaledSum( temp_rvec, -CEhb2, dcos_theta_dk, +CEhb3/r_jk, pbond_jk->dvec ); - rvec_OuterProduct( temp_rtensor, + rvec_OuterProduct( temp_rtensor, temp_rvec, system->atoms[k].x ); rtensor_Add( total_rtensor, temp_rtensor ); @@ -760,38 +763,38 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, } #ifdef TEST_ENERGY - /*fprintf( out_control->ehb, + /*fprintf( out_control->ehb, "%23.15e%23.15e%23.15e\n%23.15e%23.15e%23.15e\n%23.15e%23.15e%23.15e\n", - dcos_theta_di[0], dcos_theta_di[1], dcos_theta_di[2], - dcos_theta_dj[0], dcos_theta_dj[1], dcos_theta_dj[2], + dcos_theta_di[0], dcos_theta_di[1], dcos_theta_di[2], + dcos_theta_dj[0], dcos_theta_dj[1], dcos_theta_dj[2], dcos_theta_dk[0], dcos_theta_dk[1], dcos_theta_dk[2]); fprintf( out_control->ehb, "%23.15e%23.15e%23.15e\n", CEhb1, CEhb2, CEhb3 ); */ - fprintf( stderr, //out_control->ehb, - "%6d%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n", - workspace->orig_id[i], - workspace->orig_id[j], - workspace->orig_id[k], - r_jk, theta, bo_ij->BO, e_hb, data->E_HB ); + fprintf( stderr, //out_control->ehb, + "%6d%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n", + workspace->orig_id[i], + workspace->orig_id[j], + workspace->orig_id[k], + r_jk, theta, bo_ij->BO, e_hb, data->E_HB ); #endif #ifdef TEST_FORCES // dbo term Add_dBO( system, lists, j, pi, +CEhb1, workspace->f_hb ); // dcos terms - rvec_ScaledAdd( workspace->f_hb[i], +CEhb2, dcos_theta_di ); + rvec_ScaledAdd( workspace->f_hb[i], +CEhb2, dcos_theta_di ); rvec_ScaledAdd( workspace->f_hb[j], +CEhb2, dcos_theta_dj ); rvec_ScaledAdd( workspace->f_hb[k], +CEhb2, dcos_theta_dk ); // dr terms - rvec_ScaledAdd( workspace->f_hb[j], -CEhb3/r_jk, dvec_jk ); - rvec_ScaledAdd( workspace->f_hb[k], +CEhb3/r_jk, dvec_jk ); + rvec_ScaledAdd( workspace->f_hb[j], -CEhb3 / r_jk, 
dvec_jk ); + rvec_ScaledAdd( workspace->f_hb[k], +CEhb3 / r_jk, dvec_jk ); #endif } } } } - /* fprintf( stderr, "hydbonds: ext_press (%23.15e %23.15e %23.15e)\n", + /* fprintf( stderr, "hydbonds: ext_press (%23.15e %23.15e %23.15e)\n", data->ext_press[0], data->ext_press[1], data->ext_press[2] ); */ #ifdef TEST_FORCES diff --git a/PuReMD-GPU/src/tool_box.c b/PuReMD-GPU/src/tool_box.c new file mode 100644 index 0000000000000000000000000000000000000000..1782e71cdac028ddb5c0947ef371cf9259bfad2b --- /dev/null +++ b/PuReMD-GPU/src/tool_box.c @@ -0,0 +1,467 @@ +/*---------------------------------------------------------------------- + SerialReax - Reax Force Field Simulator + + Copyright (2010) Purdue University + Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. 
+ ----------------------------------------------------------------------*/ + +#include "tool_box.h" + +#include <ctype.h> + + +/************** taken from box.c **************/ +void Transform( rvec x1, simulation_box *box, char flag, rvec x2 ) +{ + int i, j; + real tmp; + + // printf(">x1: (%lf, %lf, %lf)\n",x1[0],x1[1],x1[2]); + + if (flag > 0) + { + for (i = 0; i < 3; i++) + { + tmp = 0.0; + for (j = 0; j < 3; j++) + tmp += box->trans[i][j] * x1[j]; + x2[i] = tmp; + } + } + else + { + for (i = 0; i < 3; i++) + { + tmp = 0.0; + for (j = 0; j < 3; j++) + tmp += box->trans_inv[i][j] * x1[j]; + x2[i] = tmp; + } + } + // printf(">x2: (%lf, %lf, %lf)\n", x2[0], x2[1], x2[2]); +} + + +void Transform_to_UnitBox( rvec x1, simulation_box *box, char flag, rvec x2 ) +{ + Transform( x1, box, flag, x2 ); + + x2[0] /= box->box_norms[0]; + x2[1] /= box->box_norms[1]; + x2[2] /= box->box_norms[2]; +} + + +/* determine whether point p is inside the box */ +void Fit_to_Periodic_Box( simulation_box *box, rvec *p ) +{ + int i; + + for ( i = 0; i < 3; ++i ) + { + //TODO: verify box boundary coordinates -- assuming orthogonal box pinned at origin + if ( (*p)[i] < 0. ) + { + /* handle lower coords */ + while ( (*p)[i] < 0. 
) + (*p)[i] += box->box_norms[i]; + } + else if ( (*p)[i] >= box->box_norms[i] ) + { + /* handle higher coords */ + while ( (*p)[i] >= box->box_norms[i] ) + (*p)[i] -= box->box_norms[i]; + } +// if ( (*p)[i] < box->min[i] ) +// { +// /* handle lower coords */ +// while ( (*p)[i] < box->min[i] ) +// (*p)[i] += box->box_norms[i]; +// } +// else if ( (*p)[i] >= box->max[i] ) +// { +// /* handle higher coords */ +// while ( (*p)[i] >= box->max[i] ) +// (*p)[i] -= box->box_norms[i]; +// } + } +} + + +/* determine the touch point, tp, of a box to + its neighbor denoted by the relative coordinate rl */ +/* +inline void Box_Touch_Point( simulation_box *box, ivec rl, rvec tp ) +{ + int d; + + for ( d = 0; d < 3; ++d ) + if ( rl[d] == -1 ) + tp[d] = box->min[d]; + else if ( rl[d] == 0 ) + tp[d] = NEG_INF - 1.; + else + tp[d] = box->max[d]; +} +*/ + + +/* determine whether point p is inside the box */ +/* assumes orthogonal box */ +/* +inline int is_Inside_Box( simulation_box *box, rvec p ) +{ + if ( p[0] < box->min[0] || p[0] >= box->max[0] || + p[1] < box->min[1] || p[1] >= box->max[1] || + p[2] < box->min[2] || p[2] >= box->max[2] ) + return FALSE; + + return TRUE; +} +*/ + + +/* +inline int iown_midpoint( simulation_box *box, rvec p1, rvec p2 ) +{ + rvec midp; + + midp[0] = (p1[0] + p2[0]) / 2; + midp[1] = (p1[1] + p2[1]) / 2; + midp[2] = (p1[2] + p2[2]) / 2; + + if ( midp[0] < box->min[0] || midp[0] >= box->max[0] || + midp[1] < box->min[1] || midp[1] >= box->max[1] || + midp[2] < box->min[2] || midp[2] >= box->max[2] ) + return FALSE; + + return TRUE; +} +*/ + + +/**************** from grid.c ****************/ +/* finds the closest point of grid cell cj to ci. 
+ no need to consider periodic boundary conditions as in the serial case + because the box of a process is not periodic in itself */ +/* +inline void GridCell_Closest_Point( grid_cell *gci, grid_cell *gcj, + ivec ci, ivec cj, rvec cp ) +{ + int d; + + for ( d = 0; d < 3; d++ ) + if ( cj[d] > ci[d] ) + cp[d] = gcj->min[d]; + else if ( cj[d] == ci[d] ) + cp[d] = NEG_INF - 1.; + else + cp[d] = gcj->max[d]; +} + + +inline void GridCell_to_Box_Points( grid_cell *gc, ivec rl, rvec cp, rvec fp ) +{ + int d; + + for ( d = 0; d < 3; ++d ) + if ( rl[d] == -1 ) + { + cp[d] = gc->min[d]; + fp[d] = gc->max[d]; + } + else if ( rl[d] == 0 ) + { + cp[d] = fp[d] = NEG_INF - 1.; + } + else + { + cp[d] = gc->max[d]; + fp[d] = gc->min[d]; + } +} + + +inline real DistSqr_between_Special_Points( rvec sp1, rvec sp2 ) +{ + int i; + real d_sqr = 0; + + for ( i = 0; i < 3; ++i ) + { + if ( sp1[i] > NEG_INF && sp2[i] > NEG_INF ) + { + d_sqr += SQR( sp1[i] - sp2[i] ); + } + } + + return d_sqr; +} + + +inline real DistSqr_to_Special_Point( rvec cp, rvec x ) +{ + int i; + real d_sqr = 0; + + for ( i = 0; i < 3; ++i ) + { + if ( cp[i] > NEG_INF ) + { + d_sqr += SQR( cp[i] - x[i] ); + } + } + + return d_sqr; +} + + +inline int Relative_Coord_Encoding( ivec c ) +{ + return 9 * (c[0] + 1) + 3 * (c[1] + 1) + (c[2] + 1); +} +*/ + + +/************** from geo_tools.c *****************/ +void Make_Point( real x, real y, real z, rvec* p ) +{ + (*p)[0] = x; + (*p)[1] = y; + (*p)[2] = z; +} + + +int is_Valid_Serial( static_storage *workspace, int serial ) +{ + if( workspace->map_serials[ serial ] < 0 ) + { + fprintf( stderr, "CONECT line includes invalid pdb serial number %d.\n", serial ); + fprintf( stderr, "Please correct the input file.Terminating...\n" ); + exit( INVALID_INPUT ); + } + + return TRUE; +} + + +int Check_Input_Range( int val, int lo, int hi, char *message ) +{ + if ( val < lo || val > hi ) + { + fprintf( stderr, "%s\nInput %d - Out of range %d-%d. 
Terminating...\n", + message, val, lo, hi ); + exit( INVALID_INPUT ); + } + + return SUCCESS; +} + + +void Trim_Spaces( char *element ) +{ + int i, j; + + for ( i = 0; element[i] == ' '; ++i ); // skip initial space chars + + for ( j = i; j < (int)(strlen(element)) && element[j] != ' '; ++j ) + { + element[j - i] = toupper( element[j] ); // make uppercase, offset to 0 + } + element[j - i] = 0; // finalize the string +} + + +/************ from system_props.c *************/ +real Get_Time( ) +{ + gettimeofday(&tim, NULL ); + return ( tim.tv_sec + (tim.tv_usec / 1000000.0) ); +} + + +real Get_Timing_Info( real t_start ) +{ + gettimeofday(&tim, NULL ); + t_end = tim.tv_sec + (tim.tv_usec / 1000000.0); + return (t_end - t_start); +} + + +void Update_Timing_Info( real *t_start, real *timing ) +{ + gettimeofday(&tim, NULL ); + t_end = tim.tv_sec + (tim.tv_usec / 1000000.0); + *timing += (t_end - *t_start); + *t_start = t_end; +} + + +/*********** from io_tools.c **************/ +int Get_Atom_Type( reax_interaction *reax_param, char *s ) +{ + int i; + + for ( i = 0; i < reax_param->num_atom_types; ++i ) + { + if ( !strcmp( reax_param->sbp[i].name, s ) ) + { + return i; + } + } + + fprintf( stderr, "Unknown atom type %s. 
Terminating...\n", s ); + exit( UNKNOWN_ATOM_TYPE ); + + return FAILURE; +} + + +char *Get_Element( reax_system *system, int i ) +{ + return &( system->reaxprm.sbp[system->atoms[i].type].name[0] ); +} + + +char *Get_Atom_Name( reax_system *system, int i ) +{ + return &(system->atoms[i].name[0]); +} + + +int Allocate_Tokenizer_Space( char **line, char **backup, char ***tokens ) +{ + int i; + + if ( (*line = (char*) malloc( sizeof(char) * MAX_LINE )) == NULL ) + { + return FAILURE; + } + + if ( (*backup = (char*) malloc( sizeof(char) * MAX_LINE )) == NULL ) + { + return FAILURE; + } + + if ( (*tokens = (char**) malloc( sizeof(char*) * MAX_TOKENS )) == NULL ) + { + return FAILURE; + } + + for ( i = 0; i < MAX_TOKENS; i++ ) + { + if ( ((*tokens)[i] = (char*) malloc(sizeof(char) * MAX_TOKEN_LEN)) == NULL ) + { + return FAILURE; + } + } + + return SUCCESS; +} + + +int Tokenize( char* s, char*** tok ) +{ + char test[MAX_LINE]; + char *sep = "\t \n!="; + char *word; + int count = 0; + + strncpy( test, s, MAX_LINE ); + + for ( word = strtok(test, sep); word; word = strtok(NULL, sep) ) + { + strncpy( (*tok)[count], word, MAX_LINE ); + count++; + } + + return count; +} + + +/***************** taken from lammps ************************/ +/* safe malloc */ +void *smalloc( long n, char *name ) +{ + void *ptr; + + if ( n <= 0 ) + { + fprintf( stderr, "WARNING: trying to allocate %ld bytes for array %s. ", + n, name ); + fprintf( stderr, "returning NULL.\n" ); + return NULL; + } + + ptr = malloc( n ); + if ( ptr == NULL ) + { + fprintf( stderr, "ERROR: failed to allocate %ld bytes for array %s", + n, name ); + exit( INSUFFICIENT_MEMORY ); + } + + return ptr; +} + + +/* safe calloc */ +void *scalloc( int n, int size, char *name ) +{ + void *ptr; + + if ( n <= 0 ) + { + fprintf( stderr, "WARNING: trying to allocate %d elements for array %s. 
", + n, name ); + fprintf( stderr, "returning NULL.\n" ); + return NULL; + } + + if ( size <= 0 ) + { + fprintf( stderr, "WARNING: elements size for array %s is %d. ", + name, size ); + fprintf( stderr, "returning NULL.\n" ); + return NULL; + } + + ptr = calloc( n, size ); + if ( ptr == NULL ) + { + fprintf( stderr, "ERROR: failed to allocate %d bytes for array %s", + n * size, name ); + exit( INSUFFICIENT_MEMORY ); + } + + return ptr; +} + + +/* safe free */ +void sfree( void *ptr, char *name ) +{ + if ( ptr == NULL ) + { + fprintf( stderr, "WARNING: trying to free the already NULL pointer %s!\n", + name ); + return; + } + + free( ptr ); + ptr = NULL; +} diff --git a/PuReMD-GPU/src/tool_box.h b/PuReMD-GPU/src/tool_box.h new file mode 100644 index 0000000000000000000000000000000000000000..db97076149a5f5c8868d02d299a25581d3b5a934 --- /dev/null +++ b/PuReMD-GPU/src/tool_box.h @@ -0,0 +1,72 @@ +/*---------------------------------------------------------------------- + SerialReax - Reax Force Field Simulator + + Copyright (2010) Purdue University + Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. 
+ ----------------------------------------------------------------------*/ + +#ifndef __TOOL_BOX_H_ +#define __TOOL_BOX_H_ + +#include "mytypes.h" + +struct timeval tim; +real t_end; + + +/* from box.h */ +void Transform( rvec, simulation_box*, char, rvec ); +void Transform_to_UnitBox( rvec, simulation_box*, char, rvec ); +void Fit_to_Periodic_Box( simulation_box*, rvec* ); +//void Box_Touch_Point( simulation_box*, ivec, rvec ); +//int is_Inside_Box( simulation_box*, rvec ); +//int iown_midpoint( simulation_box*, rvec, rvec ); + +/* from grid.h */ +/* +void GridCell_Closest_Point( grid_cell*, grid_cell*, ivec, ivec, rvec ); +void GridCell_to_Box_Points( grid_cell*, ivec, rvec, rvec ); +real DistSqr_between_Special_Points( rvec, rvec ); +real DistSqr_to_Special_Point( rvec, rvec ); +int Relative_Coord_Encoding( ivec ); +*/ + +/* from geo_tools.h */ +void Make_Point( real, real, real, rvec* ); +int is_Valid_Serial( static_storage*, int ); +int Check_Input_Range( int, int, int, char* ); +void Trim_Spaces( char* ); + +/* from system_props.h */ +real Get_Time( ); +real Get_Timing_Info( real ); +void Update_Timing_Info( real*, real* ); + +/* from io_tools.h */ +int Get_Atom_Type( reax_interaction*, char* ); +char *Get_Element( reax_system*, int ); +char *Get_Atom_Name( reax_system*, int ); +int Allocate_Tokenizer_Space( char**, char**, char*** ); +int Tokenize( char*, char*** ); + +/* from lammps */ +void *smalloc( long, char* ); +void *scalloc( int, int, char* ); +void sfree( void*, char* ); + + +#endif diff --git a/PuReMD-GPU/src/traj.c b/PuReMD-GPU/src/traj.c index 2844c370ee79702ed0c75d090afe545149aae185..f8852d5d2cc2e425b67f6ffa6871b52c2f755046 100644 --- a/PuReMD-GPU/src/traj.c +++ b/PuReMD-GPU/src/traj.c @@ -1,19 +1,20 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright 
(2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ @@ -30,7 +31,8 @@ /************************************************/ /* CUSTOM FORMAT ROUTINES */ /************************************************/ -int Write_Custom_Header(reax_system *system, control_params *control, + +int Write_Custom_Header(reax_system *system, control_params *control, static_storage *workspace, output_controls *out_control) { int i, header_len, control_block_len, frame_format_len; @@ -40,119 +42,129 @@ int Write_Custom_Header(reax_system *system, control_params *control, char atom_format[100], bond_format[100], angle_format[100]; sprintf( control_block, CONTROL_BLOCK, - system->N, - control->restart, - control->restart_from, - control->random_vel, - out_control->restart_freq, - control->ensemble, - control->nsteps, - control->dt, - control->reposition_atoms, - control->restrict_bonds, - control->tabulate, - control->nbr_cut, - control->r_cut, - control->bg_cut, - control->bo_cut, - control->thb_cut, - control->hb_cut, - control->q_err, - control->T_init, - control->T_final, - control->Tau_T, - control->T_mode, - control->T_rate, - control->T_freq, - control->P[0], control->P[1], 
control->P[2], - control->Tau_P[0], control->Tau_P[1], control->Tau_P[2], - control->compressibility, - control->press_mode, - control->remove_CoM_vel, - out_control->write_steps, - out_control->traj_compress, - out_control->traj_format, - out_control->atom_format, - out_control->bond_info, - out_control->angle_info, - out_control->energy_update_freq, - control->molec_anal, - control->freq_molec_anal ); - - control_block_len = strlen( control_block ); - - - sprintf( frame_format, "Frame Format: %d\n%s\n%s\n", - NUM_FRAME_GLOBALS, FRAME_GLOBALS_FORMAT, FRAME_GLOBAL_NAMES ); - - atom_format[0] = OPT_NOATOM; - switch( out_control->atom_format ) - { - case OPT_ATOM_BASIC: sprintf( atom_format, "Atom_Basic: %s", ATOM_BASIC ); - break; - case OPT_ATOM_wF: sprintf( atom_format, "Atom_wF: %s", ATOM_wF ); - break; - case OPT_ATOM_wV: sprintf( atom_format, "Atom_wV: %s", ATOM_wV ); - break; - case OPT_ATOM_FULL: sprintf( atom_format, "Atom_Full: %s", ATOM_FULL ); - break; - } - strcat( frame_format, atom_format ); - - bond_format[0] = OPT_NOBOND; - if( out_control->bond_info == OPT_BOND_BASIC ) - sprintf( bond_format, "Bond_Line: %s", BOND_BASIC ); - else if( out_control->bond_info == OPT_BOND_FULL ) - sprintf( bond_format, "Bond_Line_Full: %s", BOND_FULL ); - strcat( frame_format, bond_format ); + system->N, + control->restart, + control->restart_from, + control->random_vel, + out_control->restart_freq, + control->ensemble, + control->nsteps, + control->dt, + control->reposition_atoms, + control->restrict_bonds, + control->tabulate, + control->nbr_cut, + control->r_cut, + control->bg_cut, + control->bo_cut, + control->thb_cut, + control->hb_cut, + control->qeq_solver_q_err, + control->T_init, + control->T_final, + control->Tau_T, + control->T_mode, + control->T_rate, + control->T_freq, + control->P[0], control->P[1], control->P[2], + control->Tau_P[0], control->Tau_P[1], control->Tau_P[2], + control->compressibility, + control->press_mode, + control->remove_CoM_vel, + 
out_control->write_steps, + out_control->traj_compress, + out_control->traj_format, + out_control->atom_format, + out_control->bond_info, + out_control->angle_info, + out_control->energy_update_freq, + control->molec_anal, + control->freq_molec_anal ); + + control_block_len = strlen( control_block ); + + sprintf( frame_format, "Frame Format: %d\n%s\n%s\n", + NUM_FRAME_GLOBALS, FRAME_GLOBALS_FORMAT, FRAME_GLOBAL_NAMES ); + + atom_format[0] = OPT_NOATOM; + switch ( out_control->atom_format ) + { + case OPT_ATOM_BASIC: + sprintf( atom_format, "Atom_Basic: %s", ATOM_BASIC ); + break; + case OPT_ATOM_wF: + sprintf( atom_format, "Atom_wF: %s", ATOM_wF ); + break; + case OPT_ATOM_wV: + sprintf( atom_format, "Atom_wV: %s", ATOM_wV ); + break; + case OPT_ATOM_FULL: + sprintf( atom_format, "Atom_Full: %s", ATOM_FULL ); + break; + } + strcat( frame_format, atom_format ); - angle_format[0] = OPT_NOANGLE; - if( out_control->angle_info == OPT_ANGLE_BASIC ) - sprintf( angle_format, "Angle_Line: %s", ANGLE_BASIC ); - strcat( frame_format, angle_format ); + bond_format[0] = OPT_NOBOND; + if ( out_control->bond_info == OPT_BOND_BASIC ) + { + sprintf( bond_format, "Bond_Line: %s", BOND_BASIC ); + } + else if ( out_control->bond_info == OPT_BOND_FULL ) + { + sprintf( bond_format, "Bond_Line_Full: %s", BOND_FULL ); + } + strcat( frame_format, bond_format ); - frame_format_len = strlen( frame_format ); + angle_format[0] = OPT_NOANGLE; + if ( out_control->angle_info == OPT_ANGLE_BASIC ) + { + sprintf( angle_format, "Angle_Line: %s", ANGLE_BASIC ); + } + strcat( frame_format, angle_format ); + frame_format_len = strlen( frame_format ); - header_len = HEADER_INIT_LEN + (control_block_len + SIZE_INFO_LEN2)+ - (frame_format_len + SIZE_INFO_LEN2) + - (ATOM_MAPPING_LEN * system->N + SIZE_INFO_LEN2); + header_len = HEADER_INIT_LEN + (control_block_len + SIZE_INFO_LEN2) + + (frame_format_len + SIZE_INFO_LEN2) + + (ATOM_MAPPING_LEN * system->N + SIZE_INFO_LEN2); - out_control->write( 
out_control->trj, HEADER_INIT, - header_len, HEADER_INIT_LEN, out_control->traj_title ); + out_control->write( out_control->trj, HEADER_INIT, + header_len, HEADER_INIT_LEN, out_control->traj_title ); - out_control->write( out_control->trj, SIZE_INFO_LINE2, - control_block_len + (frame_format_len + SIZE_INFO_LEN2) + - (ATOM_MAPPING_LEN * system->N + SIZE_INFO_LEN2), - control_block_len ); - out_control->write( out_control->trj, "%s", control_block ); + out_control->write( out_control->trj, SIZE_INFO_LINE2, + control_block_len + (frame_format_len + SIZE_INFO_LEN2) + + (ATOM_MAPPING_LEN * system->N + SIZE_INFO_LEN2), + control_block_len ); + out_control->write( out_control->trj, "%s", control_block ); - out_control->write( out_control->trj, SIZE_INFO_LINE2, - frame_format_len + - (ATOM_MAPPING_LEN * system->N + SIZE_INFO_LEN2), - frame_format_len ); - out_control->write( out_control->trj, "%s", frame_format ); + out_control->write( out_control->trj, SIZE_INFO_LINE2, + frame_format_len + + (ATOM_MAPPING_LEN * system->N + SIZE_INFO_LEN2), + frame_format_len ); + out_control->write( out_control->trj, "%s", frame_format ); - out_control->write( out_control->trj, SIZE_INFO_LINE2, - ATOM_MAPPING_LEN * system->N, - ATOM_MAPPING_LEN * system->N ); + out_control->write( out_control->trj, SIZE_INFO_LINE2, + ATOM_MAPPING_LEN * system->N, + ATOM_MAPPING_LEN * system->N ); - for( i = 0; i < system->N; ++i ) - out_control->write( out_control->trj, ATOM_MAPPING, - workspace->orig_id[i], - system->atoms[i].type, - system->atoms[i].name, - system->reaxprm.sbp[ system->atoms[i].type ].mass ); + for ( i = 0; i < system->N; ++i ) + { + out_control->write( out_control->trj, ATOM_MAPPING, + workspace->orig_id[i], + system->atoms[i].type, + system->atoms[i].name, + system->reaxprm.sbp[ system->atoms[i].type ].mass ); + } - fflush( out_control->trj ); + fflush( out_control->trj ); - return 0; + return 0; } -int Append_Custom_Frame( reax_system *system, control_params *control, - 
simulation_data *data, static_storage *workspace, - list **lists, output_controls *out_control ) +int Append_Custom_Frame( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, list **lists, + output_controls *out_control ) { int i, j, pi, pk, pk_j; int write_atoms, write_bonds, write_angles; @@ -166,278 +178,326 @@ int Append_Custom_Frame( reax_system *system, control_params *control, /* IMPORTANT: This whole part will go to init_trj after finalized! */ - switch( out_control->atom_format ) + switch ( out_control->atom_format ) { - case OPT_ATOM_BASIC: - atom_line_len = ATOM_BASIC_LEN; - write_atoms = 1; - break; - case OPT_ATOM_wF: - atom_line_len = ATOM_wF_LEN; - write_atoms = 1; - break; - case OPT_ATOM_wV: - atom_line_len = ATOM_wV_LEN; - write_atoms = 1; - break; - case OPT_ATOM_FULL: - atom_line_len = ATOM_FULL_LEN; - write_atoms = 1; - break; - default: - atom_line_len = 0; - write_atoms = 0; + case OPT_ATOM_BASIC: + atom_line_len = ATOM_BASIC_LEN; + write_atoms = 1; + break; + case OPT_ATOM_wF: + atom_line_len = ATOM_wF_LEN; + write_atoms = 1; + break; + case OPT_ATOM_wV: + atom_line_len = ATOM_wV_LEN; + write_atoms = 1; + break; + case OPT_ATOM_FULL: + atom_line_len = ATOM_FULL_LEN; + write_atoms = 1; + break; + default: + atom_line_len = 0; + write_atoms = 0; } - /* bond preparations */ bond_line_len = write_bonds = 0; - if( out_control->bond_info == OPT_BOND_BASIC ) + if ( out_control->bond_info == OPT_BOND_BASIC ) { bond_line_len = BOND_BASIC_LEN; write_bonds = 1; } - else if( out_control->bond_info == OPT_BOND_FULL ) + else if ( out_control->bond_info == OPT_BOND_FULL ) { bond_line_len = BOND_FULL_LEN; write_bonds = 1; } #ifdef __DEBUG_CUDA__ - fprintf (stderr, "Append Custom Frame -- write_bonds --> %d \n", write_bonds); + fprintf( stderr, "Append Custom Frame -- write_bonds --> %d \n", write_bonds ); #endif num_bonds = 0; - if( write_bonds ) + if ( write_bonds ) { - #ifdef __PRINT_CPU_RESULTS__ - //fprintf 
(stderr, "Synching bonds from device for printing ....\n"); +#ifdef __DEBUG_CUDA__ + fprintf( stderr, "Synching bonds from device for printing ....\n" ); +#endif Sync_Host_Device_List( bonds, (dev_lists + BONDS), TYP_BOND ); #endif - for( i = 0; i < system->N; ++i ) - for( j = Start_Index( i, bonds ); j < End_Index( i, bonds ); ++j ) - if( i < bonds->select.bond_list[j].nbr && + for ( i = 0; i < system->N; ++i ) + { + for ( j = Start_Index( i, bonds ); j < End_Index( i, bonds ); ++j ) + { + if ( i < bonds->select.bond_list[j].nbr && bonds->select.bond_list[j].bo_data.BO >= control->bg_cut ) + { ++num_bonds; + } + } + } } - /* angle preparations */ - if( out_control->angle_info == OPT_ANGLE_BASIC ) + if ( out_control->angle_info == OPT_ANGLE_BASIC ) { angle_line_len = ANGLE_BASIC_LEN; write_angles = 1; } - else + else { angle_line_len = 0; write_angles = 0; } #ifdef __DEBUG_CUDA__ - fprintf (stderr, "Append Custom Frame -- write-angles --> %d \n", write_angles ); + fprintf( stderr, "Append Custom Frame -- write-angles --> %d \n", write_angles ); #endif num_thb_intrs = 0; - if( write_angles ) { - + if ( write_angles ) + { #ifdef __PRINT_CPU_RESULTS__ - //fprintf (stderr, "Synching three bodies from deivce for printing ... \n"); +#ifdef __DEBUG_CUDA__ + fprintf( stderr, "Synching three bodies from deivce for printing ... \n" ); +#endif Sync_Host_Device_List( thb_intrs, dev_lists + THREE_BODIES, TYP_THREE_BODY ); - if ( !write_bonds) { - //fprintf (stderr, "Synching bonds for three bodies from device for printing ... \n"); + if ( !write_bonds ) + { +#ifdef __DEBUG_CUDA__ + fprintf( stderr, "Synching bonds for three bodies from device for printing ... 
\n" ); +#endif Sync_Host_Device_List( bonds, (dev_lists + BONDS), TYP_BOND ); } #endif - for( j = 0; j < system->N; ++j ) - for( pi = Start_Index(j, bonds); pi < End_Index(j, bonds); ++pi ) - if( bonds->select.bond_list[pi].bo_data.BO >= control->bg_cut ) + for ( j = 0; j < system->N; ++j ) + { + for ( pi = Start_Index(j, bonds); pi < End_Index(j, bonds); ++pi ) + { + if ( bonds->select.bond_list[pi].bo_data.BO >= control->bg_cut ) + { // physical j&i bond - for( pk = Start_Index( pi, thb_intrs ); + for ( pk = Start_Index( pi, thb_intrs ); pk < End_Index( pi, thb_intrs ); ++pk ) - if( bonds->select.bond_list[pi].nbr < - thb_intrs->select.three_body_list[pk].thb ) { + { + if ( bonds->select.bond_list[pi].nbr < + thb_intrs->select.three_body_list[pk].thb ) + { // get k's pointer on j's bond list pk_j = thb_intrs->select.three_body_list[pk].pthb; - if( bonds->select.bond_list[pk_j].bo_data.BO >= control->bg_cut ) + if ( bonds->select.bond_list[pk_j].bo_data.BO >= control->bg_cut ) // physical j&k bond ++num_thb_intrs; } + } + } + } + } } - /* get correct pressure */ - if( control->ensemble == NPT || control->ensemble == sNPT ) + if ( control->ensemble == NPT || control->ensemble == sNPT ) + { P = data->flex_bar.P_scalar; - else if( control->ensemble == iNPT ) + } + else if ( control->ensemble == iNPT ) + { P = data->iso_bar.P; - else P = 0; - + } + else + { + P = 0; + } /* calculate total frame length*/ sprintf( buffer, FRAME_GLOBALS, - data->step, data->time, - data->E_Tot, data->E_Pot, E_CONV * data->E_Kin, data->therm.T, - P, system->box.volume, - system->box.box_norms[0], - system->box.box_norms[1], - system->box.box_norms[2], - 90.0, 90.0, 90.0, // IMPORTANT: need to rewrite for flexible boxes! 
- data->E_BE, - data->E_Ov, data->E_Un, data->E_Lp, - data->E_Ang, data->E_Pen, data->E_Coa, data->E_HB, - data->E_Tor, data->E_Con, - data->E_vdW, data->E_Ele, data->E_Pol ); + data->step, data->time, + data->E_Tot, data->E_Pot, E_CONV * data->E_Kin, data->therm.T, + P, system->box.volume, + system->box.box_norms[0], + system->box.box_norms[1], + system->box.box_norms[2], + 90.0, 90.0, 90.0, // IMPORTANT: need to rewrite for flexible boxes! + data->E_BE, + data->E_Ov, data->E_Un, data->E_Lp, + data->E_Ang, data->E_Pen, data->E_Coa, data->E_HB, + data->E_Tor, data->E_Con, + data->E_vdW, data->E_Ele, data->E_Pol ); frame_globals_len = strlen( buffer ); - frame_len = frame_globals_len + - write_atoms * SIZE_INFO_LEN3 + system->N * atom_line_len + - write_bonds * SIZE_INFO_LEN3 + num_bonds * bond_line_len + - write_angles * SIZE_INFO_LEN3 + num_thb_intrs * angle_line_len; + frame_len = frame_globals_len + + write_atoms * SIZE_INFO_LEN3 + system->N * atom_line_len + + write_bonds * SIZE_INFO_LEN3 + num_bonds * bond_line_len + + write_angles * SIZE_INFO_LEN3 + num_thb_intrs * angle_line_len; /* write size info & frame globals */ - out_control->write( out_control->trj, SIZE_INFO_LINE2, - frame_len, frame_globals_len ); + out_control->write( out_control->trj, SIZE_INFO_LINE2, + frame_len, frame_globals_len ); out_control->write( out_control->trj, "%s", buffer ); - /* write size info & atom lines */ - if( write_atoms ) + /* write size info & atom lines */ + if ( write_atoms ) { rest_of_frame_len = system->N * atom_line_len + - write_bonds * SIZE_INFO_LEN3 + num_bonds * bond_line_len + - write_angles * SIZE_INFO_LEN3 + num_thb_intrs * angle_line_len; + write_bonds * SIZE_INFO_LEN3 + num_bonds * bond_line_len + + write_angles * SIZE_INFO_LEN3 + num_thb_intrs * angle_line_len; - out_control->write( out_control->trj, SIZE_INFO_LINE3, - rest_of_frame_len, system->N * atom_line_len, - system->N ); + out_control->write( out_control->trj, SIZE_INFO_LINE3, + rest_of_frame_len, 
system->N * atom_line_len, + system->N ); } - switch( out_control->atom_format ) + switch ( out_control->atom_format ) { - case 4: - for( i = 0; i < system->N; ++i ) - out_control->write( out_control->trj, ATOM_BASIC, - workspace->orig_id[i], - system->atoms[i].x[0], - system->atoms[i].x[1], - system->atoms[i].x[2], - system->atoms[i].q ); - break; - case 5: - for( i = 0; i < system->N; ++i ) - out_control->write( out_control->trj, ATOM_wF, - workspace->orig_id[i], - system->atoms[i].x[0], - system->atoms[i].x[1], - system->atoms[i].x[2], - system->atoms[i].f[0], - system->atoms[i].f[1], - system->atoms[i].f[2], - system->atoms[i].q ); - break; - case 6: - for( i = 0; i < system->N; ++i ) - out_control->write( out_control->trj, ATOM_wV, - workspace->orig_id[i], - system->atoms[i].x[0], - system->atoms[i].x[1], - system->atoms[i].x[2], - system->atoms[i].v[0], - system->atoms[i].v[1], - system->atoms[i].v[2], - system->atoms[i].q ); - break; - case 7: - for( i = 0; i < system->N; ++i ) - out_control->write( out_control->trj, ATOM_FULL, - workspace->orig_id[i], - system->atoms[i].x[0], - system->atoms[i].x[1], - system->atoms[i].x[2], - system->atoms[i].v[0], - system->atoms[i].v[1], - system->atoms[i].v[2], - system->atoms[i].f[0], - system->atoms[i].f[1], - system->atoms[i].f[2], - system->atoms[i].q ); - break; + case 4: + for ( i = 0; i < system->N; ++i ) + out_control->write( out_control->trj, ATOM_BASIC, + workspace->orig_id[i], + system->atoms[i].x[0], + system->atoms[i].x[1], + system->atoms[i].x[2], + system->atoms[i].q ); + break; + case 5: + for ( i = 0; i < system->N; ++i ) + out_control->write( out_control->trj, ATOM_wF, + workspace->orig_id[i], + system->atoms[i].x[0], + system->atoms[i].x[1], + system->atoms[i].x[2], + system->atoms[i].f[0], + system->atoms[i].f[1], + system->atoms[i].f[2], + system->atoms[i].q ); + break; + case 6: + for ( i = 0; i < system->N; ++i ) + out_control->write( out_control->trj, ATOM_wV, + workspace->orig_id[i], + 
system->atoms[i].x[0], + system->atoms[i].x[1], + system->atoms[i].x[2], + system->atoms[i].v[0], + system->atoms[i].v[1], + system->atoms[i].v[2], + system->atoms[i].q ); + break; + case 7: + for ( i = 0; i < system->N; ++i ) + out_control->write( out_control->trj, ATOM_FULL, + workspace->orig_id[i], + system->atoms[i].x[0], + system->atoms[i].x[1], + system->atoms[i].x[2], + system->atoms[i].v[0], + system->atoms[i].v[1], + system->atoms[i].v[2], + system->atoms[i].f[0], + system->atoms[i].f[1], + system->atoms[i].f[2], + system->atoms[i].q ); + break; } fflush( out_control->trj ); /* write size info & bond lines */ - if( write_bonds ) + if ( write_bonds ) { rest_of_frame_len = num_bonds * bond_line_len + - write_angles * SIZE_INFO_LEN3 + num_thb_intrs * angle_line_len; + write_angles * SIZE_INFO_LEN3 + num_thb_intrs * angle_line_len; - out_control->write( out_control->trj, SIZE_INFO_LINE3, - rest_of_frame_len, num_bonds * bond_line_len, - num_bonds ); + out_control->write( out_control->trj, SIZE_INFO_LINE3, + rest_of_frame_len, num_bonds * bond_line_len, + num_bonds ); } - if( out_control->bond_info == 1 ) { - for( i = 0; i < system->N; ++i ) - for( j = Start_Index( i, bonds ); j < End_Index( i, bonds ); ++j ) - if( i < bonds->select.bond_list[j].nbr && - bonds->select.bond_list[j].bo_data.BO >= control->bg_cut ) { + if ( out_control->bond_info == 1 ) + { + for ( i = 0; i < system->N; ++i ) + { + for ( j = Start_Index( i, bonds ); j < End_Index( i, bonds ); ++j ) + { + if ( i < bonds->select.bond_list[j].nbr && + bonds->select.bond_list[j].bo_data.BO >= control->bg_cut ) + { bo_ij = &( bonds->select.bond_list[j] ); - out_control->write( out_control->trj, BOND_BASIC, - workspace->orig_id[i], - workspace->orig_id[bo_ij->nbr], - bo_ij->d, bo_ij->bo_data.BO ); + out_control->write( out_control->trj, BOND_BASIC, + workspace->orig_id[i], + workspace->orig_id[bo_ij->nbr], + bo_ij->d, bo_ij->bo_data.BO ); } + } + } } - else if( out_control->bond_info == 2 ) { - for( i = 
0; i < system->N; ++i ) - for( j = Start_Index( i, bonds ); j < End_Index( i, bonds ); ++j ) - if( i < bonds->select.bond_list[j].nbr && - bonds->select.bond_list[j].bo_data.BO >= control->bg_cut ) { + else if ( out_control->bond_info == 2 ) + { + for ( i = 0; i < system->N; ++i ) + { + for ( j = Start_Index( i, bonds ); j < End_Index( i, bonds ); ++j ) + { + if ( i < bonds->select.bond_list[j].nbr && + bonds->select.bond_list[j].bo_data.BO >= control->bg_cut ) + { bo_ij = &( bonds->select.bond_list[j] ); - out_control->write( out_control->trj, BOND_FULL, - workspace->orig_id[i], - workspace->orig_id[bo_ij->nbr], - bo_ij->d, bo_ij->bo_data.BO, bo_ij->bo_data.BO_s, - bo_ij->bo_data.BO_pi, bo_ij->bo_data.BO_pi2 ); + out_control->write( out_control->trj, BOND_FULL, + workspace->orig_id[i], + workspace->orig_id[bo_ij->nbr], + bo_ij->d, bo_ij->bo_data.BO, bo_ij->bo_data.BO_s, + bo_ij->bo_data.BO_pi, bo_ij->bo_data.BO_pi2 ); } + } + } } fflush( out_control->trj ); /* write size info & angle lines */ - if( out_control->angle_info ) { + if ( out_control->angle_info ) + { out_control->write( out_control->trj, SIZE_INFO_LINE3, - num_thb_intrs * angle_line_len, - num_thb_intrs * angle_line_len, num_thb_intrs ); + num_thb_intrs * angle_line_len, + num_thb_intrs * angle_line_len, num_thb_intrs ); - for( j = 0; j < system->N; ++j ) - for( pi = Start_Index(j, bonds); pi < End_Index(j, bonds); ++pi ) - if( bonds->select.bond_list[pi].bo_data.BO >= control->bg_cut ) + for ( j = 0; j < system->N; ++j ) + { + for ( pi = Start_Index(j, bonds); pi < End_Index(j, bonds); ++pi ) + { + if ( bonds->select.bond_list[pi].bo_data.BO >= control->bg_cut ) + { // physical j&i bond - for( pk = Start_Index( pi, thb_intrs ); + for ( pk = Start_Index( pi, thb_intrs ); pk < End_Index( pi, thb_intrs ); ++pk ) - if( bonds->select.bond_list[pi].nbr < - thb_intrs->select.three_body_list[pk].thb ) { - pk_j = thb_intrs->select.three_body_list[pk].pthb; + { + if ( bonds->select.bond_list[pi].nbr < + 
thb_intrs->select.three_body_list[pk].thb ) + { + pk_j = thb_intrs->select.three_body_list[pk].pthb; // get k's pointer on j's bond list - if( bonds->select.bond_list[pk_j].bo_data.BO >= control->bg_cut ) + if ( bonds->select.bond_list[pk_j].bo_data.BO >= control->bg_cut ) + { // physical j&k bond out_control->write( out_control->trj, ANGLE_BASIC, - workspace->orig_id[bonds->select.bond_list[pi].nbr], - workspace->orig_id[j], - workspace->orig_id[thb_intrs->select.three_body_list[pk].thb], - RAD2DEG(thb_intrs->select.three_body_list[pk].theta) ); + workspace->orig_id[bonds->select.bond_list[pi].nbr], + workspace->orig_id[j], + workspace->orig_id[thb_intrs->select.three_body_list[pk].thb], + RAD2DEG(thb_intrs->select.three_body_list[pk].theta) ); + } } + } + } + } + } } fflush( out_control->trj ); @@ -445,45 +505,47 @@ int Append_Custom_Frame( reax_system *system, control_params *control, return 0; } -/* - void Read_Traj( output_controls *out_control, char *traj_name ) - { - int skip_all, skip_part, n; - char size_buffer[50]; -// char read_buffer[2048]; -out_control->trj = (FILE *)gzopen( traj_name, "r" ); +void Read_Traj( output_controls *out_control, char *traj_name ) +{ + int skip_all, skip_part, n; + char size_buffer[50]; -fprintf( stderr, "file opened!\n" ); + out_control->trj = gzopen( traj_name, "r" ); -while( !gzeof( out_control->trj ) ) -{ -if( gzgets( out_control->trj, size_buffer, 50 ) == Z_NULL ) -break; + fprintf( stderr, "file opened!\n" ); + + while ( !gzeof( out_control->trj ) ) + { + if ( gzgets( out_control->trj, size_buffer, 50 ) == Z_NULL ) + { + break; + } -fprintf( stderr, "read line\n" ); + fprintf( stderr, "read line\n" ); -if( strlen( size_buffer ) >= SIZE_INFO_LEN3 ) -sscanf( size_buffer, "%d %d %d", &skip_all, &skip_part, &n ); -else -sscanf( size_buffer, "%d %d", &skip_all, &skip_part ); + if ( strlen( size_buffer ) >= SIZE_INFO_LEN3 ) + { + sscanf( size_buffer, "%d %d %d", &skip_all, &skip_part, &n ); + } + else + { + sscanf( 
size_buffer, "%d %d", &skip_all, &skip_part ); + } -fprintf( stderr, "%d %d\n", skip_all, skip_part ); + fprintf( stderr, "%d %d\n", skip_all, skip_part ); -gzseek( out_control->trj, skip_part, SEEK_CUR ); -} + gzseek( out_control->trj, skip_part, SEEK_CUR ); + } -gzclose( out_control->trj ); + gzclose( out_control->trj ); } - */ - /********************************************************/ /************ XYZ FORMAT ROUTINES ***************/ /********************************************************/ - -int Write_xyz_Header( reax_system *system, control_params *control, +int Write_xyz_Header( reax_system *system, control_params *control, static_storage* workspace, output_controls *out_control ) { fflush( out_control->trj ); @@ -492,25 +554,27 @@ int Write_xyz_Header( reax_system *system, control_params *control, } -int Append_xyz_Frame( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - list **lists, output_controls *out_control ) +int Append_xyz_Frame( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, list **lists, + output_controls *out_control ) { int i; out_control->write( out_control->trj, "%d\n", system->N ); out_control->write( out_control->trj, "%d\t%8.3f\t%8.3f\t%8.3f\t%8.3f\n", - data->step, - data->E_Tot, data->E_Pot, - E_CONV*data->E_Kin, data->therm.T ); + data->step, + data->E_Tot, data->E_Pot, + E_CONV * data->E_Kin, data->therm.T ); - for( i = 0; i < system->N; ++i ) + for ( i = 0; i < system->N; ++i ) + { out_control->write( out_control->trj, "%3s %10.5f %10.5f %10.5f\n", - system->reaxprm.sbp[ system->atoms[i].type ].name, - system->atoms[i].x[0], - system->atoms[i].x[1], - system->atoms[i].x[2] ); + system->reaxprm.sbp[ system->atoms[i].type ].name, + system->atoms[i].x[0], + system->atoms[i].x[1], + system->atoms[i].x[2] ); + } fflush( out_control->trj ); diff --git a/PuReMD-GPU/src/traj.h b/PuReMD-GPU/src/traj.h index 
35d92602eee7c2d0b5ee83889623df2cb2106c71..200f67711e60285f67f32ecb238f81e95d3f9b0d 100644 --- a/PuReMD-GPU/src/traj.h +++ b/PuReMD-GPU/src/traj.h @@ -25,6 +25,7 @@ #include <zlib.h> + #define BLOCK_MARK "REAX_BLOCK_MARK " #define BLOCK_MARK_LEN 16 @@ -74,11 +75,27 @@ #define SIZE_INFO_LEN3 33 -enum ATOM_LINE_OPTS {OPT_NOATOM = 0, OPT_ATOM_BASIC = 4, OPT_ATOM_wF = 5, - OPT_ATOM_wV = 6, OPT_ATOM_FULL = 7 - }; -enum BOND_LINE_OPTS {OPT_NOBOND, OPT_BOND_BASIC, OPT_BOND_FULL}; -enum ANGLE_LINE_OPTS {OPT_NOANGLE, OPT_ANGLE_BASIC}; +enum ATOM_LINE_OPTS +{ + OPT_NOATOM = 0, + OPT_ATOM_BASIC = 4, + OPT_ATOM_wF = 5, + OPT_ATOM_wV = 6, + OPT_ATOM_FULL = 7, +}; + +enum BOND_LINE_OPTS +{ + OPT_NOBOND = 0, + OPT_BOND_BASIC = 1, + OPT_BOND_FULL = 2, +}; + +enum ANGLE_LINE_OPTS +{ + OPT_NOANGLE = 0, + OPT_ANGLE_BASIC = 1, +}; struct @@ -143,10 +160,8 @@ int Skip_Next_Block( gzFile, int*); No. of torsion entries (int) Torsion info lines as per torsion format. */ -int Write_Custom_Header( reax_system*, control_params*, - static_storage*, output_controls* ); -int Write_xyz_Header ( reax_system*, control_params*, - static_storage*, output_controls* ); +int Write_Custom_Header( reax_system*, control_params*, static_storage*, output_controls* ); +int Write_xyz_Header ( reax_system*, control_params*, static_storage*, output_controls* ); /* Write_Traj_Header( gzfile file, @@ -168,7 +183,7 @@ char Write_Traj_Header( FILE*, int, char**, char**, control_params* ); char** various flags); */ int Push_Traj_Frame( /*gzfile*/ FILE*, reax_system*, control_params*, - simulation_data*, static_storage*, list**, char** ); + simulation_data*, static_storage*, list**, char** ); /* Append_Traj_Frame( gzfile file, @@ -180,11 +195,11 @@ int Push_Traj_Frame( /*gzfile*/ FILE*, reax_system*, control_params*, char** various flags); */ int Append_Custom_Frame( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); + static_storage*, list**, output_controls* ); int 
Append_xyz_Frame ( reax_system*, control_params*, simulation_data*, - static_storage*, list**, output_controls* ); - + static_storage*, list**, output_controls* ); void Read_Traj( output_controls*, char * ); + #endif diff --git a/PuReMD-GPU/src/two_body_interactions.c b/PuReMD-GPU/src/two_body_interactions.c index 2e7a6daf9039ea26c22b2fcfda5913e46255ad75..d5b53a05e607d043ba5f59f96f51a1065438c1d4 100644 --- a/PuReMD-GPU/src/two_body_interactions.c +++ b/PuReMD-GPU/src/two_body_interactions.c @@ -1,19 +1,20 @@ /*---------------------------------------------------------------------- - PuReMD-GPU - Reax Force Field Simulator + SerialReax - Reax Force Field Simulator - Copyright (2014) Purdue University - Sudhir Kylasa, skylasa@purdue.edu + Copyright (2010) Purdue University Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu Ananth Y Grama, ayg@cs.purdue.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: <http://www.gnu.org/licenses/>. 
----------------------------------------------------------------------*/ @@ -21,14 +22,14 @@ #include "two_body_interactions.h" #include "bond_orders.h" +#include "index_utils.h" #include "list.h" #include "lookup.h" #include "vector.h" -#include "index_utils.h" -void Bond_Energy( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, +void Bond_Energy( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, list **lists, output_controls *out_control ) { int i, j, pj; @@ -50,12 +51,14 @@ void Bond_Energy( reax_system *system, control_params *control, gp10 = system->reaxprm.gp.l[10]; gp37 = (int) system->reaxprm.gp.l[37]; - for( i=0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { start_i = Start_Index(i, bonds); end_i = End_Index(i, bonds); //fprintf( stderr, "i=%d start=%d end=%d\n", i, start_i, end_i ); - for( pj = start_i; pj < end_i; ++pj ) - if( i < bonds->select.bond_list[pj].nbr ) { + for ( pj = start_i; pj < end_i; ++pj ) + if ( i < bonds->select.bond_list[pj].nbr ) + { /* set the pointers */ j = bonds->select.bond_list[pj].nbr; type_i = system->atoms[i].type; @@ -68,15 +71,12 @@ void Bond_Energy( reax_system *system, control_params *control, /* calculate the constants */ pow_BOs_be2 = POW( bo_ij->BO_s, twbp->p_be2 ); exp_be12 = EXP( twbp->p_be1 * ( 1.0 - pow_BOs_be2 ) ); - CEbo = -twbp->De_s * exp_be12 * - ( 1.0 - twbp->p_be1 * twbp->p_be2 * pow_BOs_be2 ); + CEbo = -twbp->De_s * exp_be12 * + ( 1.0 - twbp->p_be1 * twbp->p_be2 * pow_BOs_be2 ); /* calculate the Bond Energy */ - ebond = - -twbp->De_s * bo_ij->BO_s * exp_be12 - -twbp->De_p * bo_ij->BO_pi - -twbp->De_pp * bo_ij->BO_pi2; - + ebond = -twbp->De_s * bo_ij->BO_s * exp_be12 + - twbp->De_p * bo_ij->BO_pi - twbp->De_pp * bo_ij->BO_pi2; data->E_BE += ebond; /* calculate derivatives of Bond Orders */ @@ -85,34 +85,36 @@ void Bond_Energy( reax_system *system, control_params *control, bo_ij->Cdbopi2 -= (CEbo 
+ twbp->De_pp); #ifdef TEST_ENERGY - fprintf( out_control->ebond, "%6d%6d%24.15e%24.15e\n", - workspace->orig_id[i], workspace->orig_id[j], - // i+1, j+1, - bo_ij->BO, ebond/*, data->E_BE*/ ); - /* fprintf( out_control->ebond, "%6d%6d%12.6f%12.6f%12.6f\n", - workspace->orig_id[i], workspace->orig_id[j], + fprintf( out_control->ebond, "%6d%6d%24.15e%24.15e\n", + workspace->orig_id[i], workspace->orig_id[j], + // i+1, j+1, + bo_ij->BO, ebond/*, data->E_BE*/ ); + /* fprintf( out_control->ebond, "%6d%6d%12.6f%12.6f%12.6f\n", + workspace->orig_id[i], workspace->orig_id[j], CEbo, -twbp->De_p, -twbp->De_pp );*/ #endif #ifdef TEST_FORCES Add_dBO( system, lists, i, pj, CEbo, workspace->f_be ); - Add_dBOpinpi2( system, lists, i, pj, - -(CEbo + twbp->De_p), -(CEbo + twbp->De_pp), - workspace->f_be, workspace->f_be ); + Add_dBOpinpi2( system, lists, i, pj, + -(CEbo + twbp->De_p), -(CEbo + twbp->De_pp), + workspace->f_be, workspace->f_be ); #endif /* Stabilisation terminal triple bond */ - if( bo_ij->BO >= 1.00 ) { - if( gp37 == 2 || - (sbp_i->mass == 12.0000 && sbp_j->mass == 15.9990) || - (sbp_j->mass == 12.0000 && sbp_i->mass == 15.9990) ) { + if ( bo_ij->BO >= 1.00 ) + { + if ( gp37 == 2 || + (sbp_i->mass == 12.0000 && sbp_j->mass == 15.9990) || + (sbp_j->mass == 12.0000 && sbp_i->mass == 15.9990) ) + { // ba = SQR(bo_ij->BO - 2.50); exphu = EXP( -gp7 * SQR(bo_ij->BO - 2.50) ); //oboa=abo(j1)-boa; //obob=abo(j2)-boa; - exphua1 = EXP(-gp3*(workspace->total_bond_order[i]-bo_ij->BO)); - exphub1 = EXP(-gp3*(workspace->total_bond_order[j]-bo_ij->BO)); + exphua1 = EXP(-gp3 * (workspace->total_bond_order[i] - bo_ij->BO)); + exphub1 = EXP(-gp3 * (workspace->total_bond_order[j] - bo_ij->BO)); //ovoab=abo(j1)-aval(it1)+abo(j2)-aval(it2); - exphuov = EXP(gp4*(workspace->Delta[i] + workspace->Delta[j])); + exphuov = EXP(gp4 * (workspace->Delta[i] + workspace->Delta[j])); hulpov = 1.0 / (1.0 + 25.0 * exphuov); estriph = gp10 * exphu * hulpov * (exphua1 + exphub1); @@ -120,24 +122,22 @@ 
void Bond_Energy( reax_system *system, control_params *control, //estrain(j2) = estrain(j2) + 0.50*estriph; data->E_BE += estriph; - decobdbo = gp10 * exphu * hulpov * (exphua1 + exphub1) * - ( gp3 - 2.0 * gp7 * (bo_ij->BO-2.50) ); - decobdboua = -gp10 * exphu * hulpov * - (gp3*exphua1 + 25.0*gp4*exphuov*hulpov*(exphua1+exphub1)); - decobdboub = -gp10 * exphu * hulpov * - (gp3*exphub1 + 25.0*gp4*exphuov*hulpov*(exphua1+exphub1)); + decobdbo = gp10 * exphu * hulpov * (exphua1 + exphub1) * + ( gp3 - 2.0 * gp7 * (bo_ij->BO - 2.50) ); + decobdboua = -gp10 * exphu * hulpov * + (gp3 * exphua1 + 25.0 * gp4 * exphuov * hulpov * (exphua1 + exphub1)); + decobdboub = -gp10 * exphu * hulpov * + (gp3 * exphub1 + 25.0 * gp4 * exphuov * hulpov * (exphua1 + exphub1)); bo_ij->Cdbo += decobdbo; workspace->CdDelta[i] += decobdboua; workspace->CdDelta[j] += decobdboub; - //loop_j ++; - //fprintf (stderr, "incrementing loopj %d \n", loop_j); #ifdef TEST_ENERGY - fprintf( out_control->ebond, - "%6d%6d%24.15e%24.15e%24.15e%24.15e\n", - workspace->orig_id[i], workspace->orig_id[j], - //i+1, j+1, - estriph, decobdbo, decobdboua, decobdboub ); + fprintf( out_control->ebond, + "%6d%6d%24.15e%24.15e%24.15e%24.15e\n", + workspace->orig_id[i], workspace->orig_id[j], + //i+1, j+1, + estriph, decobdbo, decobdboua, decobdboub ); #endif #ifdef TEST_FORCES Add_dBO( system, lists, i, pj, decobdbo, workspace->f_be ); @@ -151,9 +151,9 @@ void Bond_Energy( reax_system *system, control_params *control, } -void vdW_Coulomb_Energy( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - list **lists, output_controls *out_control ) +void vdW_Coulomb_Energy( reax_system *system, control_params *control, + simulation_data *data, static_storage *workspace, list **lists, + output_controls *out_control ) { int i, j, pj; int start_i, end_i; @@ -172,20 +172,22 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, p_vdW1 = system->reaxprm.gp.l[28]; 
p_vdW1i = 1.0 / p_vdW1; - far_nbrs = (*lists) + FAR_NBRS; + far_nbrs = (*lists) + FAR_NBRS; e_ele = 0; e_vdW = 0; e_core = 0; de_core = 0; - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { start_i = Start_Index(i, far_nbrs); end_i = End_Index(i, far_nbrs); // fprintf( stderr, "i: %d, start: %d, end: %d\n", // i, start_i, end_i ); - for( pj = start_i; pj < end_i; ++pj ) - if( far_nbrs->select.far_nbr_list[pj].d <= control->r_cut ) { + for ( pj = start_i; pj < end_i; ++pj ) + if ( far_nbrs->select.far_nbr_list[pj].d <= control->r_cut ) + { nbr_pj = &( far_nbrs->select.far_nbr_list[pj] ); j = nbr_pj->nbr; r_ij = nbr_pj->d; @@ -202,15 +204,16 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, Tap = Tap * r_ij + control->Tap1; Tap = Tap * r_ij + control->Tap0; - dTap = 7*control->Tap7 * r_ij + 6*control->Tap6; - dTap = dTap * r_ij + 5*control->Tap5; - dTap = dTap * r_ij + 4*control->Tap4; - dTap = dTap * r_ij + 3*control->Tap3; - dTap = dTap * r_ij + 2*control->Tap2; - dTap += control->Tap1/r_ij; + dTap = 7 * control->Tap7 * r_ij + 6 * control->Tap6; + dTap = dTap * r_ij + 5 * control->Tap5; + dTap = dTap * r_ij + 4 * control->Tap4; + dTap = dTap * r_ij + 3 * control->Tap3; + dTap = dTap * r_ij + 2 * control->Tap2; + dTap += control->Tap1 / r_ij; /*vdWaals Calculations*/ - if(system->reaxprm.gp.vdw_type==1 || system->reaxprm.gp.vdw_type==3) { + if (system->reaxprm.gp.vdw_type == 1 || system->reaxprm.gp.vdw_type == 3) + { // shielding powr_vdW1 = POW(r_ij, p_vdW1); powgi_vdW1 = POW( 1.0 / twbp->gamma_w, p_vdW1); @@ -219,35 +222,37 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, exp1 = EXP( twbp->alpha * (1.0 - fn13 / twbp->r_vdW) ); exp2 = EXP( 0.5 * twbp->alpha * (1.0 - fn13 / twbp->r_vdW) ); - data->E_vdW += e_vdW = - self_coef * Tap * twbp->D * (exp1 - 2.0 * exp2); + data->E_vdW += e_vdW = + self_coef * Tap * twbp->D * (exp1 - 2.0 * exp2); - dfn13 = POW( powr_vdW1 + powgi_vdW1, p_vdW1i - 1.0) 
* - POW(r_ij, p_vdW1 - 2.0); + dfn13 = POW( powr_vdW1 + powgi_vdW1, p_vdW1i - 1.0) * + POW(r_ij, p_vdW1 - 2.0); - CEvd = self_coef * ( dTap * twbp->D * (exp1 - 2 * exp2) - - Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * - (exp1 - exp2) * dfn13 ); + CEvd = self_coef * ( dTap * twbp->D * (exp1 - 2 * exp2) - + Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * + (exp1 - exp2) * dfn13 ); } - else{ // no shielding + else // no shielding + { exp1 = EXP( twbp->alpha * (1.0 - r_ij / twbp->r_vdW) ); exp2 = EXP( 0.5 * twbp->alpha * (1.0 - r_ij / twbp->r_vdW) ); - data->E_vdW += e_vdW = - self_coef * Tap * twbp->D * (exp1 - 2.0 * exp2); + data->E_vdW += e_vdW = + self_coef * Tap * twbp->D * (exp1 - 2.0 * exp2); - CEvd = self_coef * ( dTap * twbp->D * (exp1 - 2.0 * exp2) - - Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * - (exp1 - exp2) ); + CEvd = self_coef * ( dTap * twbp->D * (exp1 - 2.0 * exp2) - + Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * + (exp1 - exp2) ); } - if(system->reaxprm.gp.vdw_type==2 || system->reaxprm.gp.vdw_type==3) { + if (system->reaxprm.gp.vdw_type == 2 || system->reaxprm.gp.vdw_type == 3) + { // innner wall - e_core = twbp->ecore * EXP(twbp->acore * (1.0-(r_ij/twbp->rcore))); + e_core = twbp->ecore * EXP(twbp->acore * (1.0 - (r_ij / twbp->rcore))); e_vdW += self_coef * Tap * e_core; data->E_vdW += self_coef * Tap * e_core; - de_core = -(twbp->acore/twbp->rcore) * e_core; + de_core = -(twbp->acore / twbp->rcore) * e_core; CEvd += self_coef * ( dTap * e_core + Tap * de_core ); } @@ -257,24 +262,26 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, tmp = Tap / dr3gamij_3; //tmp = Tap * nbr_pj->inv_dr3gamij_3; -- precomputed during compte_H - data->E_Ele += e_ele = - self_coef * C_ele * system->atoms[i].q * system->atoms[j].q * tmp; + data->E_Ele += e_ele = + self_coef * C_ele * system->atoms[i].q * system->atoms[j].q * tmp; CEclmb = self_coef * C_ele * system->atoms[i].q * system->atoms[j].q * - ( dTap - Tap * r_ij / dr3gamij_1 ) / 
dr3gamij_3; - /*CEclmb = self_coef*C_ele*system->atoms[i].q*system->atoms[j].q* + ( dTap - Tap * r_ij / dr3gamij_1 ) / dr3gamij_3; + /*CEclmb = self_coef*C_ele*system->atoms[i].q*system->atoms[j].q* ( dTap- Tap*r_ij*nbr_pj->inv_dr3gamij_1 )*nbr_pj->inv_dr3gamij_3;*/ - if( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT) { - rvec_ScaledAdd( system->atoms[i].f, - -(CEvd+CEclmb), nbr_pj->dvec ); - rvec_ScaledAdd( system->atoms[j].f, - +(CEvd+CEclmb), nbr_pj->dvec ); + if ( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT ) + { + rvec_ScaledAdd( system->atoms[i].f, + -(CEvd + CEclmb), nbr_pj->dvec ); + rvec_ScaledAdd( system->atoms[j].f, + +(CEvd + CEclmb), nbr_pj->dvec ); } - else { // NPT, iNPT or sNPT - /* for pressure coupling, terms not related to bond order + else // NPT, iNPT or sNPT + { + /* for pressure coupling, terms not related to bond order derivatives are added directly into pressure vector/tensor */ rvec_Scale( temp, CEvd + CEclmb, nbr_pj->dvec ); @@ -284,47 +291,47 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, rvec_iMultiply( ext_press, nbr_pj->rel_box, temp ); rvec_Add( data->ext_press, ext_press ); - /*fprintf( stderr, "nonbonded(%d,%d): rel_box (%f %f %f)", + /*fprintf( stderr, "nonbonded(%d,%d): rel_box (%f %f %f)", i,j,nbr_pj->rel_box[0],nbr_pj->rel_box[1],nbr_pj->rel_box[2] ); fprintf( stderr, "force(%f %f %f)", temp[0], temp[1], temp[2] ); - fprintf( stderr, "ext_press (%12.6f %12.6f %12.6f)\n", + fprintf( stderr, "ext_press (%12.6f %12.6f %12.6f)\n", data->ext_press[0], data->ext_press[1], data->ext_press[2] );*/ - /* This part is intended for a fully-flexible box */ - /* rvec_OuterProduct( temp_rtensor, nbr_pj->dvec, + /* This part is intended for a fully-flexible box */ + /* rvec_OuterProduct( temp_rtensor, nbr_pj->dvec, system->atoms[i].x ); - rtensor_Scale( total_rtensor, + rtensor_Scale( total_rtensor, F_C * -(CEvd + CEclmb), temp_rtensor ); - 
rvec_OuterProduct( temp_rtensor, + rvec_OuterProduct( temp_rtensor, nbr_pj->dvec, system->atoms[j].x ); - rtensor_ScaledAdd( total_rtensor, + rtensor_ScaledAdd( total_rtensor, F_C * +(CEvd + CEclmb), temp_rtensor ); if( nbr_pj->imaginary ) - // This is an external force due to an imaginary nbr - rtensor_ScaledAdd( data->flex_bar.P, -1.0, total_rtensor ); - else - // This interaction is completely internal - rtensor_Add( data->flex_bar.P, total_rtensor ); */ + // This is an external force due to an imaginary nbr + rtensor_ScaledAdd( data->flex_bar.P, -1.0, total_rtensor ); + else + // This interaction is completely internal + rtensor_Add( data->flex_bar.P, total_rtensor ); */ } #ifdef TEST_ENERGY rvec_MakeZero( temp ); rvec_ScaledAdd( temp, +CEvd, nbr_pj->dvec ); fprintf( out_control->evdw, - "%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e\n", - //i+1, j+1, - MIN( workspace->orig_id[i], workspace->orig_id[j] ), - MAX( workspace->orig_id[i], workspace->orig_id[j] ), - r_ij, e_vdW, temp[0], temp[1], temp[2]/*, data->E_vdW*/ ); + "%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e\n", + //i+1, j+1, + MIN( workspace->orig_id[i], workspace->orig_id[j] ), + MAX( workspace->orig_id[i], workspace->orig_id[j] ), + r_ij, e_vdW, temp[0], temp[1], temp[2]/*, data->E_vdW*/ ); fprintf( out_control->ecou, "%6d%6d%24.15e%24.15e%24.15e%24.15e\n", - MIN( workspace->orig_id[i], workspace->orig_id[j] ), - MAX( workspace->orig_id[i], workspace->orig_id[j] ), - r_ij, system->atoms[i].q, system->atoms[j].q, - e_ele/*, data->E_Ele*/ ); + MIN( workspace->orig_id[i], workspace->orig_id[j] ), + MAX( workspace->orig_id[i], workspace->orig_id[j] ), + r_ij, system->atoms[i].q, system->atoms[j].q, + e_ele/*, data->E_Ele*/ ); #endif #ifdef TEST_FORCES rvec_ScaledAdd( workspace->f_vdw[i], -CEvd, nbr_pj->dvec ); @@ -337,13 +344,13 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, // fclose( fout ); - // fprintf( stderr, "nonbonded: ext_press (%24.15e %24.15e %24.15e)\n", + // fprintf( 
stderr, "nonbonded: ext_press (%24.15e %24.15e %24.15e)\n", // data->ext_press[0], data->ext_press[1], data->ext_press[2] ); } -void LR_vdW_Coulomb( reax_system *system, control_params *control, - int i, int j, real r_ij, LR_data *lr ) +void LR_vdW_Coulomb( reax_system *system, control_params *control, + int i, int j, real r_ij, LR_data *lr ) { real p_vdW1 = system->reaxprm.gp.l[28]; real p_vdW1i = 1.0 / p_vdW1; @@ -367,12 +374,12 @@ void LR_vdW_Coulomb( reax_system *system, control_params *control, Tap = Tap * r_ij + control->Tap1; Tap = Tap * r_ij + control->Tap0; - dTap = 7*control->Tap7 * r_ij + 6*control->Tap6; - dTap = dTap * r_ij + 5*control->Tap5; - dTap = dTap * r_ij + 4*control->Tap4; - dTap = dTap * r_ij + 3*control->Tap3; - dTap = dTap * r_ij + 2*control->Tap2; - dTap += control->Tap1/r_ij; + dTap = 7 * control->Tap7 * r_ij + 6 * control->Tap6; + dTap = dTap * r_ij + 5 * control->Tap5; + dTap = dTap * r_ij + 4 * control->Tap4; + dTap = dTap * r_ij + 3 * control->Tap3; + dTap = dTap * r_ij + 2 * control->Tap2; + dTap += control->Tap1 / r_ij; /* vdWaals calculations */ @@ -383,20 +390,21 @@ void LR_vdW_Coulomb( reax_system *system, control_params *control, exp1 = EXP( twbp->alpha * (1.0 - fn13 / twbp->r_vdW) ); exp2 = EXP( 0.5 * twbp->alpha * (1.0 - fn13 / twbp->r_vdW) ); - lr->e_vdW = Tap * twbp->D * (exp1 - 2.0 * exp2); + lr->e_vdW = Tap * twbp->D * (exp1 - 2.0 * exp2); /* fprintf(stderr,"vdW: Tap:%f, r: %f, f13:%f, D:%f, Energy:%f,\ -Gamma_w:%f, p_vdw: %f, alpha: %f, r_vdw: %f, %lf %lf\n", -Tap, r_ij, fn13, twbp->D, Tap * twbp->D * (exp1 - 2.0 * exp2), -powgi_vdW1, p_vdW1, twbp->alpha, twbp->r_vdW, exp1, exp2); */ + Gamma_w:%f, p_vdw: %f, alpha: %f, r_vdw: %f, %lf %lf\n", + Tap, r_ij, fn13, twbp->D, Tap * twbp->D * (exp1 - 2.0 * exp2), + powgi_vdW1, p_vdW1, twbp->alpha, twbp->r_vdW, exp1, exp2); */ dfn13 = POW( powr_vdW1 + powgi_vdW1, p_vdW1i - 1.0) * POW(r_ij, p_vdW1 - 2.0); - lr->CEvd = dTap * twbp->D * (exp1 - 2 * exp2) - - Tap * twbp->D * 
(twbp->alpha / twbp->r_vdW) * (exp1 - exp2) * dfn13; + lr->CEvd = dTap * twbp->D * (exp1 - 2 * exp2) - + Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) * dfn13; /*vdWaals Calculations*/ - if(system->reaxprm.gp.vdw_type==1 || system->reaxprm.gp.vdw_type==3) - { // shielding + if (system->reaxprm.gp.vdw_type == 1 || system->reaxprm.gp.vdw_type == 3) + { + // shielding powr_vdW1 = POW(r_ij, p_vdW1); powgi_vdW1 = POW( 1.0 / twbp->gamma_w, p_vdW1); @@ -404,30 +412,32 @@ powgi_vdW1, p_vdW1, twbp->alpha, twbp->r_vdW, exp1, exp2); */ exp1 = EXP( twbp->alpha * (1.0 - fn13 / twbp->r_vdW) ); exp2 = EXP( 0.5 * twbp->alpha * (1.0 - fn13 / twbp->r_vdW) ); - lr->e_vdW = Tap * twbp->D * (exp1 - 2.0 * exp2); + lr->e_vdW = Tap * twbp->D * (exp1 - 2.0 * exp2); - dfn13 = POW( powr_vdW1 + powgi_vdW1, p_vdW1i - 1.0) * - POW(r_ij, p_vdW1 - 2.0); + dfn13 = POW( powr_vdW1 + powgi_vdW1, p_vdW1i - 1.0) * + POW(r_ij, p_vdW1 - 2.0); - lr->CEvd = dTap * twbp->D * (exp1 - 2.0 * exp2) - - Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) * dfn13; + lr->CEvd = dTap * twbp->D * (exp1 - 2.0 * exp2) - + Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) * dfn13; } - else{ // no shielding + else // no shielding + { exp1 = EXP( twbp->alpha * (1.0 - r_ij / twbp->r_vdW) ); exp2 = EXP( 0.5 * twbp->alpha * (1.0 - r_ij / twbp->r_vdW) ); lr->e_vdW = Tap * twbp->D * (exp1 - 2.0 * exp2); - lr->CEvd = dTap * twbp->D * (exp1 - 2.0 * exp2) - - Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2); + lr->CEvd = dTap * twbp->D * (exp1 - 2.0 * exp2) - + Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2); } - if(system->reaxprm.gp.vdw_type==2 || system->reaxprm.gp.vdw_type==3) - { // innner wall - e_core = twbp->ecore * EXP(twbp->acore * (1.0-(r_ij/twbp->rcore))); + if (system->reaxprm.gp.vdw_type == 2 || system->reaxprm.gp.vdw_type == 3) + { + // innner wall + e_core = twbp->ecore * EXP(twbp->acore * (1.0 - (r_ij / twbp->rcore))); lr->e_vdW += Tap * e_core; - de_core = 
-(twbp->acore/twbp->rcore) * e_core; + de_core = -(twbp->acore / twbp->rcore) * e_core; lr->CEvd += dTap * e_core + Tap * de_core; } @@ -439,10 +449,10 @@ powgi_vdW1, p_vdW1, twbp->alpha, twbp->r_vdW, exp1, exp2); */ lr->H = EV_to_KCALpMOL * tmp; lr->e_ele = C_ele * tmp; /* fprintf( stderr,"i:%d(%d), j:%d(%d), gamma:%f,\ -Tap:%f, dr3gamij_3:%f, qi: %f, qj: %f\n", -i, system->atoms[i].type, j, system->atoms[j].type, -twbp->gamma, Tap, dr3gamij_3, -system->atoms[i].q, system->atoms[j].q ); */ + Tap:%f, dr3gamij_3:%f, qi: %f, qj: %f\n", + i, system->atoms[i].type, j, system->atoms[j].type, + twbp->gamma, Tap, dr3gamij_3, + system->atoms[i].q, system->atoms[j].q ); */ lr->CEclmb = C_ele * ( dTap - Tap * r_ij / dr3gamij_1 ) / dr3gamij_3; /* fprintf( stdout, "%d %d\t%g\t%g %g\t%g %g\t%g %g\n", @@ -454,10 +464,9 @@ system->atoms[i].q, system->atoms[j].q ); */ } -void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control, - simulation_data *data, - static_storage *workspace, list **lists, - output_controls *out_control ) +void Tabulated_vdW_Coulomb_Energy( reax_system *system, + control_params *control, simulation_data *data, static_storage *workspace, + list **lists, output_controls *out_control ) { int i, j, pj, r, steps, update_freq, update_energies; int type_i, type_j, tmin, tmax; @@ -474,13 +483,16 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control, update_freq = out_control->energy_update_freq; update_energies = update_freq > 0 && steps % update_freq == 0; - for( i = 0; i < system->N; ++i ) { + for ( i = 0; i < system->N; ++i ) + { type_i = system->atoms[i].type; - start_i = Start_Index(i,far_nbrs); - end_i = End_Index(i,far_nbrs); + start_i = Start_Index(i, far_nbrs); + end_i = End_Index(i, far_nbrs); - for( pj = start_i; pj < end_i; ++pj ) - if( far_nbrs->select.far_nbr_list[pj].d <= control->r_cut ) { + for ( pj = start_i; pj < end_i; ++pj ) + { + if ( far_nbrs->select.far_nbr_list[pj].d <= control->r_cut ) + { 
nbr_pj = &( far_nbrs->select.far_nbr_list[pj] ); j = nbr_pj->nbr; type_j = system->atoms[j].type; @@ -488,43 +500,46 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control, self_coef = (i == j) ? 0.5 : 1.0; tmin = MIN( type_i, type_j ); tmax = MAX( type_i, type_j ); - t = &( LR[ index_lr (tmin,tmax,system->reaxprm.num_atom_types) ] ); + t = &( LR[ index_lr(tmin,tmax,system->reaxprm.num_atom_types) ] ); /* Cubic Spline Interpolation */ r = (int)(r_ij * t->inv_dx); - if( r == 0 ) ++r; - base = (real)(r+1) * t->dx; + if ( r == 0 ) ++r; + base = (real)(r + 1) * t->dx; dif = r_ij - base; //fprintf(stderr, "r: %f, i: %d, base: %f, dif: %f\n", r, i, base, dif); - if( update_energies ) { - e_vdW = ((t->vdW[r].d*dif + t->vdW[r].c)*dif + t->vdW[r].b)*dif + - t->vdW[r].a; + if ( update_energies ) + { + e_vdW = ((t->vdW[r].d * dif + t->vdW[r].c) * dif + t->vdW[r].b) * dif + + t->vdW[r].a; e_vdW *= self_coef; - e_ele = ((t->ele[r].d*dif + t->ele[r].c)*dif + t->ele[r].b)*dif + - t->ele[r].a; + e_ele = ((t->ele[r].d * dif + t->ele[r].c) * dif + t->ele[r].b) * dif + + t->ele[r].a; e_ele *= self_coef * system->atoms[i].q * system->atoms[j].q; data->E_vdW += e_vdW; data->E_Ele += e_ele; - } + } - CEvd = ((t->CEvd[r].d*dif + t->CEvd[r].c)*dif + t->CEvd[r].b)*dif + - t->CEvd[r].a; + CEvd = ((t->CEvd[r].d * dif + t->CEvd[r].c) * dif + t->CEvd[r].b) * dif + + t->CEvd[r].a; CEvd *= self_coef; //CEvd = (3*t->vdW[r].d*dif + 2*t->vdW[r].c)*dif + t->vdW[r].b; - CEclmb = ((t->CEclmb[r].d*dif+t->CEclmb[r].c)*dif+t->CEclmb[r].b)*dif + - t->CEclmb[r].a; + CEclmb = ((t->CEclmb[r].d * dif + t->CEclmb[r].c) * dif + t->CEclmb[r].b) * dif + + t->CEclmb[r].a; CEclmb *= self_coef * system->atoms[i].q * system->atoms[j].q; - if( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT) { + if ( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT) + { rvec_ScaledAdd( system->atoms[i].f, -(CEvd + CEclmb), nbr_pj->dvec ); 
rvec_ScaledAdd( system->atoms[j].f, +(CEvd + CEclmb), nbr_pj->dvec ); } - else { // NPT, iNPT or sNPT - /* for pressure coupling, terms not related to bond order + else // NPT, iNPT or sNPT + { + /* for pressure coupling, terms not related to bond order derivatives are added directly into pressure vector/tensor */ rvec_Scale( temp, CEvd + CEclmb, nbr_pj->dvec ); rvec_ScaledAdd( system->atoms[i].f, -1., temp ); @@ -535,11 +550,11 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control, #ifdef TEST_ENERGY fprintf(out_control->evdw, "%6d%6d%24.15e%24.15e%24.15e\n", - workspace->orig_id[i], workspace->orig_id[j], + workspace->orig_id[i], workspace->orig_id[j], r_ij, e_vdW, data->E_vdW ); - fprintf(out_control->ecou,"%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e\n", + fprintf(out_control->ecou, "%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e\n", workspace->orig_id[i], workspace->orig_id[j], - r_ij, system->atoms[i].q, system->atoms[j].q, + r_ij, system->atoms[i].q, system->atoms[j].q, e_ele, data->E_Ele ); #endif #ifdef TEST_FORCES @@ -549,23 +564,24 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control, rvec_ScaledAdd( workspace->f_ele[j], +CEclmb, nbr_pj->dvec ); #endif } + } } } #if defined(OLD) - /* Linear extrapolation */ - /*p = (r_ij * t->inv_dx; - r = (int) p; - prev = &( t->y[r] ); - next = &( t->y[r+1] ); - - tmp = p - r; - e_vdW = self_coef * (prev->e_vdW + tmp*(next->e_vdW - prev->e_vdW )); - CEvd = self_coef * (prev->CEvd + tmp*(next->CEvd - prev->CEvd )); - - e_ele = self_coef * (prev->e_ele + tmp*(next->e_ele - prev->e_ele )); - e_ele = e_ele * system->atoms[i].q * system->atoms[j].q; - CEclmb = self_coef * (prev->CEclmb+tmp*(next->CEclmb - prev->CEclmb)); - CEclmb = CEclmb * system->atoms[i].q * system->atoms[j].q;*/ +/* Linear extrapolation */ +/*p = (r_ij * t->inv_dx; + r = (int) p; + prev = &( t->y[r] ); + next = &( t->y[r+1] ); + + tmp = p - r; + e_vdW = self_coef * (prev->e_vdW + tmp*(next->e_vdW - 
prev->e_vdW )); + CEvd = self_coef * (prev->CEvd + tmp*(next->CEvd - prev->CEvd )); + + e_ele = self_coef * (prev->e_ele + tmp*(next->e_ele - prev->e_ele )); + e_ele = e_ele * system->atoms[i].q * system->atoms[j].q; + CEclmb = self_coef * (prev->CEclmb+tmp*(next->CEclmb - prev->CEclmb)); + CEclmb = CEclmb * system->atoms[i].q * system->atoms[j].q;*/ #endif diff --git a/PuReMD-GPU/src/vector.c b/PuReMD-GPU/src/vector.c index 7cf06eb8e6cb1560b651b8b16a091f3a387cdb6c..e396344d173a6d5343faf9d675f48a8ea4e0ca04 100644 --- a/PuReMD-GPU/src/vector.c +++ b/PuReMD-GPU/src/vector.c @@ -21,53 +21,90 @@ #include "vector.h" -int Vector_isZero( real* v, int k ) +inline int Vector_isZero( const real * const v, const unsigned int k ) { - for( --k; k>=0; --k ) - if( fabs( v[k] ) > ALMOST_ZERO ) - return 0; + unsigned int i; - return 1; + #pragma omp master + { + ret = TRUE; + } + + #pragma omp barrier + + #pragma omp for reduction(&&: ret) schedule(static) + for ( i = 0; i < k; ++i ) + { + if ( FABS( v[i] ) > ALMOST_ZERO ) + { + ret = FALSE; + } + } + + return ret; } -void Vector_MakeZero( real *v, int k ) +inline void Vector_MakeZero( real * const v, const unsigned int k ) { - for( --k; k>=0; --k ) - v[k] = 0; + unsigned int i; + + #pragma omp for schedule(static) + for ( i = 0; i < k; ++i ) + { + v[i] = ZERO; + } } -void Vector_Copy( real* dest, real* v, int k ) +inline void Vector_Copy( real * const dest, const real * const v, const unsigned int k ) { - for( --k; k>=0; --k ) - dest[k] = v[k]; + unsigned int i; + + #pragma omp for schedule(static) + for ( i = 0; i < k; ++i ) + { + dest[i] = v[i]; + } } -void Vector_Print( FILE *fout, char *vname, real *v, int k ) +void Vector_Print( FILE * const fout, const char * const vname, const real * const v, + const unsigned int k ) { - int i; + unsigned int i; fprintf( fout, "%s:\n", vname ); - for( i = 0; i < k; ++i ) + for ( i = 0; i < k; ++i ) + { fprintf( fout, "%24.15e\n", v[i] ); + } fprintf( fout, "\n" ); } -real Norm( real* v1, 
int k ) +inline real Norm( const real * const v1, const unsigned int k ) { - real ret = 0; + unsigned int i; + + #pragma omp master + { + ret2 = ZERO; + } - for( --k; k>=0; --k ) - ret += SQR( v1[k] ); + #pragma omp barrier + + #pragma omp for reduction(+: ret2) schedule(static) + for ( i = 0; i < k; ++i ) + { + ret2 += SQR( v1[i] ); + } - return SQRT( ret ); + return SQRT( ret2 ); } -void rvec_Sum( rvec ret, rvec v1 ,rvec v2 ) +inline void rvec_Sum( rvec ret, const rvec v1 , const rvec v2 ) { ret[0] = v1[0] + v2[0]; ret[1] = v1[1] + v2[1]; @@ -75,13 +112,14 @@ void rvec_Sum( rvec ret, rvec v1 ,rvec v2 ) } -real rvec_ScaledDot( real c1, rvec v1, real c2, rvec v2 ) +inline real rvec_ScaledDot( const real c1, const rvec v1, + const real c2, const rvec v2 ) { - return (c1*c2) * (v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2]); + return (c1 * c2) * (v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]); } -void rvec_Multiply( rvec r, rvec v1, rvec v2 ) +inline void rvec_Multiply( rvec r, const rvec v1, const rvec v2 ) { r[0] = v1[0] * v2[0]; r[1] = v1[1] * v2[1]; @@ -89,7 +127,7 @@ void rvec_Multiply( rvec r, rvec v1, rvec v2 ) } -void rvec_Divide( rvec r, rvec v1, rvec v2 ) +inline void rvec_Divide( rvec r, const rvec v1, const rvec v2 ) { r[0] = v1[0] / v2[0]; r[1] = v1[1] / v2[1]; @@ -97,7 +135,7 @@ void rvec_Divide( rvec r, rvec v1, rvec v2 ) } -void rvec_iDivide( rvec r, rvec v1, ivec v2 ) +inline void rvec_iDivide( rvec r, const rvec v1, const ivec v2 ) { r[0] = v1[0] / v2[0]; r[1] = v1[1] / v2[1]; @@ -105,7 +143,7 @@ void rvec_iDivide( rvec r, rvec v1, ivec v2 ) } -void rvec_Invert( rvec r, rvec v ) +inline void rvec_Invert( rvec r, const rvec v ) { r[0] = 1. / v[0]; r[1] = 1. 
/ v[1]; @@ -113,154 +151,189 @@ void rvec_Invert( rvec r, rvec v ) } -void rvec_OuterProduct( rtensor r, rvec v1, rvec v2 ) +inline void rvec_OuterProduct( rtensor r, const rvec v1, const rvec v2 ) { - int i, j; + unsigned int i, j; - for( i = 0; i < 3; ++i ) - for( j = 0; j < 3; ++j ) + for ( i = 0; i < 3; ++i ) + { + for ( j = 0; j < 3; ++j ) + { r[i][j] = v1[i] * v2[j]; + } + } } - -int rvec_isZero( rvec v ) +inline int rvec_isZero( const rvec v ) { - if( fabs(v[0]) > ALMOST_ZERO || - fabs(v[1]) > ALMOST_ZERO || + if ( fabs(v[0]) > ALMOST_ZERO || + fabs(v[1]) > ALMOST_ZERO || fabs(v[2]) > ALMOST_ZERO ) - return 0; - return 1; + { + return FALSE; + } + return TRUE; } -void rtensor_Multiply( rtensor ret, rtensor m1, rtensor m2 ) +inline void rtensor_Multiply( rtensor ret, rtensor m1, rtensor m2 ) { - int i, j, k; + unsigned int i, j, k; rtensor temp; // check if the result matrix is the same as one of m1, m2. - // if so, we cannot modify the contents of m1 or m2, so + // if so, we cannot modify the contents of m1 or m2, so // we have to use a temp matrix. 
- if( ret == m1 || ret == m2 ) + if ( ret == m1 || ret == m2 ) { - for( i = 0; i < 3; ++i ) - for( j = 0; j < 3; ++j ) + for ( i = 0; i < 3; ++i ) + for ( j = 0; j < 3; ++j ) { - temp[i][j] = 0; - for( k = 0; k < 3; ++k ) + temp[i][j] = 0; + for ( k = 0; k < 3; ++k ) temp[i][j] += m1[i][k] * m2[k][j]; } - for( i = 0; i < 3; ++i ) - for( j = 0; j < 3; ++j ) - ret[i][j] = temp[i][j]; + for ( i = 0; i < 3; ++i ) + for ( j = 0; j < 3; ++j ) + ret[i][j] = temp[i][j]; } else { - for( i = 0; i < 3; ++i ) - for( j = 0; j < 3; ++j ) + for ( i = 0; i < 3; ++i ) + for ( j = 0; j < 3; ++j ) { - ret[i][j] = 0; - for( k = 0; k < 3; ++k ) + ret[i][j] = 0; + for ( k = 0; k < 3; ++k ) ret[i][j] += m1[i][k] * m2[k][j]; } } } -void rtensor_MatVec( rvec ret, rtensor m, rvec v ) +inline void rtensor_MatVec( rvec ret, rtensor m, const rvec v ) { - int i; + unsigned int i; rvec temp; - // if ret is the same vector as v, we cannot modify the + // if ret is the same vector as v, we cannot modify the // contents of v until all computation is finished. 
- if( ret == v ) + if ( ret == v ) { - for( i = 0; i < 3; ++i ) + for ( i = 0; i < 3; ++i ) + { temp[i] = m[i][0] * v[0] + m[i][1] * v[1] + m[i][2] * v[2]; + } - for( i = 0; i < 3; ++i ) + for ( i = 0; i < 3; ++i ) + { ret[i] = temp[i]; + } } else { - for( i = 0; i < 3; ++i ) + for ( i = 0; i < 3; ++i ) + { ret[i] = m[i][0] * v[0] + m[i][1] * v[1] + m[i][2] * v[2]; + } } } -void rtensor_Scale( rtensor ret, real c, rtensor m ) +inline void rtensor_Scale( rtensor ret, const real c, rtensor m ) { - int i, j; + unsigned int i, j; - for( i = 0; i < 3; ++i ) - for( j = 0; j < 3; ++j ) + for ( i = 0; i < 3; ++i ) + { + for ( j = 0; j < 3; ++j ) + { ret[i][j] = c * m[i][j]; + } + } } -void rtensor_Add( rtensor ret, rtensor t ) +inline void rtensor_Add( rtensor ret, rtensor t ) { int i, j; - for( i = 0; i < 3; ++i ) - for( j = 0; j < 3; ++j ) + for ( i = 0; i < 3; ++i ) + { + for ( j = 0; j < 3; ++j ) + { ret[i][j] += t[i][j]; + } + } } -void rtensor_ScaledAdd( rtensor ret, real c, rtensor t ) +inline void rtensor_ScaledAdd( rtensor ret, const real c, rtensor t ) { - int i, j; + unsigned int i, j; - for( i = 0; i < 3; ++i ) - for( j = 0; j < 3; ++j ) + for ( i = 0; i < 3; ++i ) + { + for ( j = 0; j < 3; ++j ) + { ret[i][j] += c * t[i][j]; + } + } } -void rtensor_Sum( rtensor ret, rtensor t1, rtensor t2 ) +inline void rtensor_Sum( rtensor ret, rtensor t1, rtensor t2 ) { - int i, j; + unsigned int i, j; - for( i = 0; i < 3; ++i ) - for( j = 0; j < 3; ++j ) + for ( i = 0; i < 3; ++i ) + { + for ( j = 0; j < 3; ++j ) + { ret[i][j] = t1[i][j] + t2[i][j]; + } + } } -void rtensor_ScaledSum( rtensor ret, real c1, rtensor t1, - real c2, rtensor t2 ) +inline void rtensor_ScaledSum( rtensor ret, const real c1, rtensor t1, + const real c2, rtensor t2 ) { - int i, j; + unsigned int i, j; - for( i = 0; i < 3; ++i ) - for( j = 0; j < 3; ++j ) + for ( i = 0; i < 3; ++i ) + { + for ( j = 0; j < 3; ++j ) + { ret[i][j] = c1 * t1[i][j] + c2 * t2[i][j]; + } + } } -void rtensor_Copy( rtensor 
ret, rtensor t ) +inline void rtensor_Copy( rtensor ret, rtensor t ) { - int i, j; + unsigned int i, j; - for( i = 0; i < 3; ++i ) - for( j = 0; j < 3; ++j ) + for ( i = 0; i < 3; ++i ) + { + for ( j = 0; j < 3; ++j ) + { ret[i][j] = t[i][j]; + } + } } -void rtensor_Identity( rtensor t ) +inline void rtensor_Identity( rtensor t ) { t[0][0] = t[1][1] = t[2][2] = 1; t[0][1] = t[0][2] = t[1][0] = t[1][2] = t[2][0] = t[2][1] = ZERO; } -void rtensor_MakeZero( rtensor t ) +inline void rtensor_MakeZero( rtensor t ) { t[0][0] = t[0][1] = t[0][2] = ZERO; t[1][0] = t[1][1] = t[1][2] = ZERO; @@ -268,50 +341,58 @@ void rtensor_MakeZero( rtensor t ) } -void rtensor_Transpose( rtensor ret, rtensor t ) +inline void rtensor_Transpose( rtensor ret, rtensor t ) { - ret[0][0] = t[0][0], ret[1][1] = t[1][1], ret[2][2] = t[2][2]; - ret[0][1] = t[1][0], ret[0][2] = t[2][0]; - ret[1][0] = t[0][1], ret[1][2] = t[2][1]; - ret[2][0] = t[0][2], ret[2][1] = t[1][2]; + ret[0][0] = t[0][0]; + ret[1][1] = t[1][1]; + ret[2][2] = t[2][2]; + + ret[0][1] = t[1][0]; + ret[0][2] = t[2][0]; + + ret[1][0] = t[0][1]; + ret[1][2] = t[2][1]; + + ret[2][0] = t[0][2]; + ret[2][1] = t[1][2]; } -real rtensor_Det( rtensor t ) +inline real rtensor_Det( rtensor t ) { return ( t[0][0] * (t[1][1] * t[2][2] - t[1][2] * t[2][1] ) + - t[0][1] * (t[1][2] * t[2][0] - t[1][0] * t[2][2] ) + - t[0][2] * (t[1][0] * t[2][1] - t[1][1] * t[2][0] ) ); + t[0][1] * (t[1][2] * t[2][0] - t[1][0] * t[2][2] ) + + t[0][2] * (t[1][0] * t[2][1] - t[1][1] * t[2][0] ) ); } -real rtensor_Trace( rtensor t ) +inline real rtensor_Trace( rtensor t ) { return (t[0][0] + t[1][1] + t[2][2]); } -void Print_rTensor(FILE* fp, rtensor t) +void Print_rTensor(FILE * const fp, rtensor t) { - int i, j; + unsigned int i, j; - for (i=0; i < 3; i++) + for (i = 0; i < 3; i++) { - fprintf(fp,"["); - for (j=0; j < 3; j++) - fprintf(fp,"%8.3f,\t",t[i][j]); - fprintf(fp,"]\n"); + fprintf(fp, "["); + for (j = 0; j < 3; j++) + fprintf(fp, "%8.3f,\t", t[i][j]); + 
fprintf(fp, "]\n"); } } -void ivec_MakeZero( ivec v ) +inline void ivec_MakeZero( ivec v ) { v[0] = v[1] = v[2] = 0; } -void ivec_rScale( ivec dest, real C, rvec src ) +inline void ivec_rScale( ivec dest, const real C, const rvec src ) { dest[0] = (int)(C * src[0]); dest[1] = (int)(C * src[1]); @@ -319,20 +400,22 @@ void ivec_rScale( ivec dest, real C, rvec src ) } -int ivec_isZero( ivec v ) +inline int ivec_isZero( const ivec v ) { - if( v[0]==0 && v[1]==0 && v[2]==0 ) - return 1; - return 0; + if ( v[0] == 0 && v[1] == 0 && v[2] == 0 ) + { + return TRUE; + } + return FALSE; } -int ivec_isEqual( ivec v1, ivec v2 ) +inline int ivec_isEqual( const ivec v1, const ivec v2 ) { - if( v1[0]==v2[0] && v1[1]==v2[1] && v1[2]==v2[2] ) - return 1; + if ( v1[0] == v2[0] && v1[1] == v2[1] && v1[2] == v2[2] ) + { + return TRUE; + } - return 0; + return FALSE; } - - diff --git a/PuReMD-GPU/src/vector.h b/PuReMD-GPU/src/vector.h index e1111e514928e79fc79197a0f2486d5eefb1cfa3..79748544fb8349bb797efe246e3430561fefef14 100644 --- a/PuReMD-GPU/src/vector.h +++ b/PuReMD-GPU/src/vector.h @@ -26,72 +26,85 @@ #include "random.h" +/* global to make OpenMP shared (Vector_isZero) */ +unsigned int ret; +/* global to make OpenMP shared (Dot, Norm) */ +real ret2; + + #ifdef __cplusplus extern "C" { #endif -int Vector_isZero( real*, int ); -void Vector_MakeZero( real*, int ); -void Vector_Copy( real*, real*, int ); -//void Vector_Scale( real*, real, real*, int ); -//void Vector_Sum( real*, real, real*, real, real*, int ); -//void Vector_Add( real*, real, real*, int ); -void Vector_Print( FILE*, char*, real*, int ); -real Norm( real*, int ); - -void rvec_Sum( rvec, rvec, rvec ); -real rvec_ScaledDot( real, rvec, real, rvec ); -void rvec_Multiply( rvec, rvec, rvec ); -void rvec_Divide( rvec, rvec, rvec ); -void rvec_iDivide( rvec, rvec, ivec ); -void rvec_Invert( rvec, rvec ); -void rvec_OuterProduct( rtensor, rvec, rvec ); -int rvec_isZero( rvec ); +int Vector_isZero( const real * const, const 
unsigned int ); +void Vector_MakeZero( real * const, const unsigned int ); +void Vector_Copy( real * const, const real * const, const unsigned int ); +void Vector_Print( FILE * const, const char * const, const real * const, const unsigned int ); +real Norm( const real * const, const unsigned int ); + +void rvec_Sum( rvec, const rvec, const rvec ); +real rvec_ScaledDot( const real, const rvec, const real, const rvec ); +void rvec_Multiply( rvec, const rvec, const rvec ); +void rvec_Divide( rvec, const rvec, const rvec ); +void rvec_iDivide( rvec, const rvec, const ivec ); +void rvec_Invert( rvec, const rvec ); +void rvec_OuterProduct( rtensor, const rvec, const rvec ); +int rvec_isZero( const rvec ); void rtensor_MakeZero( rtensor ); void rtensor_Multiply( rtensor, rtensor, rtensor ); -void rtensor_MatVec( rvec, rtensor, rvec ); -void rtensor_Scale( rtensor, real, rtensor ); +void rtensor_MatVec( rvec, rtensor, const rvec ); +void rtensor_Scale( rtensor, const real, rtensor ); void rtensor_Add( rtensor, rtensor ); -void rtensor_ScaledAdd( rtensor, real, rtensor ); +void rtensor_ScaledAdd( rtensor, const real, rtensor ); void rtensor_Sum( rtensor, rtensor, rtensor ); -void rtensor_ScaledSum( rtensor, real, rtensor, real, rtensor ); -void rtensor_Scale( rtensor, real, rtensor ); +void rtensor_ScaledSum( rtensor, const real, rtensor, const real, rtensor ); +void rtensor_Scale( rtensor, const real, rtensor ); void rtensor_Copy( rtensor, rtensor ); void rtensor_Identity( rtensor ); void rtensor_Transpose( rtensor, rtensor ); real rtensor_Det( rtensor ); real rtensor_Trace( rtensor ); -void Print_rTensor(FILE*, rtensor); +void Print_rTensor(FILE * const, rtensor); -int ivec_isZero( ivec ); -int ivec_isEqual( ivec, ivec ); +int ivec_isZero( const ivec ); +int ivec_isEqual( const ivec, const ivec ); void ivec_MakeZero( ivec ); -void ivec_rScale( ivec, real, rvec ); +void ivec_rScale( ivec, const real, const rvec ); -static inline HOST_DEVICE real Dot( real* v1, real* v2, 
int k ) +static inline HOST_DEVICE real Dot( const real * const v1, const real * const v2, const unsigned int k ) { - real ret = 0; + unsigned int i; + + #pragma omp master + { + ret2 = ZERO; + } + + #pragma omp barrier - for ( --k; k >= 0; --k ) - ret += v1[k] * v2[k]; - return ret; + #pragma omp for reduction(+: ret2) schedule(static) + for ( i = 0; i < k; ++i ) + { + ret2 += v1[i] * v2[i]; + } + + return ret2; } -///////////////////////////// -//rvec functions -///////////////////////////// static inline HOST_DEVICE void rvec_MakeZero( rvec v ) { - v[0] = v[1] = v[2] = ZERO; + v[0] = ZERO; + v[1] = ZERO; + v[2] = ZERO; } -static inline HOST_DEVICE void rvec_Add( rvec ret, rvec v ) +static inline HOST_DEVICE void rvec_Add( rvec ret, const rvec v ) { ret[0] += v[0]; ret[1] += v[1]; @@ -99,13 +112,15 @@ static inline HOST_DEVICE void rvec_Add( rvec ret, rvec v ) } -static inline HOST_DEVICE void rvec_Copy( rvec dest, rvec src ) +static inline HOST_DEVICE void rvec_Copy( rvec dest, const rvec src ) { - dest[0] = src[0], dest[1] = src[1], dest[2] = src[2]; + dest[0] = src[0]; + dest[1] = src[1]; + dest[2] = src[2]; } -static inline HOST_DEVICE void rvec_Cross( rvec ret, rvec v1, rvec v2 ) +static inline HOST_DEVICE void rvec_Cross( rvec ret, const rvec v1, const rvec v2 ) { ret[0] = v1[1] * v2[2] - v1[2] * v2[1]; ret[1] = v1[2] * v2[0] - v1[0] * v2[2]; @@ -113,13 +128,16 @@ static inline HOST_DEVICE void rvec_Cross( rvec ret, rvec v1, rvec v2 ) } -static inline HOST_DEVICE void rvec_ScaledAdd( rvec ret, real c, rvec v ) +static inline HOST_DEVICE void rvec_ScaledAdd( rvec ret, const real c, const rvec v ) { - ret[0] += c * v[0], ret[1] += c * v[1], ret[2] += c * v[2]; + ret[0] += c * v[0]; + ret[1] += c * v[1]; + ret[2] += c * v[2]; } -static inline HOST_DEVICE void rvec_ScaledSum( rvec ret, real c1, rvec v1 , real c2, rvec v2 ) +static inline HOST_DEVICE void rvec_ScaledSum( rvec ret, const real c1, const rvec v1, + const real c2, const rvec v2 ) { ret[0] = c1 * 
v1[0] + c2 * v2[0]; ret[1] = c1 * v1[1] + c2 * v2[1]; @@ -135,25 +153,27 @@ static inline HOST_DEVICE void rvec_Random( rvec v ) } -static inline HOST_DEVICE real rvec_Norm_Sqr( rvec v ) +static inline HOST_DEVICE real rvec_Norm_Sqr( const rvec v ) { return SQR(v[0]) + SQR(v[1]) + SQR(v[2]); } -static inline HOST_DEVICE void rvec_Scale( rvec ret, real c, rvec v ) +static inline HOST_DEVICE void rvec_Scale( rvec ret, const real c, const rvec v ) { - ret[0] = c * v[0], ret[1] = c * v[1], ret[2] = c * v[2]; + ret[0] = c * v[0]; + ret[1] = c * v[1]; + ret[2] = c * v[2]; } -static inline HOST_DEVICE real rvec_Dot( rvec v1, rvec v2 ) +static inline HOST_DEVICE real rvec_Dot( const rvec v1, const rvec v2 ) { return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]; } -static inline HOST_DEVICE void rvec_iMultiply( rvec r, ivec v1, rvec v2 ) +static inline HOST_DEVICE void rvec_iMultiply( rvec r, const ivec v1, const rvec v2 ) { r[0] = v1[0] * v2[0]; r[1] = v1[1] * v2[1]; @@ -161,22 +181,21 @@ static inline HOST_DEVICE void rvec_iMultiply( rvec r, ivec v1, rvec v2 ) } -static inline HOST_DEVICE real rvec_Norm( rvec v ) +static inline HOST_DEVICE real rvec_Norm( const rvec v ) { return SQRT( SQR(v[0]) + SQR(v[1]) + SQR(v[2]) ); } -///////////////// -//ivec functions -///////////////// -static inline HOST_DEVICE void ivec_Copy( ivec dest , ivec src ) +static inline HOST_DEVICE void ivec_Copy( ivec dest , const ivec src ) { - dest[0] = src[0], dest[1] = src[1], dest[2] = src[2]; + dest[0] = src[0]; + dest[1] = src[1]; + dest[2] = src[2]; } -static inline HOST_DEVICE void ivec_Scale( ivec dest, real C, ivec src ) +static inline HOST_DEVICE void ivec_Scale( ivec dest, const real C, const ivec src ) { dest[0] = C * src[0]; dest[1] = C * src[1]; @@ -184,7 +203,7 @@ static inline HOST_DEVICE void ivec_Scale( ivec dest, real C, ivec src ) } -static inline HOST_DEVICE void ivec_Sum( ivec dest, ivec v1, ivec v2 ) +static inline HOST_DEVICE void ivec_Sum( ivec dest, const ivec v1, const 
ivec v2 ) { dest[0] = v1[0] + v2[0]; dest[1] = v1[1] + v2[1]; @@ -192,27 +211,43 @@ static inline HOST_DEVICE void ivec_Sum( ivec dest, ivec v1, ivec v2 ) } -///////////////// -//vector functions -///////////////// -static inline HOST_DEVICE void Vector_Sum( real* dest, real c, real* v, real d, real* y, int k ) +static inline HOST_DEVICE void Vector_Sum( real * const dest, const real c, + const real * const v, const real d, const real * const y, + const unsigned int k ) { - for (k--; k >= 0; k--) - dest[k] = c * v[k] + d * y[k]; + unsigned int i; + + #pragma omp for schedule(static) + for ( i = 0; i < k; ++i ) + { + dest[i] = c * v[i] + d * y[i]; + } } -static inline HOST_DEVICE void Vector_Scale( real* dest, real c, real* v, int k ) +static inline HOST_DEVICE void Vector_Scale( real * const dest, const real c, + const real * const v, const unsigned int k ) { - for (k--; k >= 0; k--) - dest[k] = c * v[k]; + unsigned int i; + + #pragma omp for schedule(static) + for ( i = 0; i < k; ++i ) + { + dest[i] = c * v[i]; + } } -static inline HOST_DEVICE void Vector_Add( real* dest, real c, real* v, int k ) +static inline HOST_DEVICE void Vector_Add( real * const dest, const real c, + const real * const v, const unsigned int k ) { - for (k--; k >= 0; k--) - dest[k] += c * v[k]; + unsigned int i; + + #pragma omp for schedule(static) + for ( i = 0; i < k; ++i ) + { + dest[i] += c * v[i]; + } } #ifdef __cplusplus diff --git a/README.md b/README.md index 442146a67a8a5c20732075c6daf7c541aa72e5f6..abdadbab72c5d297de9a8559471a036ee229c001 100644 --- a/README.md +++ b/README.md @@ -7,5 +7,5 @@ Files from the [Purdue Reactive Molecular Dynamics](https://www.cs.purdue.edu/pu Roughly by target platform - [Serial](https://www.cs.purdue.edu/puremd/docs/80859.pdf) - [MPI (message passing interface)](https://www.cs.purdue.edu/puremd/docs/Parallel-Reactive-Molecular-Dynamics.pdf) -- [CUDA (single GPU)](http://dx.doi.org/10.1016/j.jcp.2014.04.035) (single GPU) +- [CUDA (single 
GPU)](http://dx.doi.org/10.1016/j.jcp.2014.04.035) - [CUDA+MPI (multi-GPU)](https://www.cs.purdue.edu/puremd/docs/pgpuremd.pdf) diff --git a/configure.ac b/configure.ac index 2488af52296535e6ec37ca3c3d687e7f905d78a7..659ab4575e6565b17a6624d586145077ee98a47a 100644 --- a/configure.ac +++ b/configure.ac @@ -46,23 +46,25 @@ AC_ARG_ENABLE([mpi-gpu], [enable MPI+CUDA (multi GPU) support @<:@default: no@:>@])], [pack_mpi_gpu_enabled=${enableval}], [pack_mpi_gpu_enabled=no]) -if test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" = "xyes"; then - AC_CONFIG_SUBDIRS([sPuReMD]) +if test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" = "xyes" || test "x${pack_gpu_enabled}" = "xyes"; then + AC_CONFIG_SUBDIRS([PuReMD-GPU]) if test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" != "xyes"; then export BUILD_OPENMP="no" else - export BUILD_OPENMP="yes" + if test "x${pack_gpu_enabled}" = "xyes"; then + export BUILD_GPU="yes" + else + export BUILD_OPENMP="yes" + fi fi fi AM_CONDITIONAL([BUILD_S_OMP], [test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" = "xyes"]) +AM_CONDITIONAL([BUILD_GPU], [test "x${pack_gpu_enabled}" = "xyes"]) + if test "x${pack_mpi_enabled}" = "xyes"; then AC_CONFIG_SUBDIRS([PuReMD]) fi AM_CONDITIONAL([BUILD_MPI], [test "x${pack_mpi_enabled}" = "xyes"]) -if test "x${pack_gpu_enabled}" = "xyes"; then - AC_CONFIG_SUBDIRS([PuReMD-GPU]) -fi -AM_CONDITIONAL([BUILD_GPU], [test "x${pack_gpu_enabled}" = "xyes"]) if test "x${pack_mpi_not_gpu_enabled}" = "xyes" || test "x${pack_mpi_gpu_enabled}" = "xyes"; then AC_CONFIG_SUBDIRS([PG-PuReMD]) if test "x${pack_mpi_not_gpu_enabled}" = "xyes" || test "x${pack_mpi_gpu_enabled}" != "xyes"; then diff --git a/sPuReMD/Makefile.am b/sPuReMD/Makefile.am index 7c986471c7759f44ea3cd9aea126c246929f1647..12cffee9894677d37875b1d6dc9576d723447f21 100644 --- a/sPuReMD/Makefile.am +++ b/sPuReMD/Makefile.am @@ -4,7 +4,7 @@ bin_PROGRAMS = bin/spuremd 
bin_spuremd_SOURCES = src/ffield.c src/grid.c src/list.c src/lookup.c src/print_utils.c \ src/reset_utils.c src/restart.c src/random.c src/tool_box.c src/traj.c \ src/vector.c src/allocate.c src/analyze.c src/box.c src/system_props.c src/control.c \ - src/geo_tools.c src/neighbors.c src/lin_alg.c src/QEq.c src/bond_orders.c \ + src/geo_tools.c src/neighbors.c src/lin_alg.c src/qeq.c src/bond_orders.c \ src/single_body_interactions.c src/two_body_interactions.c \ src/three_body_interactions.c src/four_body_interactions.c src/forces.c \ src/integrate.c src/init_md.c src/testmd.c @@ -12,7 +12,7 @@ bin_spuremd_SOURCES = src/ffield.c src/grid.c src/list.c src/lookup.c src/print_ include_HEADERS = src/mytypes.h src/ffield.h src/grid.h src/list.h src/lookup.h src/print_utils.h \ src/reset_utils.h src/restart.h src/random.h src/tool_box.h src/traj.h \ src/vector.h src/allocate.h src/analyze.h src/box.h src/system_props.h src/control.h \ - src/geo_tools.h src/neighbors.h src/lin_alg.h src/QEq.h src/bond_orders.h \ + src/geo_tools.h src/neighbors.h src/lin_alg.h src/qeq.h src/bond_orders.h \ src/single_body_interactions.h src/two_body_interactions.h \ src/three_body_interactions.h src/four_body_interactions.h src/forces.h \ src/integrate.h src/init_md.h diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c index 9108a8dd4026e001716f707302e40d671d48b838..5a0585685b7af523c9b129d6ec57cd3589675edc 100644 --- a/sPuReMD/src/forces.c +++ b/sPuReMD/src/forces.c @@ -20,6 +20,7 @@ ----------------------------------------------------------------------*/ #include "forces.h" + #include "box.h" #include "bond_orders.h" #include "single_body_interactions.h" @@ -29,7 +30,7 @@ #include "list.h" #include "print_utils.h" #include "system_props.h" -#include "QEq.h" +#include "qeq.h" #include "vector.h" diff --git a/sPuReMD/src/integrate.c b/sPuReMD/src/integrate.c index 142863cad18291a04f4f56fc364c73cf8fdabd3b..f9768c49bc89915346645f6134e9e346c845c2d6 100644 --- a/sPuReMD/src/integrate.c +++ 
b/sPuReMD/src/integrate.c @@ -20,13 +20,14 @@ ----------------------------------------------------------------------*/ #include "integrate.h" + #include "allocate.h" #include "box.h" #include "forces.h" #include "grid.h" #include "neighbors.h" #include "print_utils.h" -#include "QEq.h" +#include "qeq.h" #include "reset_utils.h" #include "restart.h" #include "system_props.h" @@ -34,7 +35,6 @@ #include "list.h" - void Velocity_Verlet_NVE(reax_system* system, control_params* control, simulation_data *data, static_storage *workspace, list **lists, output_controls *out_control ) diff --git a/sPuReMD/src/lin_alg.h b/sPuReMD/src/lin_alg.h index fe2d644cae630be6414944989b52adae8a6e1d61..e5a468ffb1669e53e638972b82daad9f7fcc07d0 100644 --- a/sPuReMD/src/lin_alg.h +++ b/sPuReMD/src/lin_alg.h @@ -19,8 +19,8 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#ifndef __GMRES_H_ -#define __GMRES_H_ +#ifndef __LIN_ALG_H_ +#define __LIN_ALG_H_ #include "mytypes.h" diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h index 69441bb72b7499b663a506dc8c95d40e56473a78..9d741f256799b99dc012f44a30070d3893546782 100644 --- a/sPuReMD/src/mytypes.h +++ b/sPuReMD/src/mytypes.h @@ -37,7 +37,7 @@ #include "zlib.h" #ifdef _OPENMP -#include <omp.h> + #include <omp.h> #endif //#define DEBUG_FOCUS @@ -51,6 +51,7 @@ #define TRUE 1 #define FALSE 0 +#define LOG log #define EXP exp #define SQRT sqrt #define POW pow @@ -71,10 +72,10 @@ /* NaN IEEE 754 representation for C99 in math.h * Note: function choice must match REAL typedef below */ #ifdef NAN -#define IS_NAN_REAL(a) (isnan(a)) + #define IS_NAN_REAL(a) (isnan(a)) #else -#warn "No support for NaN" -#define NAN_REAL(a) (0) + #warn "No support for NaN" + #define NAN_REAL(a) (0) #endif #define PI 3.14159265 @@ -948,20 +949,21 @@ typedef struct typedef void (*interaction_function)(reax_system*, control_params*, - simulation_data*, static_storage*, - list**, output_controls*); + 
simulation_data*, static_storage*, list**, output_controls*); + interaction_function Interaction_Functions[NO_OF_INTERACTIONS]; typedef void (*evolve_function)(reax_system*, control_params*, - simulation_data*, static_storage*, - list**, output_controls*); + simulation_data*, static_storage*, + list**, output_controls*); typedef real (*lookup_function)(real); + lookup_table Exp, Sqrt, Cube_Root, Four_Third_Root, Cos, Sin, ACos; LR_lookup_table **LR; - typedef void (*get_far_neighbors_function)(rvec, rvec, simulation_box*, - control_params*, far_neighbor_data*, - int*); + control_params*, far_neighbor_data*, int*); + + #endif diff --git a/sPuReMD/src/QEq.c b/sPuReMD/src/qeq.c similarity index 99% rename from sPuReMD/src/QEq.c rename to sPuReMD/src/qeq.c index 026a3ae1a84e4912958737161818aea77080da44..be99a1fd42194fc18b467a913f7a74485c125bec 100644 --- a/sPuReMD/src/QEq.c +++ b/sPuReMD/src/qeq.c @@ -19,7 +19,7 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#include "QEq.h" +#include "qeq.h" #include "allocate.h" #include "list.h" diff --git a/sPuReMD/src/QEq.h b/sPuReMD/src/qeq.h similarity index 100% rename from sPuReMD/src/QEq.h rename to sPuReMD/src/qeq.h diff --git a/sPuReMD/src/random.c b/sPuReMD/src/random.c index f3a5096c65485111fb5cba7321976518a2e42972..9b09e7526b7a8418470cbf8c1b45bd1940dcbfa9 100644 --- a/sPuReMD/src/random.c +++ b/sPuReMD/src/random.c @@ -19,7 +19,8 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#include <random.h> +#include "random.h" + /* System random number generator used linear congruance method with large periodicity for generation of pseudo random number. function @@ -53,5 +54,5 @@ double GRandom(double mean, double sigma) rsq = v1 * v1 + v2 * v2; } - return mean + v1 * sigma * sqrt(-2.0 * log(rsq) / rsq); + return mean + v1 * sigma * SQRT(-2.0 * LOG(rsq) / rsq); }