From 044bc618be0eb70c68e659dd8a1feeec1e92a859 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnk@msu.edu> Date: Sun, 26 Aug 2018 15:02:57 -0400 Subject: [PATCH] sPuReMD: remove defunct SuperLU code. Add boilerplate code for ILUT. Other cleanup work. --- configure.ac | 10 - environ/param.gpu.water | 14 +- sPuReMD/configure.ac | 41 - sPuReMD/src/; | 1391 +++++++++++++++++++++++++++ sPuReMD/src/box.c | 2 +- sPuReMD/src/box.h | 2 +- sPuReMD/src/charges.c | 415 +++----- sPuReMD/src/init_md.c | 354 +++---- sPuReMD/src/integrate.c | 275 +++--- sPuReMD/src/integrate.h | 17 +- sPuReMD/src/lin_alg.c | 751 ++++----------- sPuReMD/src/lin_alg.h | 17 +- sPuReMD/src/neighbors.c | 2 +- sPuReMD/src/print_utils.c | 2 +- sPuReMD/src/reax_types.h | 24 +- sPuReMD/src/traj.c | 2 +- sPuReMD/src/two_body_interactions.c | 5 +- 17 files changed, 2057 insertions(+), 1267 deletions(-) create mode 100644 sPuReMD/src/; diff --git a/configure.ac b/configure.ac index 95a04436..4be1f086 100644 --- a/configure.ac +++ b/configure.ac @@ -123,16 +123,6 @@ then export BUILD_TIMING="yes" fi -AC_ARG_WITH([superlu-mt], - [AS_HELP_STRING([--with-superlu-mt], - [enable usage of SuperLU MT for preconditioning @<:@default: no@:>@])], - [package_superlu_mt=${withval}], [package_superlu_mt=no]) - -if test "x${package_superlu_mt}" != "xno" -then - export BUILD_SUPERLU_MT="${package_superlu_mt}" -fi - AC_ARG_ENABLE([doc], [AS_HELP_STRING([--enable-doc], [enable documentation generation @<:@default: no@:>@])], diff --git a/environ/param.gpu.water b/environ/param.gpu.water index aff71210..c43c69cb 100644 --- a/environ/param.gpu.water +++ b/environ/param.gpu.water @@ -1,5 +1,5 @@ simulation_name water_6540_notab_qeq ! output files will carry this name + their specific extension -ensemble_type 0 ! 0: NVE, 1: NVT, 2: anisotropic NPT, 3: semi-isotropic NPT, 4: isotropic NPT 6: berendsen NVT +ensemble_type 0 ! 0: NVE, 1: Berendsen NVT, 2: nose-Hoover NVT, 3: semi-isotropic NPT, 4: isotropic NPT, 5: anisotropic NPT nsteps 100 ! number of simulation steps dt 0.25 ! time step in fs periodic_boundaries 1 ! 0: no periodic boundaries, 1: periodic boundaries @@ -17,20 +17,20 @@ hbond_cutoff 7.50 ! cutoff distance for hydrogen b charge_method 0 ! charge method: 0 = QEq, 1 = EEM, 2 = ACKS2 cm_q_net 0.0 ! net system charge -cm_solver_type 0 ! iterative linear solver for charge method: 0 = GMRES, 1 = GMRES_H, 2 = CG, 3 = SDM +cm_solver_type 0 ! iterative linear solver for charge method: 0 = GMRES(k), 1 = GMRES_H(k), 2 = CG, 3 = SDM cm_solver_max_iters 20 ! max solver iterations -cm_solver_restart 100 ! inner iterations of GMRES before restarting +cm_solver_restart 100 ! inner iterations of before restarting (GMRES(k)/GMRES_H(k)) cm_solver_q_err 1e-6 ! relative residual norm threshold used in solver cm_domain_sparsity 1.0 ! scalar for scaling cut-off distance, used to sparsify charge matrix (between 0.0 and 1.0) cm_init_guess_extrap1 3 ! order of spline extrapolation for initial guess (s) cm_init_guess_extrap2 2 ! order of spline extrapolation for initial guess (t) cm_solver_pre_comp_type 1 ! method used to compute preconditioner, if applicable cm_solver_pre_comp_refactor 1000 ! number of steps before recomputing preconditioner -cm_solver_pre_comp_droptol 0.0 ! threshold tolerance for dropping values in preconditioner computation, if applicable -cm_solver_pre_comp_sweeps 3 ! number of sweeps used to compute preconditioner (ILU_PAR) +cm_solver_pre_comp_droptol 0.0 ! threshold tolerance for dropping values in preconditioner computation (ICHOLT/ILUT/FG-ILUT) +cm_solver_pre_comp_sweeps 3 ! number of sweeps used to compute preconditioner (FG-ILUT) cm_solver_pre_comp_sai_thres 0.1 ! ratio of charge matrix NNZ's used to compute preconditioner (SAI) -cm_solver_pre_app_type 1 ! method used to apply preconditioner -cm_solver_pre_app_jacobi_iters 50 ! number of Jacobi iterations used for applying precondition, if applicable +cm_solver_pre_app_type 1 ! method used to apply preconditioner (ICHOLT/ILUT/FG-ILUT) +cm_solver_pre_app_jacobi_iters 50 ! num. Jacobi iterations used for applying precondition (ICHOLT/ILUT/FG-ILUT) temp_init 0.0 ! desired initial temperature of the simulated system temp_final 300.0 ! desired final temperature of the simulated system diff --git a/sPuReMD/configure.ac b/sPuReMD/configure.ac index 522a1075..093a415a 100644 --- a/sPuReMD/configure.ac +++ b/sPuReMD/configure.ac @@ -80,47 +80,6 @@ if test "x${BUILD_OPENMP}" = "xyes"; then fi fi -if test "x$BUILD_SUPERLU_MT" != "x" -then - CPPFLAGS="${CPPFLAGS} -I${BUILD_SUPERLU_MT}/include" - LDFLAGS="${LDFLAGS} -L${BUILD_SUPERLU_MT}/lib" - #TODO: implement better BLAS detection - LIBS="${LIBS} -lblas" -# BLAS_FOUND_LIBS="yes" -# AC_SEARCH_LIBS([dtrsv_], [blas blas_OPENMP], -# [], [BLAS_FOUND_LIBS="no"], []) -# AS_IF([test "x${BLAS_FOUND_LIBS}" != "xyes"], -# [AC_MSG_ERROR([Unable to find BLAS library.])]) - AC_CHECK_HEADERS([slu_mt_ddefs.h], [SUPERLU_MT_FOUND_HEADERS="yes"]) - AS_IF([test "x${SUPERLU_MT_FOUND_HEADERS}" != "xyes"], - [AC_MSG_ERROR([Unable to find SuperLU MT headers.])]) - SUPERLU_MT_FOUND_LIBS="yes" - #TODO: fix issue where multiple -l flags added, one for each call below - AC_SEARCH_LIBS([intMalloc], [superlu_mt superlu_mt_OPENMP], - [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) - AC_SEARCH_LIBS([get_perm_c], [superlu_mt superlu_mt_OPENMP], - [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) - AC_SEARCH_LIBS([pdgstrf_init], [superlu_mt superlu_mt_OPENMP], - [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp -lblas -lblas_OPENMP]) - AC_SEARCH_LIBS([pdgstrf], [superlu_mt superlu_mt_OPENMP], - [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp -lblas -lblas_OPENMP]) - AC_SEARCH_LIBS([pxgstrf_finalize], [superlu_mt superlu_mt_OPENMP], - [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp -lblas -lblas_OPENMP]) - AC_SEARCH_LIBS([StatAlloc], [superlu_mt superlu_mt_OPENMP], - [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) - AC_SEARCH_LIBS([StatInit], [superlu_mt superlu_mt_OPENMP], - [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) - AC_SEARCH_LIBS([StatFree], [superlu_mt superlu_mt_OPENMP], - [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) - AC_SEARCH_LIBS([Destroy_SuperNode_SCP], [superlu_mt superlu_mt_OPENMP], - [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) - AC_SEARCH_LIBS([Destroy_CompCol_NCP], [superlu_mt superlu_mt_OPENMP], - [], [SUPERLU_MT_FOUND_LIBS="no"], [-lgomp]) - AS_IF([test "x${SUPERLU_MT_FOUND_LIBS}" != "xyes"], - [AC_MSG_ERROR([Unable to find SuperLU MT library.])]) - AC_DEFINE([HAVE_SUPERLU_MT], [1], [Define to 1 if you have SuperLU_MT support enabled.]) -fi - if test "x$BUILD_DEBUG" != "x" then CFLAGS="${CFLAGS} ${DEBUG_FLAGS}" diff --git a/sPuReMD/src/; b/sPuReMD/src/; new file mode 100644 index 00000000..f90f7955 --- /dev/null +++ b/sPuReMD/src/; @@ -0,0 +1,1391 @@ +/*---------------------------------------------------------------------- + SerialReax - Reax Force Field Simulator + + Copyright (2010) Purdue University + Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#include "charges.h" + +#include "allocate.h" +#include "list.h" +#include "lin_alg.h" +#include "print_utils.h" +#include "tool_box.h" +#include "vector.h" + + +static void Extrapolate_Charges_QEq( const reax_system * const system, + const control_params * const control, + simulation_data * const data, static_storage * const workspace ) +{ + int i; + real s_tmp, t_tmp; + + /* spline extrapolation for s & t */ + //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer +#ifdef _OPENMP + #pragma omp parallel for schedule(static) \ + default(none) private(i, s_tmp, t_tmp) +#endif + for ( i = 0; i < system->N_cm; ++i ) + { + /* no extrapolation, previous solution as initial guess */ + if ( control->cm_init_guess_extrap1 == 0 ) + { + s_tmp = workspace->s[0][i]; + } + /* linear */ + else if ( control->cm_init_guess_extrap1 == 1 ) + { + s_tmp = 2.0 * workspace->s[0][i] - workspace->s[1][i]; + } + /* quadratic */ + else if ( control->cm_init_guess_extrap1 == 2 ) + { + s_tmp = workspace->s[2][i] + 3.0 * (workspace->s[0][i]-workspace->s[1][i]); + } + /* cubic */ + else if ( control->cm_init_guess_extrap1 == 3 ) + { + s_tmp = 4.0 * (workspace->s[0][i] + workspace->s[2][i]) - + (6.0 * workspace->s[1][i] + workspace->s[3][i]); + } + /* 4th order */ + else if ( control->cm_init_guess_extrap1 == 4 ) + { + s_tmp = 5.0 * (workspace->s[0][i] - workspace->s[3][i]) + + 10.0 * (-1.0 * workspace->s[1][i] + workspace->s[2][i]) + workspace->s[4][i]; + } + else + { + s_tmp = 0.0; + } + + /* no extrapolation, previous solution as initial guess */ + if ( control->cm_init_guess_extrap2 == 0 ) + { + t_tmp = workspace->t[0][i]; + } + /* linear */ + else if ( control->cm_init_guess_extrap2 == 1 ) + { + t_tmp = 2.0 * workspace->t[0][i] - workspace->t[1][i]; + } + /* quadratic */ + else if ( control->cm_init_guess_extrap2 == 2 ) + { + t_tmp = workspace->t[2][i] + 3.0 * (workspace->t[0][i] - workspace->t[1][i]); + } + /* cubic */ + else if ( control->cm_init_guess_extrap2 == 3 ) + { + t_tmp = 4.0 * (workspace->t[0][i] + workspace->t[2][i]) - + (6.0 * workspace->t[1][i] + workspace->t[3][i]); + } + /* 4th order */ + else if ( control->cm_init_guess_extrap2 == 4 ) + { + t_tmp = 5.0 * (workspace->t[0][i] - workspace->t[3][i]) + + 10.0 * (-1.0 * workspace->t[1][i] + workspace->t[2][i]) + workspace->t[4][i]; + } + else + { + t_tmp = 0.0; + } + + workspace->s[4][i] = workspace->s[3][i]; + workspace->s[3][i] = workspace->s[2][i]; + workspace->s[2][i] = workspace->s[1][i]; + workspace->s[1][i] = workspace->s[0][i]; + workspace->s[0][i] = s_tmp; + + workspace->t[4][i] = workspace->t[3][i]; + workspace->t[3][i] = workspace->t[2][i]; + workspace->t[2][i] = workspace->t[1][i]; + workspace->t[1][i] = workspace->t[0][i]; + workspace->t[0][i] = t_tmp; + } +} + + +static void Extrapolate_Charges_EE( const reax_system * const system, + const control_params * const control, + simulation_data * const data, static_storage * const workspace ) +{ + int i; + real s_tmp; + + /* spline extrapolation for s */ + //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer +#ifdef _OPENMP + #pragma omp parallel for schedule(static) \ + default(none) private(i, s_tmp) +#endif + for ( i = 0; i < system->N_cm; ++i ) + { + /* no extrapolation */ + if ( control->cm_init_guess_extrap1 == 0 ) + { + s_tmp = workspace->s[0][i]; + } + /* linear */ + else if ( control->cm_init_guess_extrap1 == 1 ) + { + s_tmp = 2.0 * workspace->s[0][i] - workspace->s[1][i]; + } + /* quadratic */ + else if ( control->cm_init_guess_extrap1 == 2 ) + { + s_tmp = workspace->s[2][i] + 3.0 * (workspace->s[0][i]-workspace->s[1][i]); + } + /* cubic */ + else if ( control->cm_init_guess_extrap1 == 3 ) + { + s_tmp = 4.0 * (workspace->s[0][i] + workspace->s[2][i]) - + (6.0 * workspace->s[1][i] + workspace->s[3][i] ); + } + /* 4th order */ + else if ( control->cm_init_guess_extrap1 == 4 ) + { + s_tmp = 5.0 * (workspace->s[0][i] - workspace->s[3][i]) + + 10.0 * (-workspace->s[1][i] + workspace->s[2][i] ) + workspace->s[4][i]; + } + else + { + s_tmp = 0.0; + } + + workspace->s[4][i] = workspace->s[3][i]; + workspace->s[3][i] = workspace->s[2][i]; + workspace->s[2][i] = workspace->s[1][i]; + workspace->s[1][i] = workspace->s[0][i]; + workspace->s[0][i] = s_tmp; + } +} + + +/* Compute preconditioner for QEq + */ +static void Compute_Preconditioner_QEq( const reax_system * const system, + const control_params * const control, + simulation_data * const data, static_storage * const workspace ) +{ + real time; + sparse_matrix *Hptr; + + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Hptr = workspace->H_sp; + } + else + { + Hptr = workspace->H; + } + +#if defined(TEST_MAT) + Hptr = create_test_mat( ); +#endif + + time = Get_Time( ); + if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) + { + setup_graph_coloring( control, workspace, Hptr, &workspace->H_full, &workspace->H_p ); + Sort_Matrix_Rows( workspace->H_p ); + Hptr = workspace->H_p; + } + data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); + + switch ( control->cm_solver_pre_comp_type ) + { + case NONE_PC: + break; + + case JACOBI_PC: + data->timing.cm_solver_pre_comp += + jacobi( Hptr, workspace->Hdia_inv ); + break; + + case ICHOLT_PC: + data->timing.cm_solver_pre_comp += + ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U ); + break; + + case ILUT_PAR_PC: + data->timing.cm_solver_pre_comp += + FG_ILUT( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, + workspace->L, workspace->U ); + break; + + case SAI_PC: +#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) + data->timing.cm_solver_pre_comp += + sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full, + &workspace->H_app_inv ); +#else + fprintf( stderr, "LAPACKE support disabled. Re-compile before enabling. Terminating...\n" ); + exit( INVALID_INPUT ); +#endif + break; + + default: + fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + +#if defined(DEBUG) +#define SIZE (1000) + char fname[SIZE]; + + if ( control->cm_solver_pre_comp_type != NONE_PC && + control->cm_solver_pre_comp_type != JACOBI_PC ) + { + fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) ); + +#if defined(DEBUG_FOCUS) + snprintf( fname, SIZE + 10, "%s.L%d.out", control->sim_name, data->step ); + Print_Sparse_Matrix2( workspace->L, fname, NULL ); + snprintf( fname, SIZE + 10, "%s.U%d.out", control->sim_name, data->step ); + Print_Sparse_Matrix2( workspace->U, fname, NULL ); +#endif + } +#undef SIZE +#endif +} + + +/* Compute preconditioner for EE + */ +//static void Compute_Preconditioner_EE( const reax_system * const system, +// const control_params * const control, +// simulation_data * const data, static_storage * const workspace ) +//{ +// int i, top; +// static real * ones = NULL, * x = NULL, * y = NULL; +// sparse_matrix *Hptr; +// +// Hptr = workspace->H_EE; +// +//#if defined(TEST_MAT) +// Hptr = create_test_mat( ); +//#endif +// +// if ( ones == NULL ) +// { +// ones = (real*) smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::ones" ); +// x = (real*) smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::x" ); +// y = (real*) smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::y" ); +// +// for ( i = 0; i < system->N; ++i ) +// { +// ones[i] = 1.0; +// } +// } +// +// switch ( control->cm_solver_pre_comp_type ) +// { +// case JACOBI_PC: +// data->timing.cm_solver_pre_comp += +// jacobi( Hptr, workspace->Hdia_inv ); +// break; +// +// case ICHOLT_PC: +// data->timing.cm_solver_pre_comp += +// ICHOLT( Hptr, workspace->droptol, workspace->L_EE, workspace->U_EE ); +// break; +// +// case ILUT_PAR_PC: +// data->timing.cm_solver_pre_comp += +// FG_ILUT( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, +// workspace->L_EE, workspace->U_EE ); +// break; +// +// default: +// fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" ); +// exit( INVALID_INPUT ); +// break; +// } +// +// if ( control->cm_solver_pre_comp_type != JACOBI_PC ) +// { +// switch ( control->cm_solver_pre_app_type ) +// { +// case TRI_SOLVE_PA: +// tri_solve( workspace->L_EE, ones, x, workspace->L_EE->n, LOWER ); +// Transpose_I( workspace->U_EE ); +// tri_solve( workspace->U_EE, ones, y, workspace->U_EE->n, LOWER ); +// Transpose_I( workspace->U_EE ); +// +// memcpy( workspace->L->start, workspace->L_EE->start, sizeof(unsigned int) * (system->N + 1) ); +// memcpy( workspace->L->j, workspace->L_EE->j, sizeof(unsigned int) * workspace->L_EE->start[workspace->L_EE->n] ); +// memcpy( workspace->L->val, workspace->L_EE->val, sizeof(real) * workspace->L_EE->start[workspace->L_EE->n] ); +// +// top = workspace->L->start[system->N]; +// for ( i = 0; i < system->N; ++i ) +// { +// workspace->L->j[top] = i; +// workspace->L->val[top] = x[i]; +// ++top; +// } +// +// workspace->L->j[top] = system->N_cm - 1; +// workspace->L->val[top] = 1.0; +// ++top; +// +// workspace->L->start[system->N_cm] = top; +// +// top = 0; +// for ( i = 0; i < system->N; ++i ) +// { +// workspace->U->start[i] = top; +// memcpy( workspace->U->j + top, workspace->U_EE->j + workspace->U_EE->start[i], +// sizeof(unsigned int) * (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]) ); +// memcpy( workspace->U->val + top, workspace->U_EE->val + workspace->U_EE->start[i], +// sizeof(real) * (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]) ); +// top += (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]); +// +// workspace->U->j[top] = system->N_cm - 1; +// workspace->U->val[top] = y[i]; +// ++top; +// } +// +// workspace->U->start[system->N_cm - 1] = top; +// +// workspace->U->j[top] = system->N_cm - 1; +// workspace->U->val[top] = -Dot( x, y, system->N ); +// ++top; +// +// workspace->U->start[system->N_cm] = top; +// break; +// +// case TRI_SOLVE_LEVEL_SCHED_PA: +// tri_solve_level_sched( workspace->L_EE, ones, x, workspace->L_EE->n, LOWER, TRUE ); +// Transpose_I( workspace->U_EE ); +// tri_solve_level_sched( workspace->U_EE, ones, y, workspace->U_EE->n, LOWER, TRUE ); +// Transpose_I( workspace->U_EE ); +// +// memcpy( workspace->L->start, workspace->L_EE->start, sizeof(unsigned int) * (system->N + 1) ); +// memcpy( workspace->L->j, workspace->L_EE->j, sizeof(unsigned int) * workspace->L_EE->start[workspace->L_EE->n] ); +// memcpy( workspace->L->val, workspace->L_EE->val, sizeof(real) * workspace->L_EE->start[workspace->L_EE->n] ); +// +// top = workspace->L->start[system->N]; +// for ( i = 0; i < system->N; ++i ) +// { +// workspace->L->j[top] = i; +// workspace->L->val[top] = x[i]; +// ++top; +// } +// +// workspace->L->j[top] = system->N_cm - 1; +// workspace->L->val[top] = 1.0; +// ++top; +// +// workspace->L->start[system->N_cm] = top; +// +// top = 0; +// for ( i = 0; i < system->N; ++i ) +// { +// workspace->U->start[i] = top; +// memcpy( workspace->U->j + top, workspace->U_EE->j + workspace->U_EE->start[i], +// sizeof(unsigned int) * (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]) ); +// memcpy( workspace->U->val + top, workspace->U_EE->val + workspace->U_EE->start[i], +// sizeof(real) * (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]) ); +// top += (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]); +// +// workspace->U->j[top] = system->N_cm - 1; +// workspace->U->val[top] = y[i]; +// ++top; +// } +// +// workspace->U->start[system->N_cm - 1] = top; +// +// workspace->U->j[top] = system->N_cm - 1; +// workspace->U->val[top] = -Dot( x, y, system->N ); +// ++top; +// +// workspace->U->start[system->N_cm] = top; +// break; +// +// //TODO: add Jacobi iter, etc.? +// default: +// tri_solve( workspace->L_EE, ones, x, workspace->L_EE->n, LOWER ); +// Transpose_I( workspace->U_EE ); +// tri_solve( workspace->U_EE, ones, y, workspace->U_EE->n, LOWER ); +// Transpose_I( workspace->U_EE ); +// +// memcpy( workspace->L->start, workspace->L_EE->start, sizeof(unsigned int) * (system->N + 1) ); +// memcpy( workspace->L->j, workspace->L_EE->j, sizeof(unsigned int) * workspace->L_EE->start[workspace->L_EE->n] ); +// memcpy( workspace->L->val, workspace->L_EE->val, sizeof(real) * workspace->L_EE->start[workspace->L_EE->n] ); +// +// top = workspace->L->start[system->N]; +// for ( i = 0; i < system->N; ++i ) +// { +// workspace->L->j[top] = i; +// workspace->L->val[top] = x[i]; +// ++top; +// } +// +// workspace->L->j[top] = system->N_cm - 1; +// workspace->L->val[top] = 1.0; +// ++top; +// +// workspace->L->start[system->N_cm] = top; +// +// top = 0; +// for ( i = 0; i < system->N; ++i ) +// { +// workspace->U->start[i] = top; +// memcpy( workspace->U->j + top, workspace->U_EE->j + workspace->U_EE->start[i], +// sizeof(unsigned int) * (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]) ); +// memcpy( workspace->U->val + top, workspace->U_EE->val + workspace->U_EE->start[i], +// sizeof(real) * (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]) ); +// top += (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]); +// +// workspace->U->j[top] = system->N_cm - 1; +// workspace->U->val[top] = y[i]; +// ++top; +// } +// +// workspace->U->start[system->N_cm - 1] = top; +// +// workspace->U->j[top] = system->N_cm - 1; +// workspace->U->val[top] = -Dot( x, y, system->N ); +// ++top; +// +// workspace->U->start[system->N_cm] = top; +// break; +// } +// } +// +//#if defined(DEBUG) +//#define SIZE (1000) +// char fname[SIZE]; +// +// if ( control->cm_solver_pre_comp_type != JACOBI_PC ) +// { +// fprintf( stderr, "condest = %f\n", condest(workspace->L) ); +// +//#if defined(DEBUG_FOCUS) +// snprintf( fname, SIZE + 10, "%s.L%d.out", control->sim_name, data->step ); +// Print_Sparse_Matrix2( workspace->L, fname, NULL ); +// snprintf( fname, SIZE + 10, "%s.U%d.out", control->sim_name, data->step ); +// Print_Sparse_Matrix2( workspace->U, fname, NULL ); +// +// fprintf( stderr, "icholt-" ); +// snprintf( fname, SIZE + 10, "%s.L%d.out", control->sim_name, data->step ); +// Print_Sparse_Matrix2( workspace->L, fname, NULL ); +// Print_Sparse_Matrix( U ); +//#endif +// } +//#undef SIZE +//#endif +//} + + +/* Compute preconditioner for EE + */ +static void Compute_Preconditioner_EE( const reax_system * const system, + const control_params * const control, + simulation_data * const data, static_storage * const workspace ) +{ + real time; + sparse_matrix *Hptr; + + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Hptr = workspace->H_sp; + } + else + { + Hptr = workspace->H; + } + +#if defined(TEST_MAT) + Hptr = create_test_mat( ); +#endif + + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; + } + + time = Get_Time( ); + if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) + { + setup_graph_coloring( control, workspace, Hptr, &workspace->H_full, &workspace->H_p ); + Sort_Matrix_Rows( workspace->H_p ); + Hptr = workspace->H_p; + } + data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); + + switch ( control->cm_solver_pre_comp_type ) + { + case NONE_PC: + break; + + case JACOBI_PC: + data->timing.cm_solver_pre_comp += + jacobi( Hptr, workspace->Hdia_inv ); + break; + + case ICHOLT_PC: + data->timing.cm_solver_pre_comp += + ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U ); + break; + + case ILUT_PAR_PC: + data->timing.cm_solver_pre_comp += + FG_ILUT( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, + workspace->L, workspace->U ); + break; + + case SAI_PC: +#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) + data->timing.cm_solver_pre_comp += + sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full, + &workspace->H_app_inv ); +#else + fprintf( stderr, "LAPACKE support disabled. Re-compile before enabling. Terminating...\n" ); + exit( INVALID_INPUT ); +#endif + break; + + default: + fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + + if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) + { + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Hptr = workspace->H_sp; + } + else + { + Hptr = workspace->H; + } + } + + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; + } + +#if defined(DEBUG) +#define SIZE (1000) + char fname[SIZE]; + + if ( control->cm_solver_pre_comp_type != NONE_PC && + control->cm_solver_pre_comp_type != JACOBI_PC ) + { + fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) ); + +#if defined(DEBUG_FOCUS) + snprintf( fname, SIZE + 10, "%s.L%d.out", control->sim_name, data->step ); + Print_Sparse_Matrix2( workspace->L, fname, NULL ); + snprintf( fname, SIZE + 10, "%s.U%d.out", control->sim_name, data->step ); + Print_Sparse_Matrix2( workspace->U, fname, NULL ); +#endif + } +#undef SIZE +#endif +} + + +/* Compute preconditioner for ACKS2 + */ +static void Compute_Preconditioner_ACKS2( const reax_system * const system, + const control_params * const control, + simulation_data * const data, static_storage * const workspace ) +{ + real time; + sparse_matrix *Hptr; + + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Hptr = workspace->H_sp; + } + else + { + Hptr = workspace->H; + } + +#if defined(TEST_MAT) + Hptr = create_test_mat( ); +#endif + + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; + Hptr->val[Hptr->start[system->N_cm] - 1] = 1.0; + } + + time = Get_Time( ); + if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) + { + setup_graph_coloring( control, workspace, Hptr, &workspace->H_full, &workspace->H_p ); + Sort_Matrix_Rows( workspace->H_p ); + Hptr = workspace->H_p; + } + data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); + + switch ( control->cm_solver_pre_comp_type ) + { + case NONE_PC: + break; + + case JACOBI_PC: + data->timing.cm_solver_pre_comp += + jacobi( Hptr, workspace->Hdia_inv ); + break; + + case ICHOLT_PC: + data->timing.cm_solver_pre_comp += + ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U ); + break; + + case ILUT_PAR_PC: + data->timing.cm_solver_pre_comp += + ILUT_PAR( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, + workspace->L, workspace->U ); + break; + + case SAI_PC: +#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) + data->timing.cm_solver_pre_comp += + sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full, + &workspace->H_app_inv ); +#else + fprintf( stderr, "LAPACKE support disabled. Re-compile before enabling. Terminating...\n" ); + exit( INVALID_INPUT ); +#endif + break; + + default: + fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + + if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) + { + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Hptr = workspace->H_sp; + } + else + { + Hptr = workspace->H; + } + } + + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; + Hptr->val[Hptr->start[system->N_cm] - 1] = 0.0; + } + +#if defined(DEBUG) +#define SIZE (1000) + char fname[SIZE]; + + if ( control->cm_solver_pre_comp_type != NONE_PC || + control->cm_solver_pre_comp_type != JACOBI_PC ) + { + fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) ); + +#if defined(DEBUG_FOCUS) + snprintf( fname, SIZE + 10, "%s.L%d.out", control->sim_name, data->step ); + Print_Sparse_Matrix2( workspace->L, fname, NULL ); + snprintf( fname, SIZE + 10, "%s.U%d.out", control->sim_name, data->step ); + Print_Sparse_Matrix2( workspace->U, fname, NULL ); +#endif + } +#undef SIZE +#endif +} + + +static void Setup_Preconditioner_QEq( const reax_system * const system, + const control_params * const control, + simulation_data * const data, static_storage * const workspace ) +{ + int fillin; + real time; + sparse_matrix *Hptr; + + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Hptr = workspace->H_sp; + } + else + { + Hptr = workspace->H; + } + + /* sort H needed for SpMV's in linear solver, H or H_sp needed for preconditioning */ + time = Get_Time( ); + Sort_Matrix_Rows( workspace->H ); + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Sort_Matrix_Rows( workspace->H_sp ); + } + data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); + + switch ( control->cm_solver_pre_comp_type ) + { + case NONE_PC: + break; + + case JACOBI_PC: + if ( workspace->Hdia_inv == NULL ) + { + workspace->Hdia_inv = (real *) scalloc( Hptr->n, sizeof( real ), + "Setup_Preconditioner_QEq::workspace->Hdiv_inv" ); + } + break; + + case ICHOLT_PC: + Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); + + fillin = Estimate_LU_Fill( Hptr, workspace->droptol ); + + if ( workspace->L == NULL ) + { + Allocate_Matrix( &(workspace->L), Hptr->n, fillin ); + Allocate_Matrix( &(workspace->U), Hptr->n, fillin ); + } + else if ( workspace->L->m < fillin ) + { + Deallocate_Matrix( workspace->L ); + Deallocate_Matrix( workspace->U ); + + Allocate_Matrix( &(workspace->L), Hptr->n, fillin ); + Allocate_Matrix( &(workspace->U), Hptr->n, fillin ); + } + break; + + case ILUT_PAR_PC: + Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); + + if ( workspace->L == NULL ) + { + /* TODO: safest storage estimate is ILU(0) + * (same as lower triangular portion of H), could improve later */ + Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); + Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); + } + else if ( workspace->L->m < Hptr->m ) + { + Deallocate_Matrix( workspace->L ); + Deallocate_Matrix( workspace->U ); + + /* TODO: safest storage estimate is ILU(0) + * (same as lower triangular portion of H), could improve later */ + Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); + Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); + } + break; + + case SAI_PC: + setup_sparse_approx_inverse( Hptr, &workspace->H_full, &workspace->H_spar_patt, + &workspace->H_spar_patt_full, &workspace->H_app_inv, + control->cm_solver_pre_comp_sai_thres ); + break; + + default: + fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } +} + + +/* Setup routines before computing the preconditioner for EE + */ +static void Setup_Preconditioner_EE( const reax_system * const system, + const control_params * const control, + simulation_data * const data, static_storage * const workspace ) +{ + int fillin; + real time; + sparse_matrix *Hptr; + + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Hptr = workspace->H_sp; + } + else + { + Hptr = workspace->H; + } + + /* sorted H needed for SpMV's in linear solver, H or H_sp needed for preconditioning */ + time = Get_Time( ); + Sort_Matrix_Rows( workspace->H ); + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Sort_Matrix_Rows( workspace->H_sp ); + } + data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); + + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; + } + + switch ( control->cm_solver_pre_comp_type ) + { + case NONE_PC: + break; + + case JACOBI_PC: + if ( workspace->Hdia_inv == NULL ) + { + workspace->Hdia_inv = (real *) scalloc( system->N_cm, sizeof( real ), + "Setup_Preconditioner_QEq::workspace->Hdiv_inv" ); + } + break; + + case ICHOLT_PC: + Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); + + fillin = Estimate_LU_Fill( Hptr, workspace->droptol ); + + if ( workspace->L == NULL ) + { + Allocate_Matrix( &(workspace->L), system->N_cm, fillin + system->N_cm ); + Allocate_Matrix( &(workspace->U), system->N_cm, fillin + system->N_cm ); + } + else if ( workspace->L->m < fillin + system->N_cm ) + { + Deallocate_Matrix( workspace->L ); + Deallocate_Matrix( workspace->U ); + + Allocate_Matrix( &(workspace->L), system->N_cm, fillin + system->N_cm ); + Allocate_Matrix( &(workspace->U), system->N_cm, fillin + system->N_cm ); + } + break; + + case ILUT_PAR_PC: + Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); + + if ( workspace->L == NULL ) + { + /* TODO: safest storage estimate is ILU(0) + * (same as lower triangular portion of H), could improve later */ + Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); + Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); + } + else if ( workspace->L->m < Hptr->m ) + { + Deallocate_Matrix( workspace->L ); + Deallocate_Matrix( workspace->U ); + + /* TODO: safest storage estimate is ILU(0) + * (same as lower triangular portion of H), could improve later */ + Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); + Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); + } + break; + + case SAI_PC: + setup_sparse_approx_inverse( Hptr, &workspace->H_full, &workspace->H_spar_patt, + &workspace->H_spar_patt_full, &workspace->H_app_inv, + control->cm_solver_pre_comp_sai_thres ); + break; + + default: + fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; + } +} + + +/* Setup routines before computing the preconditioner for ACKS2 + */ +static void Setup_Preconditioner_ACKS2( const reax_system * const system, + const control_params * const control, + simulation_data * const data, static_storage * const workspace ) +{ + int fillin; + real time; + sparse_matrix *Hptr; + + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Hptr = workspace->H_sp; + } + else + { + Hptr = workspace->H; + } + + /* sort H needed for SpMV's in linear solver, H or H_sp needed for preconditioning */ + time = Get_Time( ); + Sort_Matrix_Rows( workspace->H ); + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Sort_Matrix_Rows( workspace->H_sp ); + } + data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); + + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; + Hptr->val[Hptr->start[system->N_cm] - 1] = 1.0; + } + + switch ( control->cm_solver_pre_comp_type ) + { + case NONE_PC: + break; + + case JACOBI_PC: + if ( workspace->Hdia_inv == NULL ) + { + workspace->Hdia_inv = (real *) scalloc( Hptr->n, sizeof( real ), + "Setup_Preconditioner_QEq::workspace->Hdiv_inv" ); + } + break; + + case ICHOLT_PC: + Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); + + fillin = Estimate_LU_Fill( Hptr, workspace->droptol ); + + if ( workspace->L == NULL ) + { + Allocate_Matrix( &(workspace->L), Hptr->n, fillin ); + Allocate_Matrix( &(workspace->U), Hptr->n, fillin ); + } + else if ( workspace->L->m < fillin ) + { + Deallocate_Matrix( workspace->L ); + Deallocate_Matrix( workspace->U ); + + /* factors have sparsity pattern as H */ + Allocate_Matrix( &(workspace->L), Hptr->n, fillin ); + Allocate_Matrix( &(workspace->U), Hptr->n, fillin ); + } + break; + + case ILUT_PAR_PC: + Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); + + if ( workspace->L == NULL ) + { + /* TODO: safest storage estimate is ILU(0) + * (same as lower triangular portion of H), could improve later */ + Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); + Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); + } + else if ( workspace->L->m < Hptr->m ) + { + Deallocate_Matrix( workspace->L ); + Deallocate_Matrix( workspace->U ); + + /* factors have sparsity pattern as H */ + Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); + Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); + } + break; + + case SAI_PC: + setup_sparse_approx_inverse( Hptr, &workspace->H_full, &workspace->H_spar_patt, + &workspace->H_spar_patt_full, &workspace->H_app_inv, + control->cm_solver_pre_comp_sai_thres ); + break; + + default: + fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; + Hptr->val[Hptr->start[system->N_cm] - 1] = 0.0; + } +} + + +/* Combine ficticious charges s and t to get atomic charge q for QEq method + */ +static void Calculate_Charges_QEq( const reax_system * const system, + static_storage * const workspace ) +{ + int i; + real u, s_sum, t_sum; + + s_sum = 0.0; + t_sum = 0.0; + for ( i = 0; i < system->N_cm; ++i ) + { + s_sum += workspace->s[0][i]; + t_sum += workspace->t[0][i]; + } + + u = s_sum / t_sum; + for ( i = 0; i < system->N_cm; ++i ) + { + system->atoms[i].q = workspace->s[0][i] - u * workspace->t[0][i]; + +#if defined(DEBUG_FOCUS) + printf("atom %4d: %f\n", i, system->atoms[i].q); + printf(" x[0]: %10.5f, x[1]: %10.5f, x[2]: %10.5f\n", + system->atoms[i].x[0], system->atoms[i].x[1], system->atoms[i].x[2]); +#endif + } +} + + +/* Get atomic charge q for EE method + */ +static void Calculate_Charges_EE( const reax_system * const system, + static_storage * const workspace ) +{ + int i; + + for ( i = 0; i < system->N; ++i ) + { + system->atoms[i].q = workspace->s[0][i]; + +#if defined(DEBUG_FOCUS) + printf( "atom %4d: %f\n", i, system->atoms[i].q ); + printf( " x[0]: %10.5f, x[1]: %10.5f, x[2]: %10.5f\n", + system->atoms[i].x[0], system->atoms[i].x[1], system->atoms[i].x[2] ); +#endif + } +} + + +/* Main driver method for QEq kernel + * 1) init / setup routines for preconditioning of linear solver + * 2) compute preconditioner + * 3) extrapolate charges + * 4) perform 2 linear solves + * 5) compute atomic charges based on output of (4) + */ +static void QEq( reax_system * const system, control_params * const control, + simulation_data * const data, static_storage * const workspace, + const output_controls * const out_control ) +{ + int iters; + + if ( control->cm_solver_pre_comp_refactor > 0 && + ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ) + + { + Setup_Preconditioner_QEq( system, control, data, workspace ); + + Compute_Preconditioner_QEq( system, control, data, workspace ); + } + + Extrapolate_Charges_QEq( system, control, data, workspace ); + + switch ( control->cm_solver_type ) + { + case GMRES_S: + iters = GMRES( workspace, control, data, workspace->H, + workspace->b_s, control->cm_solver_q_err, workspace->s[0], + (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); + iters += GMRES( workspace, control, data, workspace->H, + workspace->b_t, control->cm_solver_q_err, workspace->t[0], FALSE ); + break; + + case GMRES_H_S: + iters = GMRES_HouseHolder( workspace, control, data, workspace->H, + workspace->b_s, control->cm_solver_q_err, workspace->s[0], + (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); + iters += GMRES_HouseHolder( workspace, control, data, workspace->H, + workspace->b_t, control->cm_solver_q_err, workspace->t[0], 0 ); + break; + + case CG_S: + iters = CG( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; + iters += CG( workspace, control, data, workspace->H, workspace->b_t, control->cm_solver_q_err, + workspace->t[0], FALSE ) + 1; + break; + + case SDM_S: + iters = SDM( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; + iters += SDM( workspace, control, data, workspace->H, workspace->b_t, control->cm_solver_q_err, + workspace->t[0], FALSE ) + 1; + break; + + case BiCGStab_S: + iters = BiCGStab( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; + iters += BiCGStab( workspace, control, data, workspace->H, workspace->b_t, control->cm_solver_q_err, + workspace->t[0], FALSE ) + 1; + break; + + default: + fprintf( stderr, "Unrecognized QEq solver selection. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + + data->timing.cm_solver_iters += iters; + + Calculate_Charges_QEq( system, workspace ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "%d %.9f %.9f %.9f %.9f %.9f %.9f\n", data->step, + workspace->s[0][0], workspace->t[0][0], + workspace->s[0][1], workspace->t[0][1], + workspace->s[0][2], workspace->t[0][2] ); + if( data->step == control->nsteps ) + { + Print_Charges( system, control, workspace, data->step ); + } +#endif +} + + +/* Main driver method for EE kernel + * 1) init / setup routines for preconditioning of linear solver + * 2) compute preconditioner + * 3) extrapolate charges + * 4) perform 1 linear solve + * 5) compute atomic charges based on output of (4) + */ +static void EE( reax_system * const system, control_params * const control, + simulation_data * const data, static_storage * const workspace, + const output_controls * const out_control ) +{ + int iters; + + if ( control->cm_solver_pre_comp_refactor > 0 && + ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ) + { + Setup_Preconditioner_EE( system, control, data, workspace ); + + Compute_Preconditioner_EE( system, control, data, workspace ); + } + + Extrapolate_Charges_EE( system, control, data, workspace ); + + switch ( control->cm_solver_type ) + { + case GMRES_S: + iters = GMRES( workspace, control, data, workspace->H, + workspace->b_s, control->cm_solver_q_err, workspace->s[0], + (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); + break; + + case GMRES_H_S: + iters = GMRES_HouseHolder( workspace, control, data,workspace->H, + workspace->b_s, control->cm_solver_q_err, workspace->s[0], + control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ); + break; + + case CG_S: + iters = CG( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; + break; + + case SDM_S: + iters = SDM( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; + break; + + case BiCGStab_S: + iters = BiCGStab( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; + break; + + default: + fprintf( stderr, "Unrecognized EE solver selection. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + + data->timing.cm_solver_iters += iters; + + Calculate_Charges_EE( system, workspace ); + + // if( data->step == control->nsteps ) + //Print_Charges( system, control, workspace, data->step ); +} + + +/* Main driver method for ACKS2 kernel + * 1) init / setup routines for preconditioning of linear solver + * 2) compute preconditioner + * 3) extrapolate charges + * 4) perform 1 linear solve + * 5) compute atomic charges based on output of (4) + */ +static void ACKS2( reax_system * const system, control_params * const control, + simulation_data * const data, static_storage * const workspace, + const output_controls * const out_control ) +{ + int iters; + + if ( control->cm_solver_pre_comp_refactor > 0 && + ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ) + { + Setup_Preconditioner_ACKS2( system, control, data, workspace ); + + Compute_Preconditioner_ACKS2( system, control, data, workspace ); + } + +// Print_Linear_System( system, control, workspace, data->step ); + + Extrapolate_Charges_EE( system, control, data, workspace ); + +#if defined(DEBUG_FOCUS) +#define SIZE (200) + char fname[SIZE]; + FILE * fp; + + if ( data->step % 10 == 0 ) + { + snprintf( fname, SIZE + 11, "s_%d_%s.out", data->step, control->sim_name ); + fp = sfopen( fname, "w" ); + Vector_Print( fp, NULL, workspace->s[0], system->N_cm ); + sfclose( fp, "ACKS2::fp" ); + } +#undef SIZE +#endif + + switch ( control->cm_solver_type ) + { + case GMRES_S: + iters = GMRES( workspace, control, data, workspace->H, + workspace->b_s, control->cm_solver_q_err, workspace->s[0], + (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); + break; + + case GMRES_H_S: + iters = GMRES_HouseHolder( workspace, control, data,workspace->H, + workspace->b_s, control->cm_solver_q_err, workspace->s[0], + control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ); + break; + + case CG_S: + iters = CG( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; + break; + + case SDM_S: + iters = SDM( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; + break; + + case BiCGStab_S: + iters = BiCGStab( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; + break; + + default: + fprintf( stderr, "Unrecognized ACKS2 solver selection. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + + data->timing.cm_solver_iters += iters; + + Calculate_Charges_EE( system, workspace ); +} + + +void Compute_Charges( reax_system * const system, control_params * const control, + simulation_data * const data, static_storage * const workspace, + const output_controls * const out_control ) +{ +#if defined(DEBUG_FOCUS) +#define SIZE (200) + char fname[SIZE]; + FILE * fp; + + if ( data->step % 10 == 0 ) + { + snprintf( fname, SIZE + 11, "H_%d_%s.out", data->step, control->sim_name ); + Print_Sparse_Matrix2( workspace->H, fname, NULL ); +// Print_Sparse_Matrix_Binary( workspace->H, fname ); + + snprintf( fname, SIZE + 11, "b_s_%d_%s.out", data->step, control->sim_name ); + fp = sfopen( fname, "w" ); + Vector_Print( fp, NULL, workspace->b_s, system->N_cm ); + sfclose( fp, "Compute_Charges::fp" ); + +// snprintf( fname, SIZE + 11, "b_t_%d_%s.out", data->step, control->sim_name ); +// fp = sfopen( fname, "w" ); +// Vector_Print( fp, NULL, workspace->b_t, system->N_cm ); +// sfclose( fp, "Compute_Charges::fp" ); + } +#undef SIZE +#endif + + switch ( control->charge_method ) + { + case QEQ_CM: + QEq( system, control, data, workspace, out_control ); + break; + + case EE_CM: + EE( system, control, data, workspace, out_control ); + break; + + case ACKS2_CM: + ACKS2( system, control, data, workspace, out_control ); + break; + + default: + fprintf( stderr, "[ERROR] Invalid charge method. Terminating...\n" ); + exit( UNKNOWN_OPTION ); + break; + } +} diff --git a/sPuReMD/src/box.c b/sPuReMD/src/box.c index a1d1a330..fef4145c 100644 --- a/sPuReMD/src/box.c +++ b/sPuReMD/src/box.c @@ -221,7 +221,7 @@ void Update_Box_Isotropic( simulation_box *box, real mu ) } -void Update_Box_SemiIsotropic( simulation_box *box, rvec mu ) +void Update_Box_Semi_Isotropic( simulation_box *box, rvec mu ) { /*box->box[0][0] = POW( V_new / ( box->side_prop[1] * box->side_prop[2] ), 1.0/3.0 ); diff --git a/sPuReMD/src/box.h b/sPuReMD/src/box.h index ea38a8e6..d6ab4551 100644 --- a/sPuReMD/src/box.h +++ b/sPuReMD/src/box.h @@ -34,7 +34,7 @@ void Setup_Box( real, real, real, real, real, real, simulation_box* ); /* Initializes box from box rtensor */ void Update_Box( rtensor, simulation_box* ); void Update_Box_Isotropic( simulation_box*, real ); -void Update_Box_SemiIsotropic( simulation_box*, rvec ); +void Update_Box_Semi_Isotropic( simulation_box*, rvec ); int Count_Periodic_Far_Neighbors_Big_Box( rvec, rvec, simulation_box*, real, far_neighbor_data* ); diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c index 875ce97e..7daeb3d4 100644 --- a/sPuReMD/src/charges.c +++ b/sPuReMD/src/charges.c @@ -27,9 +27,6 @@ #include "print_utils.h" #include "tool_box.h" #include "vector.h" -#if defined(HAVE_SUPERLU_MT) - #include "slu_mt_ddefs.h" -#endif static void Extrapolate_Charges_QEq( const reax_system * const system, @@ -218,9 +215,9 @@ static void Compute_Preconditioner_QEq( const reax_system * const system, case NONE_PC: break; - case DIAG_PC: + case JACOBI_PC: data->timing.cm_solver_pre_comp += - diag_pre_comp( Hptr, workspace->Hdia_inv ); + jacobi( Hptr, workspace->Hdia_inv ); break; case ICHOLT_PC: @@ -228,40 +225,31 @@ static void Compute_Preconditioner_QEq( const reax_system * const system, ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U ); break; - case ILU_PAR_PC: + case ILUT_PC: data->timing.cm_solver_pre_comp += - ILU_PAR( Hptr, control->cm_solver_pre_comp_sweeps, workspace->L, workspace->U ); + ILUT( Hptr, workspace->droptol, workspace->L, workspace->U ); break; - case ILUT_PAR_PC: + case FG_ILUT_PC: data->timing.cm_solver_pre_comp += - ILUT_PAR( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, + FG_ILUT( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, workspace->L, workspace->U ); break; - case ILU_SUPERLU_MT_PC: -#if defined(HAVE_SUPERLU_MT) - data->timing.cm_solver_pre_comp += - SuperLU_Factorize( Hptr, workspace->L, workspace->U ); -#else - fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" ); - exit( INVALID_INPUT ); -#endif - break; - case SAI_PC: #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) data->timing.cm_solver_pre_comp += sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full, &workspace->H_app_inv ); #else - fprintf( stderr, "LAPACKE support disabled. Re-compile before enabling. Terminating...\n" ); + fprintf( stderr, "[ERROR] LAPACKE support disabled. Re-compile before enabling. Terminating...\n" ); exit( INVALID_INPUT ); #endif break; default: - fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method (%d). Terminating...\n", + control->cm_solver_pre_comp_type ); exit( INVALID_INPUT ); break; } @@ -271,9 +259,9 @@ static void Compute_Preconditioner_QEq( const reax_system * const system, char fname[SIZE]; if ( control->cm_solver_pre_comp_type != NONE_PC && - control->cm_solver_pre_comp_type != DIAG_PC ) + control->cm_solver_pre_comp_type != JACOBI_PC ) { - fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) ); + fprintf( stderr, "[INFO] condest = %f\n", condest(workspace->L, workspace->U) ); #if defined(DEBUG_FOCUS) snprintf( fname, SIZE + 10, "%s.L%d.out", control->sim_name, data->step ); @@ -305,9 +293,9 @@ static void Compute_Preconditioner_QEq( const reax_system * const system, // // if ( ones == NULL ) // { -// ones = (real*) smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::ones" ); -// x = (real*) smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::x" ); -// y = (real*) smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::y" ); +// ones = smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::ones" ); +// x = smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::x" ); +// y = smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::y" ); // // for ( i = 0; i < system->N; ++i ) // { @@ -317,9 +305,9 @@ static void Compute_Preconditioner_QEq( const reax_system * const system, // // switch ( control->cm_solver_pre_comp_type ) // { -// case DIAG_PC: +// case JACOBI_PC: // data->timing.cm_solver_pre_comp += -// diag_pre_comp( Hptr, workspace->Hdia_inv ); +// jacobi( Hptr, workspace->Hdia_inv ); // break; // // case ICHOLT_PC: @@ -327,34 +315,30 @@ static void Compute_Preconditioner_QEq( const reax_system * const system, // ICHOLT( Hptr, workspace->droptol, workspace->L_EE, workspace->U_EE ); // break; // -// case ILU_PAR_PC: -// data->timing.cm_solver_pre_comp += -// ILU_PAR( Hptr, control->cm_solver_pre_comp_sweeps, workspace->L_EE, workspace->U_EE ); -// break; +// case ILUT_PC: +// data->timing.cm_solver_pre_comp += +// ILUT( Hptr, workspace->droptol, workspace->L, workspace->U ); +// break; // -// case ILUT_PAR_PC: -// data->timing.cm_solver_pre_comp += -// ILUT_PAR( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, -// workspace->L_EE, workspace->U_EE ); -// break; +// case ILUT_PC: +// data->timing.cm_solver_pre_comp += +// ILUT( Hptr, workspace->droptol, workspace->L, workspace->U ); +// break; // -// case ILU_SUPERLU_MT_PC: -//#if defined(HAVE_SUPERLU_MT) +// case FG_ILUT_PC: // data->timing.cm_solver_pre_comp += -// SuperLU_Factorize( Hptr, workspace->L_EE, workspace->U_EE ); -//#else -// fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" ); -// exit( INVALID_INPUT ); -//#endif +// FG_ILUT( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, +// workspace->L_EE, workspace->U_EE ); // break; // // default: -// fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" ); +// fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method (%d). Terminating...\n", +// control->cm_solver_pre_comp_type ); // exit( INVALID_INPUT ); // break; // } // -// if ( control->cm_solver_pre_comp_type != DIAG_PC ) +// if ( control->cm_solver_pre_comp_type != JACOBI_PC ) // { // switch ( control->cm_solver_pre_app_type ) // { @@ -509,7 +493,7 @@ static void Compute_Preconditioner_QEq( const reax_system * const system, //#define SIZE (1000) // char fname[SIZE]; // -// if ( control->cm_solver_pre_comp_type != DIAG_PC ) +// if ( control->cm_solver_pre_comp_type != JACOBI_PC ) // { // fprintf( stderr, "condest = %f\n", condest(workspace->L) ); // @@ -552,9 +536,9 @@ static void Compute_Preconditioner_EE( const reax_system * const system, Hptr = create_test_mat( ); #endif - if ( control->cm_solver_pre_comp_type == ICHOLT_PC || - control->cm_solver_pre_comp_type == ILU_PAR_PC || - control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + if ( control->cm_solver_pre_comp_type == ICHOLT_PC + || control->cm_solver_pre_comp_type == ILUT_PC + || control->cm_solver_pre_comp_type == FG_ILUT_PC ) { Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; } @@ -573,9 +557,9 @@ static void Compute_Preconditioner_EE( const reax_system * const system, case NONE_PC: break; - case DIAG_PC: + case JACOBI_PC: data->timing.cm_solver_pre_comp += - diag_pre_comp( Hptr, workspace->Hdia_inv ); + jacobi( Hptr, workspace->Hdia_inv ); break; case ICHOLT_PC: @@ -583,40 +567,31 @@ static void Compute_Preconditioner_EE( const reax_system * const system, ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U ); break; - case ILU_PAR_PC: + case ILUT_PC: data->timing.cm_solver_pre_comp += - ILU_PAR( Hptr, control->cm_solver_pre_comp_sweeps, workspace->L, workspace->U ); + ILUT( Hptr, workspace->droptol, workspace->L, workspace->U ); break; - case ILUT_PAR_PC: + case FG_ILUT_PC: data->timing.cm_solver_pre_comp += - ILUT_PAR( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, + FG_ILUT( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, workspace->L, workspace->U ); break; - case ILU_SUPERLU_MT_PC: -#if defined(HAVE_SUPERLU_MT) - data->timing.cm_solver_pre_comp += - SuperLU_Factorize( Hptr, workspace->L, workspace->U ); -#else - fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" ); - exit( INVALID_INPUT ); -#endif - break; - case SAI_PC: #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) data->timing.cm_solver_pre_comp += sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full, &workspace->H_app_inv ); #else - fprintf( stderr, "LAPACKE support disabled. Re-compile before enabling. Terminating...\n" ); + fprintf( stderr, "[ERROR] LAPACKE support disabled. Re-compile before enabling. Terminating...\n" ); exit( INVALID_INPUT ); #endif break; default: - fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method (%d). Terminating...\n", + control->cm_solver_pre_comp_type ); exit( INVALID_INPUT ); break; } @@ -633,9 +608,9 @@ static void Compute_Preconditioner_EE( const reax_system * const system, } } - if ( control->cm_solver_pre_comp_type == ICHOLT_PC || - control->cm_solver_pre_comp_type == ILU_PAR_PC || - control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + if ( control->cm_solver_pre_comp_type == ICHOLT_PC + || control->cm_solver_pre_comp_type == ILUT_PC + || control->cm_solver_pre_comp_type == FG_ILUT_PC ) { Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; } @@ -645,9 +620,9 @@ static void Compute_Preconditioner_EE( const reax_system * const system, char fname[SIZE]; if ( control->cm_solver_pre_comp_type != NONE_PC && - control->cm_solver_pre_comp_type != DIAG_PC ) + control->cm_solver_pre_comp_type != JACOBI_PC ) { - fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) ); + fprintf( stderr, "[INFO] condest = %f\n", condest(workspace->L, workspace->U) ); #if defined(DEBUG_FOCUS) snprintf( fname, SIZE + 10, "%s.L%d.out", control->sim_name, data->step ); @@ -683,9 +658,9 @@ static void Compute_Preconditioner_ACKS2( const reax_system * const system, Hptr = create_test_mat( ); #endif - if ( control->cm_solver_pre_comp_type == ICHOLT_PC || - control->cm_solver_pre_comp_type == ILU_PAR_PC || - control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + if ( control->cm_solver_pre_comp_type == ICHOLT_PC + || control->cm_solver_pre_comp_type == ILUT_PC + || control->cm_solver_pre_comp_type == FG_ILUT_PC ) { Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; Hptr->val[Hptr->start[system->N_cm] - 1] = 1.0; @@ -705,9 +680,9 @@ static void Compute_Preconditioner_ACKS2( const reax_system * const system, case NONE_PC: break; - case DIAG_PC: + case JACOBI_PC: data->timing.cm_solver_pre_comp += - diag_pre_comp( Hptr, workspace->Hdia_inv ); + jacobi( Hptr, workspace->Hdia_inv ); break; case ICHOLT_PC: @@ -715,40 +690,31 @@ static void Compute_Preconditioner_ACKS2( const reax_system * const system, ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U ); break; - case ILU_PAR_PC: + case ILUT_PC: data->timing.cm_solver_pre_comp += - ILU_PAR( Hptr, control->cm_solver_pre_comp_sweeps, workspace->L, workspace->U ); + ILUT( Hptr, workspace->droptol, workspace->L, workspace->U ); break; - case ILUT_PAR_PC: + case FG_ILUT_PC: data->timing.cm_solver_pre_comp += - ILUT_PAR( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, + FG_ILUT( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, workspace->L, workspace->U ); break; - case ILU_SUPERLU_MT_PC: -#if defined(HAVE_SUPERLU_MT) - data->timing.cm_solver_pre_comp += - SuperLU_Factorize( Hptr, workspace->L, workspace->U ); -#else - fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" ); - exit( INVALID_INPUT ); -#endif - break; - case SAI_PC: #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) data->timing.cm_solver_pre_comp += sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full, &workspace->H_app_inv ); #else - fprintf( stderr, "LAPACKE support disabled. Re-compile before enabling. Terminating...\n" ); + fprintf( stderr, "[ERROR] LAPACKE support disabled. Re-compile before enabling. Terminating...\n" ); exit( INVALID_INPUT ); #endif break; default: - fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method (%d). Terminating...\n", + control->cm_solver_pre_comp_type ); exit( INVALID_INPUT ); break; } @@ -765,9 +731,9 @@ static void Compute_Preconditioner_ACKS2( const reax_system * const system, } } - if ( control->cm_solver_pre_comp_type == ICHOLT_PC || - control->cm_solver_pre_comp_type == ILU_PAR_PC || - control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + if ( control->cm_solver_pre_comp_type == ICHOLT_PC + || control->cm_solver_pre_comp_type == ILUT_PC + || control->cm_solver_pre_comp_type == FG_ILUT_PC ) { Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; Hptr->val[Hptr->start[system->N_cm] - 1] = 0.0; @@ -778,9 +744,9 @@ static void Compute_Preconditioner_ACKS2( const reax_system * const system, char fname[SIZE]; if ( control->cm_solver_pre_comp_type != NONE_PC || - control->cm_solver_pre_comp_type != DIAG_PC ) + control->cm_solver_pre_comp_type != JACOBI_PC ) { - fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) ); + fprintf( stderr, "[INFO] condest = %f\n", condest(workspace->L, workspace->U) ); #if defined(DEBUG_FOCUS) snprintf( fname, SIZE + 10, "%s.L%d.out", control->sim_name, data->step ); @@ -825,10 +791,10 @@ static void Setup_Preconditioner_QEq( const reax_system * const system, case NONE_PC: break; - case DIAG_PC: + case JACOBI_PC: if ( workspace->Hdia_inv == NULL ) { - workspace->Hdia_inv = (real *) scalloc( Hptr->n, sizeof( real ), + workspace->Hdia_inv = scalloc( Hptr->n, sizeof( real ), "Setup_Preconditioner_QEq::workspace->Hdiv_inv" ); } break; @@ -840,74 +806,39 @@ static void Setup_Preconditioner_QEq( const reax_system * const system, if ( workspace->L == NULL ) { - Allocate_Matrix( &(workspace->L), Hptr->n, fillin ); - Allocate_Matrix( &(workspace->U), Hptr->n, fillin ); + Allocate_Matrix( &workspace->L, Hptr->n, fillin ); + Allocate_Matrix( &workspace->U, Hptr->n, fillin ); } else if ( workspace->L->m < fillin ) { Deallocate_Matrix( workspace->L ); Deallocate_Matrix( workspace->U ); - Allocate_Matrix( &(workspace->L), Hptr->n, fillin ); - Allocate_Matrix( &(workspace->U), Hptr->n, fillin ); - } - break; - - case ILU_PAR_PC: - if ( workspace->L == NULL ) - { - /* factors have sparsity pattern as H */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); - } - else if ( workspace->L->m < Hptr->m ) - { - Deallocate_Matrix( workspace->L ); - Deallocate_Matrix( workspace->U ); - - /* factors have sparsity pattern as H */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); + Allocate_Matrix( &workspace->L, Hptr->n, fillin ); + Allocate_Matrix( &workspace->U, Hptr->n, fillin ); } break; - case ILUT_PAR_PC: + case ILUT_PC: + case FG_ILUT_PC: Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); if ( workspace->L == NULL ) { - /* TODO: safest storage estimate is ILU(0) - * (same as lower triangular portion of H), could improve later */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); + /* safest storage estimate is ILU(0) (same as + * lower triangular portion of H), could improve later */ + Allocate_Matrix( &workspace->L, Hptr->n, Hptr->m ); + Allocate_Matrix( &workspace->U, Hptr->n, Hptr->m ); } else if ( workspace->L->m < Hptr->m ) { Deallocate_Matrix( workspace->L ); Deallocate_Matrix( workspace->U ); - /* TODO: safest storage estimate is ILU(0) - * (same as lower triangular portion of H), could improve later */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); - } - break; - - case ILU_SUPERLU_MT_PC: - if ( workspace->L == NULL ) - { - /* factors have sparsity pattern as H */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); - } - else if ( workspace->L->m < Hptr->m ) - { - Deallocate_Matrix( workspace->L ); - Deallocate_Matrix( workspace->U ); - - /* factors have sparsity pattern as H */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); + /* safest storage estimate is ILU(0) (same as + * lower triangular portion of H), could improve later */ + Allocate_Matrix( &workspace->L, Hptr->n, Hptr->m ); + Allocate_Matrix( &workspace->U, Hptr->n, Hptr->m ); } break; @@ -918,7 +849,8 @@ static void Setup_Preconditioner_QEq( const reax_system * const system, break; default: - fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method (%d). Terminating...\n", + control->cm_solver_pre_comp_type ); exit( INVALID_INPUT ); break; } @@ -953,9 +885,9 @@ static void Setup_Preconditioner_EE( const reax_system * const system, } data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); - if ( control->cm_solver_pre_comp_type == ICHOLT_PC || - control->cm_solver_pre_comp_type == ILU_PAR_PC || - control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + if ( control->cm_solver_pre_comp_type == ICHOLT_PC + || control->cm_solver_pre_comp_type == ILUT_PC + || control->cm_solver_pre_comp_type == FG_ILUT_PC ) { Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; } @@ -965,11 +897,11 @@ static void Setup_Preconditioner_EE( const reax_system * const system, case NONE_PC: break; - case DIAG_PC: + case JACOBI_PC: if ( workspace->Hdia_inv == NULL ) { - workspace->Hdia_inv = (real *) scalloc( system->N_cm, sizeof( real ), - "Setup_Preconditioner_QEq::workspace->Hdiv_inv" ); + workspace->Hdia_inv = scalloc( Hptr->n, sizeof( real ), + "Setup_Preconditioner_EE::workspace->Hdiv_inv" ); } break; @@ -980,44 +912,27 @@ static void Setup_Preconditioner_EE( const reax_system * const system, if ( workspace->L == NULL ) { - Allocate_Matrix( &(workspace->L), system->N_cm, fillin + system->N_cm ); - Allocate_Matrix( &(workspace->U), system->N_cm, fillin + system->N_cm ); + Allocate_Matrix( &workspace->L, system->N_cm, fillin + system->N_cm ); + Allocate_Matrix( &workspace->U, system->N_cm, fillin + system->N_cm ); } else if ( workspace->L->m < fillin + system->N_cm ) { Deallocate_Matrix( workspace->L ); Deallocate_Matrix( workspace->U ); - Allocate_Matrix( &(workspace->L), system->N_cm, fillin + system->N_cm ); - Allocate_Matrix( &(workspace->U), system->N_cm, fillin + system->N_cm ); + Allocate_Matrix( &workspace->L, system->N_cm, fillin + system->N_cm ); + Allocate_Matrix( &workspace->U, system->N_cm, fillin + system->N_cm ); } break; - case ILU_PAR_PC: - if ( workspace->L == NULL ) - { - /* factors have sparsity pattern as H */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); - } - else if ( workspace->L->m < Hptr->m ) - { - Deallocate_Matrix( workspace->L ); - Deallocate_Matrix( workspace->U ); - - /* factors have sparsity pattern as H */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); - } - break; - - case ILUT_PAR_PC: + case ILUT_PC: + case FG_ILUT_PC: Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); if ( workspace->L == NULL ) { - /* TODO: safest storage estimate is ILU(0) - * (same as lower triangular portion of H), could improve later */ + /* safest storage estimate is ILU(0) (same as + * lower triangular portion of H), could improve later */ Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); } @@ -1026,32 +941,10 @@ static void Setup_Preconditioner_EE( const reax_system * const system, Deallocate_Matrix( workspace->L ); Deallocate_Matrix( workspace->U ); - /* TODO: safest storage estimate is ILU(0) - * (same as lower triangular portion of H), could improve later */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); - } - break; - - case ILU_SUPERLU_MT_PC: - if ( workspace->L == NULL ) - { - /* factors have sparsity pattern as H */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); - } - else if ( workspace->L->m < Hptr->m ) - { - Deallocate_Matrix( workspace->L ); - Deallocate_Matrix( workspace->U ); - - /* factors have sparsity pattern as H */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); - { - fprintf( stderr, "[ERROR] not enough memory for preconditioning matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + /* safest storage estimate is ILU(0) (same as + * lower triangular portion of H), could improve later */ + Allocate_Matrix( &workspace->L, Hptr->n, Hptr->m ); + Allocate_Matrix( &workspace->U, Hptr->n, Hptr->m ); } break; @@ -1062,14 +955,15 @@ static void Setup_Preconditioner_EE( const reax_system * const system, break; default: - fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method (%d). Terminating...\n", + control->cm_solver_pre_comp_type ); exit( INVALID_INPUT ); break; } - if ( control->cm_solver_pre_comp_type == ICHOLT_PC || - control->cm_solver_pre_comp_type == ILU_PAR_PC || - control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + if ( control->cm_solver_pre_comp_type == ICHOLT_PC + || control->cm_solver_pre_comp_type == ILUT_PC + || control->cm_solver_pre_comp_type == FG_ILUT_PC ) { Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; } @@ -1104,9 +998,9 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, } data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); - if ( control->cm_solver_pre_comp_type == ICHOLT_PC || - control->cm_solver_pre_comp_type == ILU_PAR_PC || - control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + if ( control->cm_solver_pre_comp_type == ICHOLT_PC + || control->cm_solver_pre_comp_type == ILUT_PC + || control->cm_solver_pre_comp_type == FG_ILUT_PC ) { Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; Hptr->val[Hptr->start[system->N_cm] - 1] = 1.0; @@ -1117,11 +1011,11 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, case NONE_PC: break; - case DIAG_PC: + case JACOBI_PC: if ( workspace->Hdia_inv == NULL ) { - workspace->Hdia_inv = (real *) scalloc( Hptr->n, sizeof( real ), - "Setup_Preconditioner_QEq::workspace->Hdiv_inv" ); + workspace->Hdia_inv = scalloc( Hptr->n, sizeof( real ), + "Setup_Preconditioner_ACKS2::workspace->Hdiv_inv" ); } break; @@ -1132,8 +1026,8 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, if ( workspace->L == NULL ) { - Allocate_Matrix( &(workspace->L), Hptr->n, fillin ); - Allocate_Matrix( &(workspace->U), Hptr->n, fillin ); + Allocate_Matrix( &workspace->L, Hptr->n, fillin ); + Allocate_Matrix( &workspace->U, Hptr->n, fillin ); } else if ( workspace->L->m < fillin ) { @@ -1146,51 +1040,16 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, } break; - case ILU_PAR_PC: - if ( workspace->L == NULL ) - { - /* factors have sparsity pattern as H */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); - } - else if ( workspace->L->m < Hptr->m ) - { - Deallocate_Matrix( workspace->L ); - Deallocate_Matrix( workspace->U ); - - /* factors have sparsity pattern as H */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); - } - break; - - case ILUT_PAR_PC: + case ILUT_PC: + case FG_ILUT_PC: Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); if ( workspace->L == NULL ) { - /* TODO: safest storage estimate is ILU(0) - * (same as lower triangular portion of H), could improve later */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); - } - else if ( workspace->L->m < Hptr->m ) - { - Deallocate_Matrix( workspace->L ); - Deallocate_Matrix( workspace->U ); - - /* factors have sparsity pattern as H */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); - } - break; - - case ILU_SUPERLU_MT_PC: - if ( workspace->L == NULL ) - { - /* factors have sparsity pattern as H */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); + /* safest storage estimate is ILU(0) (same as + * lower triangular portion of H), could improve later */ + Allocate_Matrix( &workspace->L, Hptr->n, Hptr->m ); + Allocate_Matrix( &workspace->U, Hptr->n, Hptr->m ); } else if ( workspace->L->m < Hptr->m ) { @@ -1198,8 +1057,8 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, Deallocate_Matrix( workspace->U ); /* factors have sparsity pattern as H */ - Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ); - Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ); + Allocate_Matrix( &workspace->L, Hptr->n, Hptr->m ); + Allocate_Matrix( &workspace->U, Hptr->n, Hptr->m ); } break; @@ -1210,14 +1069,15 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, break; default: - fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method (%d). Terminating...\n", + control->cm_solver_pre_comp_type ); exit( INVALID_INPUT ); break; } - if ( control->cm_solver_pre_comp_type == ICHOLT_PC || - control->cm_solver_pre_comp_type == ILU_PAR_PC || - control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + if ( control->cm_solver_pre_comp_type == ICHOLT_PC + || control->cm_solver_pre_comp_type == ILUT_PC + || control->cm_solver_pre_comp_type == FG_ILUT_PC ) { Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; Hptr->val[Hptr->start[system->N_cm] - 1] = 0.0; @@ -1288,8 +1148,8 @@ static void QEq( reax_system * const system, control_params * const control, { int iters; - if ( control->cm_solver_pre_comp_refactor > 0 && - ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ) + if ( control->cm_solver_pre_comp_refactor > 0 + && ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ) { Setup_Preconditioner_QEq( system, control, data, workspace ); @@ -1344,7 +1204,8 @@ static void QEq( reax_system * const system, control_params * const control, break; default: - fprintf( stderr, "Unrecognized QEq solver selection. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized solver selection (%d). Terminating...\n", + control->cm_solver_type ); exit( INVALID_INPUT ); break; } @@ -1379,8 +1240,8 @@ static void EE( reax_system * const system, control_params * const control, { int iters; - if ( control->cm_solver_pre_comp_refactor > 0 && - ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ) + if ( control->cm_solver_pre_comp_refactor > 0 + && ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ) { Setup_Preconditioner_EE( system, control, data, workspace ); @@ -1424,7 +1285,8 @@ static void EE( reax_system * const system, control_params * const control, break; default: - fprintf( stderr, "Unrecognized EE solver selection. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized solver selection (%d). Terminating...\n", + control->cm_solver_type ); exit( INVALID_INPUT ); break; } @@ -1451,8 +1313,8 @@ static void ACKS2( reax_system * const system, control_params * const control, { int iters; - if ( control->cm_solver_pre_comp_refactor > 0 && - ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ) + if ( control->cm_solver_pre_comp_refactor > 0 + && ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ) { Setup_Preconditioner_ACKS2( system, control, data, workspace ); @@ -1513,7 +1375,8 @@ static void ACKS2( reax_system * const system, control_params * const control, break; default: - fprintf( stderr, "Unrecognized ACKS2 solver selection. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized solver selection (%d). Terminating...\n", + control->cm_solver_type ); exit( INVALID_INPUT ); break; } diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c index 3cdc7c30..d019bba8 100644 --- a/sPuReMD/src/init_md.c +++ b/sPuReMD/src/init_md.c @@ -46,10 +46,6 @@ static void Generate_Initial_Velocities( reax_system *system, real T ) { rvec_MakeZero( system->atoms[i].v ); } - -#if defined(DEBUG) - fprintf( stderr, "no random velocities...\n" ); -#endif } else { @@ -87,32 +83,33 @@ static void Init_System( reax_system *system, control_params *control, Compute_Total_Mass( system, data ); Compute_Center_of_Mass( system, data, stderr ); - /* reposition atoms */ - // just fit the atoms to the periodic box + /* just fit the atoms to the periodic box */ if ( control->reposition_atoms == 0 ) { rvec_MakeZero( dx ); } - // put the center of mass to the center of the box + /* put the center of mass to the center of the box */ else if ( control->reposition_atoms == 1 ) { rvec_Scale( dx, 0.5, system->box.box_norms ); rvec_ScaledAdd( dx, -1., data->xcm ); } - // put the center of mass to the origin + /* put the center of mass to the origin */ else if ( control->reposition_atoms == 2 ) { rvec_Scale( dx, -1., data->xcm ); } else { - fprintf( stderr, "UNKNOWN OPTION: reposition_atoms. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unknown option for reposition_atoms (%d). Terminating...\n", + control->reposition_atoms ); exit( UNKNOWN_OPTION ); } for ( i = 0; i < system->N; ++i ) { Inc_on_T3( system->atoms[i].x, dx, &(system->box) ); + /*fprintf( stderr, "%6d%2d%8.3f%8.3f%8.3f\n", i, system->atoms[i].type, system->atoms[i].x[0], system->atoms[i].x[1], system->atoms[i].x[2] );*/ @@ -148,17 +145,23 @@ static void Init_Simulation_Data( reax_system *system, control_params *control, *Evolve = Velocity_Verlet_NVE; break; + case bNVT: + data->N_f = 3 * system->N + 1; + *Evolve = Velocity_Verlet_Berendsen_NVT; + break; case nhNVT: data->N_f = 3 * system->N + 1; //control->Tau_T = 100 * data->N_f * K_B * control->T_final; + if ( !control->restart || (control->restart && control->random_vel) ) { - data->therm.G_xi = control->Tau_T * (2.0 * data->E_Kin - - data->N_f * K_B * control->T ); + data->therm.G_xi = control->Tau_T * (2.0 * data->E_Kin + - data->N_f * K_B * control->T ); data->therm.v_xi = data->therm.G_xi * control->dt; data->therm.v_xi_old = 0; data->therm.xi = 0; + #if defined(DEBUG_FOCUS) fprintf( stderr, "init_md: G_xi=%f Tau_T=%f E_kin=%f N_f=%f v_xi=%f\n", data->therm.G_xi, control->Tau_T, data->E_Kin, @@ -170,10 +173,12 @@ static void Init_Simulation_Data( reax_system *system, control_params *control, break; /* anisotropic NPT */ - case NPT: - fprintf( stderr, "THIS OPTION IS NOT YET IMPLEMENTED! TERMINATING...\n" ); + case aNPT: + fprintf( stderr, "[ERROR] THIS OPTION IS NOT YET IMPLEMENTED! TERMINATING...\n" ); exit( UNKNOWN_OPTION ); + data->N_f = 3 * system->N + 9; + if ( !control->restart ) { data->therm.G_xi = control->Tau_T * (2.0 * data->E_Kin - @@ -183,13 +188,14 @@ static void Init_Simulation_Data( reax_system *system, control_params *control, //data->inv_W = 1. / (data->N_f*K_B*control->T*SQR(control->Tau_P)); //Compute_Pressure( system, data, workspace ); } + *Evolve = Velocity_Verlet_Berendsen_Isotropic_NPT; break; /* semi-isotropic NPT */ case sNPT: data->N_f = 3 * system->N + 4; - *Evolve = Velocity_Verlet_Berendsen_SemiIsotropic_NPT; + *Evolve = Velocity_Verlet_Berendsen_Semi_Isotropic_NPT; break; /* isotropic NPT */ @@ -198,12 +204,9 @@ static void Init_Simulation_Data( reax_system *system, control_params *control, *Evolve = Velocity_Verlet_Berendsen_Isotropic_NPT; break; - case bNVT: - data->N_f = 3 * system->N + 1; - *Evolve = Velocity_Verlet_Berendsen_NVT; - break; - default: + fprintf( stderr, "[ERROR] Unknown ensemble type (%d). Terminating...\n", control->ensemble ); + exit( UNKNOWN_OPTION ); break; } @@ -240,17 +243,17 @@ static void Init_Taper( control_params *control, static_storage *workspace ) if ( FABS( swa ) > 0.01 ) { - fprintf( stderr, "Warning: non-zero value for lower Taper-radius cutoff\n" ); + fprintf( stderr, "[WARNING] non-zero value for lower Taper-radius cutoff (%f)\n", swa ); } if ( swb < 0.0 ) { - fprintf( stderr, "Negative value for upper Taper-radius cutoff\n" ); + fprintf( stderr, "[ERROR] Negative value for upper Taper-radius cutoff\n" ); exit( INVALID_INPUT ); } else if ( swb < 5.0 ) { - fprintf( stderr, "Warning: low value for upper Taper-radius cutoff:%f\n", swb ); + fprintf( stderr, "[WARNING] Low value for upper Taper-radius cutoff (%f)\n", swb ); } d1 = swb - swa; @@ -277,43 +280,43 @@ static void Init_Workspace( reax_system *system, control_params *control, { int i; - /* Allocate space for hydrogen bond list */ - workspace->hbond_index = (int *) smalloc( system->N * sizeof( int ), + /* hydrogen bond list */ + workspace->hbond_index = smalloc( system->N * sizeof( int ), "Init_Workspace::workspace->hbond_index" ); /* bond order related storage */ - workspace->total_bond_order = (real *) smalloc( system->N * sizeof( real ), + workspace->total_bond_order = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->bond_order" ); - workspace->Deltap = (real *) smalloc( system->N * sizeof( real ), + workspace->Deltap = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->Deltap" ); - workspace->Deltap_boc = (real *) smalloc( system->N * sizeof( real ), + workspace->Deltap_boc = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->Deltap_boc" ); - workspace->dDeltap_self = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->dDeltap_self = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->dDeltap_self" ); - workspace->Delta = (real *) smalloc( system->N * sizeof( real ), + workspace->Delta = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->Delta" ); - workspace->Delta_lp = (real *) smalloc( system->N * sizeof( real ), + workspace->Delta_lp = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->Delta_lp" ); - workspace->Delta_lp_temp = (real *) smalloc( system->N * sizeof( real ), + workspace->Delta_lp_temp = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->Delta_lp_temp" ); - workspace->dDelta_lp = (real *) smalloc( system->N * sizeof( real ), + workspace->dDelta_lp = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->dDelta_lp" ); - workspace->dDelta_lp_temp = (real *) smalloc( system->N * sizeof( real ), + workspace->dDelta_lp_temp = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->dDelta_lp_temp" ); - workspace->Delta_e = (real *) smalloc( system->N * sizeof( real ), + workspace->Delta_e = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->Delta_e" ); - workspace->Delta_boc = (real *) smalloc( system->N * sizeof( real ), + workspace->Delta_boc = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->Delta_boc" ); - workspace->nlp = (real *) smalloc( system->N * sizeof( real ), + workspace->nlp = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->nlp" ); - workspace->nlp_temp = (real *) smalloc( system->N * sizeof( real ), + workspace->nlp_temp = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->nlp_temp" ); - workspace->Clp = (real *) smalloc( system->N * sizeof( real ), + workspace->Clp = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->Clp" ); - workspace->CdDelta = (real *) smalloc( system->N * sizeof( real ), + workspace->CdDelta = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->CdDelta" ); - workspace->vlpex = (real *) smalloc( system->N * sizeof( real ), + workspace->vlpex = smalloc( system->N * sizeof( real ), "Init_Workspace::workspace->vlpex" ); /* charge method storage */ @@ -329,7 +332,7 @@ static void Init_Workspace( reax_system *system, control_params *control, system->N_cm = 2 * system->N + 2; break; default: - fprintf( stderr, "Unknown charge method type. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unknown charge method type. Terminating...\n" ); exit( INVALID_INPUT ); break; } @@ -344,36 +347,37 @@ static void Init_Workspace( reax_system *system, control_params *control, workspace->L = NULL; workspace->U = NULL; workspace->Hdia_inv = NULL; - if ( control->cm_solver_pre_comp_type == ICHOLT_PC || - control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + if ( control->cm_solver_pre_comp_type == ICHOLT_PC + || control->cm_solver_pre_comp_type == ILUT_PC + || control->cm_solver_pre_comp_type == FG_ILUT_PC ) { - workspace->droptol = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->droptol = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->droptol" ); } //TODO: check if unused - //workspace->w = (real *) scalloc( cm_lin_sys_size, sizeof( real ), + //workspace->w = scalloc( cm_lin_sys_size, sizeof( real ), //"Init_Workspace::workspace->droptol" ); //TODO: check if unused - workspace->b = (real *) scalloc( system->N_cm * 2, sizeof( real ), + workspace->b = scalloc( system->N_cm * 2, sizeof( real ), "Init_Workspace::workspace->b" ); - workspace->b_s = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->b_s = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->b_s" ); - workspace->b_t = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->b_t = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->b_t" ); - workspace->b_prc = (real *) scalloc( system->N_cm * 2, sizeof( real ), + workspace->b_prc = scalloc( system->N_cm * 2, sizeof( real ), "Init_Workspace::workspace->b_prc" ); - workspace->b_prm = (real *) scalloc( system->N_cm * 2, sizeof( real ), + workspace->b_prm = scalloc( system->N_cm * 2, sizeof( real ), "Init_Workspace::workspace->b_prm" ); - workspace->s = (real**) scalloc( 5, sizeof( real* ), + workspace->s = scalloc( 5, sizeof( real* ), "Init_Workspace::workspace->s" ); - workspace->t = (real**) scalloc( 5, sizeof( real* ), + workspace->t = scalloc( 5, sizeof( real* ), "Init_Workspace::workspace->t" ); for ( i = 0; i < 5; ++i ) { - workspace->s[i] = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->s[i] = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->s[i]" ); - workspace->t[i] = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->t[i] = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->t[i]" ); } @@ -426,7 +430,7 @@ static void Init_Workspace( reax_system *system, control_params *control, break; default: - fprintf( stderr, "Unknown charge method type. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unknown charge method type. Terminating...\n" ); exit( INVALID_INPUT ); break; } @@ -435,89 +439,89 @@ static void Init_Workspace( reax_system *system, control_params *control, { case GMRES_S: case GMRES_H_S: - workspace->y = (real *) scalloc( control->cm_solver_restart + 1, sizeof( real ), + workspace->y = scalloc( control->cm_solver_restart + 1, sizeof( real ), "Init_Workspace::workspace->y" ); - workspace->z = (real *) scalloc( control->cm_solver_restart + 1, sizeof( real ), + workspace->z = scalloc( control->cm_solver_restart + 1, sizeof( real ), "Init_Workspace::workspace->z" ); - workspace->g = (real *) scalloc( control->cm_solver_restart + 1, sizeof( real ), + workspace->g = scalloc( control->cm_solver_restart + 1, sizeof( real ), "Init_Workspace::workspace->g" ); - workspace->h = (real **) scalloc( control->cm_solver_restart + 1, sizeof( real*), + workspace->h = scalloc( control->cm_solver_restart + 1, sizeof( real*), "Init_Workspace::workspace->h" ); - workspace->hs = (real *) scalloc( control->cm_solver_restart + 1, sizeof( real ), + workspace->hs = scalloc( control->cm_solver_restart + 1, sizeof( real ), "Init_Workspace::workspace->hs" ); - workspace->hc = (real *) scalloc( control->cm_solver_restart + 1, sizeof( real ), + workspace->hc = scalloc( control->cm_solver_restart + 1, sizeof( real ), "Init_Workspace::workspace->hc" ); - workspace->rn = (real **) scalloc( control->cm_solver_restart + 1, sizeof( real*), + workspace->rn = scalloc( control->cm_solver_restart + 1, sizeof( real*), "Init_Workspace::workspace->rn" ); - workspace->v = (real **) scalloc( control->cm_solver_restart + 1, sizeof( real*), + workspace->v = scalloc( control->cm_solver_restart + 1, sizeof( real*), "Init_Workspace::workspace->v" ); for ( i = 0; i < control->cm_solver_restart + 1; ++i ) { - workspace->h[i] = (real *) scalloc( control->cm_solver_restart + 1, sizeof( real ), + workspace->h[i] = scalloc( control->cm_solver_restart + 1, sizeof( real ), "Init_Workspace::workspace->h[i]" ); - workspace->rn[i] = (real *) scalloc( system->N_cm * 2, sizeof( real ), + workspace->rn[i] = scalloc( system->N_cm * 2, sizeof( real ), "Init_Workspace::workspace->rn[i]" ); - workspace->v[i] = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->v[i] = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->v[i]" ); } - workspace->r = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->r = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->r" ); - workspace->d = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->d = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->d" ); - workspace->q = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->q = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->q" ); - workspace->p = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->p = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->p" ); break; case CG_S: - workspace->r = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->r = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->r" ); - workspace->d = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->d = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->d" ); - workspace->q = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->q = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->q" ); - workspace->p = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->p = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->p" ); break; case SDM_S: - workspace->r = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->r = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->r" ); - workspace->d = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->d = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->d" ); - workspace->q = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->q = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->q" ); break; case BiCGStab_S: - workspace->r = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->r = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->r" ); - workspace->r_hat = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->r_hat = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->r_hat" ); - workspace->d = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->d = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->d" ); - workspace->q = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->q = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->q" ); - workspace->p = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->p = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->p" ); - workspace->y = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->y = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->y" ); - workspace->z = (real *) scalloc( system->N_cm, sizeof( real ), + workspace->z = scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->z" ); break; default: - fprintf( stderr, "Unknown charge method linear solver type. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unknown charge method linear solver type. Terminating...\n" ); exit( INVALID_INPUT ); break; } /* SpMV related */ #ifdef _OPENMP - workspace->b_local = (real*) smalloc( control->num_threads * system->N_cm * sizeof(real), + workspace->b_local = smalloc( control->num_threads * system->N_cm * sizeof(real), "Init_Workspace::b_local" ); #endif @@ -527,19 +531,19 @@ static void Init_Workspace( reax_system *system, control_params *control, if ( control->cm_solver_pre_app_type == TRI_SOLVE_LEVEL_SCHED_PA || control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) { - workspace->row_levels_L = (unsigned int*) smalloc( system->N_cm * sizeof(unsigned int), + workspace->row_levels_L = smalloc( system->N_cm * sizeof(unsigned int), "Init_Workspace::row_levels_L" ); - workspace->level_rows_L = (unsigned int*) smalloc( system->N_cm * sizeof(unsigned int), + workspace->level_rows_L = smalloc( system->N_cm * sizeof(unsigned int), "Init_Workspace::level_rows_L" ); - workspace->level_rows_cnt_L = (unsigned int*) smalloc( (system->N_cm + 1) * sizeof(unsigned int), + workspace->level_rows_cnt_L = smalloc( (system->N_cm + 1) * sizeof(unsigned int), "Init_Workspace::level_rows_cnt_L" ); - workspace->row_levels_U = (unsigned int*) smalloc( system->N_cm * sizeof(unsigned int), + workspace->row_levels_U = smalloc( system->N_cm * sizeof(unsigned int), "Init_Workspace::row_levels_U" ); - workspace->level_rows_U = (unsigned int*) smalloc( system->N_cm * sizeof(unsigned int), + workspace->level_rows_U = smalloc( system->N_cm * sizeof(unsigned int), "Init_Workspace::level_rows_U" ); - workspace->level_rows_cnt_U = (unsigned int*) smalloc( (system->N_cm + 1) * sizeof(unsigned int), + workspace->level_rows_cnt_U = smalloc( (system->N_cm + 1) * sizeof(unsigned int), "Init_Workspace::level_rows_cnt_U" ); - workspace->top = (unsigned int*) smalloc( (system->N_cm + 1) * sizeof(unsigned int), + workspace->top = smalloc( (system->N_cm + 1) * sizeof(unsigned int), "Init_Workspace::top" ); } else @@ -557,26 +561,24 @@ static void Init_Workspace( reax_system *system, control_params *control, workspace->recolor_cnt = 0; if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) { - workspace->color = (unsigned int*) smalloc( sizeof(unsigned int) * system->N_cm, + workspace->color = smalloc( sizeof(unsigned int) * system->N_cm, "Init_Workspace::color" ); - workspace->to_color = (unsigned int*) smalloc( sizeof(unsigned int) * system->N_cm, + workspace->to_color = smalloc( sizeof(unsigned int) * system->N_cm, "Init_Workspace::to_color" ); - workspace->conflict = (unsigned int*) smalloc( sizeof(unsigned int) * system->N_cm, + workspace->conflict = smalloc( sizeof(unsigned int) * system->N_cm, "setup_graph_coloring::conflict" ); - workspace->conflict_cnt = (unsigned int*) smalloc( sizeof(unsigned int) * (control->num_threads + 1), + workspace->conflict_cnt = smalloc( sizeof(unsigned int) * (control->num_threads + 1), "Init_Workspace::conflict_cnt" ); - workspace->recolor = (unsigned int*) smalloc( sizeof(unsigned int) * system->N_cm, + workspace->recolor = smalloc( sizeof(unsigned int) * system->N_cm, "Init_Workspace::recolor" ); - workspace->color_top = (unsigned int*) smalloc( sizeof(unsigned int) * (system->N_cm + 1), + workspace->color_top = smalloc( sizeof(unsigned int) * (system->N_cm + 1), "Init_Workspace::color_top" ); - workspace->permuted_row_col = (unsigned int*) smalloc( sizeof(unsigned int) * system->N_cm, + workspace->permuted_row_col = smalloc( sizeof(unsigned int) * system->N_cm, "Init_Workspace::premuted_row_col" ); - workspace->permuted_row_col_inv = (unsigned int*) smalloc( sizeof(unsigned int) * system->N_cm, + workspace->permuted_row_col_inv = smalloc( sizeof(unsigned int) * system->N_cm, "Init_Workspace::premuted_row_col_inv" ); - workspace->y_p = (real*) smalloc( sizeof(real) * system->N_cm, - "Init_Workspace::y_p" ); - workspace->x_p = (real*) smalloc( sizeof(real) * system->N_cm, - "Init_Workspace::x_p" ); + workspace->y_p = smalloc( sizeof(real) * system->N_cm, "Init_Workspace::y_p" ); + workspace->x_p = smalloc( sizeof(real) * system->N_cm, "Init_Workspace::x_p" ); } else { @@ -595,15 +597,15 @@ static void Init_Workspace( reax_system *system, control_params *control, /* Jacobi iteration related */ if ( control->cm_solver_pre_app_type == JACOBI_ITER_PA ) { - workspace->Dinv_L = (real*) smalloc( sizeof(real) * system->N_cm, + workspace->Dinv_L = smalloc( sizeof(real) * system->N_cm, "Init_Workspace::Dinv_L" ); - workspace->Dinv_U = (real*) smalloc( sizeof(real) * system->N_cm, + workspace->Dinv_U = smalloc( sizeof(real) * system->N_cm, "Init_Workspace::Dinv_U" ); - workspace->Dinv_b = (real*) smalloc( sizeof(real) * system->N_cm, + workspace->Dinv_b = smalloc( sizeof(real) * system->N_cm, "Init_Workspace::Dinv_b" ); - workspace->rp = (real*) smalloc( sizeof(real) * system->N_cm, + workspace->rp = smalloc( sizeof(real) * system->N_cm, "Init_Workspace::rp" ); - workspace->rp2 = (real*) smalloc( sizeof(real) * system->N_cm, + workspace->rp2 = smalloc( sizeof(real) * system->N_cm, "Init_Workspace::rp2" ); } else @@ -616,24 +618,24 @@ static void Init_Workspace( reax_system *system, control_params *control, } /* integrator storage */ - workspace->a = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->a = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->a" ); - workspace->f_old = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_old = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_old" ); - workspace->v_const = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->v_const = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->v_const" ); #ifdef _OPENMP - workspace->f_local = (rvec *) smalloc( control->num_threads * system->N * sizeof( rvec ), + workspace->f_local = smalloc( control->num_threads * system->N * sizeof( rvec ), "Init_Workspace::workspace->f_local" ); #endif /* storage for analysis */ if ( control->molec_anal || control->diffusion_coef ) { - workspace->mark = (int *) scalloc( system->N, sizeof(int), + workspace->mark = scalloc( system->N, sizeof(int), "Init_Workspace::workspace->mark" ); - workspace->old_mark = (int *) scalloc( system->N, sizeof(int), + workspace->old_mark = scalloc( system->N, sizeof(int), "Init_Workspace::workspace->old_mark" ); } else @@ -643,7 +645,7 @@ static void Init_Workspace( reax_system *system, control_params *control, if ( control->diffusion_coef ) { - workspace->x_old = (rvec *) scalloc( system->N, sizeof( rvec ), + workspace->x_old = scalloc( system->N, sizeof( rvec ), "Init_Workspace::workspace->x_old" ); } else @@ -652,33 +654,33 @@ static void Init_Workspace( reax_system *system, control_params *control, } #ifdef TEST_FORCES - workspace->dDelta = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->dDelta = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->dDelta" ); - workspace->f_ele = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_ele = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_ele" ); - workspace->f_vdw = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_vdw = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_vdw" ); - workspace->f_bo = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_bo = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_bo" ); - workspace->f_be = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_be = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_be" ); - workspace->f_lp = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_lp = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_lp" ); - workspace->f_ov = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_ov = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_ov" ); - workspace->f_un = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_un = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_un" ); - workspace->f_ang = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_ang = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_ang" ); - workspace->f_coa = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_coa = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_coa" ); - workspace->f_pen = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_pen = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_pen" ); - workspace->f_hb = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_hb = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_hb" ); - workspace->f_tor = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_tor = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_tor" ); - workspace->f_con = (rvec *) smalloc( system->N * sizeof( rvec ), + workspace->f_con = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->f_con" ); #endif @@ -877,9 +879,8 @@ static void Init_Out_Controls( reax_system *system, control_params *control, } /* Init pressure file */ - if ( control->ensemble == NPT || - control->ensemble == iNPT || - control->ensemble == sNPT ) + if ( control->ensemble == aNPT || control->ensemble == iNPT + || control->ensemble == sNPT ) { strncpy( temp, control->sim_name, MAX_STR ); strcat( temp, ".prs" ); @@ -1061,17 +1062,6 @@ static void Init_Out_Controls( reax_system *system, control_params *control, out_control->ftot2 = sfopen( temp, "w" ); #endif - - /* Error handling */ - /* if ( out_control->out == NULL || out_control->pot == NULL || - out_control->log == NULL || out_control->mol == NULL || - out_control->dpl == NULL || out_control->drft == NULL || - out_control->pdb == NULL ) - { - fprintf( stderr, "FILE OPEN ERROR. TERMINATING..." ); - exit( CANNOT_OPEN_FILE ); - }*/ - #undef TEMP_SIZE } @@ -1211,9 +1201,9 @@ static void Finalize_Workspace( reax_system *system, control_params *control, Deallocate_Matrix( workspace->H ); Deallocate_Matrix( workspace->H_sp ); - if ( control->cm_solver_pre_comp_type == ICHOLT_PC || - control->cm_solver_pre_comp_type == ILU_PAR_PC || - control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + if ( control->cm_solver_pre_comp_type == ICHOLT_PC + || control->cm_solver_pre_comp_type == ILUT_PC + || control->cm_solver_pre_comp_type == FG_ILUT_PC ) { Deallocate_Matrix( workspace->L ); Deallocate_Matrix( workspace->U ); @@ -1237,12 +1227,13 @@ static void Finalize_Workspace( reax_system *system, control_params *control, sfree( workspace->t[i], "Finalize_Workspace::workspace->t[i]" ); } - if ( control->cm_solver_pre_comp_type == DIAG_PC ) + if ( control->cm_solver_pre_comp_type == JACOBI_PC ) { sfree( workspace->Hdia_inv, "Finalize_Workspace::workspace->Hdia_inv" ); } - if ( control->cm_solver_pre_comp_type == ICHOLT_PC || - control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + if ( control->cm_solver_pre_comp_type == ICHOLT_PC + || control->cm_solver_pre_comp_type == ILUT_PC + || control->cm_solver_pre_comp_type == FG_ILUT_PC ) { sfree( workspace->droptol, "Finalize_Workspace::workspace->droptol" ); } @@ -1307,7 +1298,7 @@ static void Finalize_Workspace( reax_system *system, control_params *control, break; default: - fprintf( stderr, "Unknown charge method linear solver type. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unknown charge method linear solver type. Terminating...\n" ); exit( INVALID_INPUT ); break; } @@ -1430,7 +1421,6 @@ static void Finalize_Lists( control_params *control, reax_list **lists ) static void Finalize_Out_Controls( reax_system *system, control_params *control, static_storage *workspace, output_controls *out_control ) { - /* close trajectory file */ if ( out_control->write_steps > 0 ) { sfclose( out_control->trj, "Finalize_Out_Controls::out_control->trj" ); @@ -1438,27 +1428,19 @@ static void Finalize_Out_Controls( reax_system *system, control_params *control, if ( out_control->energy_update_freq > 0 ) { - /* close out file */ sfclose( out_control->out, "Finalize_Out_Controls::out_control->out" ); - - /* close potentials file */ sfclose( out_control->pot, "Finalize_Out_Controls::out_control->pot" ); - - /* close log file */ sfclose( out_control->log, "Finalize_Out_Controls::out_control->log" ); } - /* close pressure file */ - if ( control->ensemble == NPT || - control->ensemble == iNPT || - control->ensemble == sNPT ) + if ( control->ensemble == aNPT || control->ensemble == iNPT + || control->ensemble == sNPT ) { sfclose( out_control->prs, "Finalize_Out_Controls::out_control->prs" ); } if ( control->molec_anal ) { - /* close molecular analysis file */ sfclose( out_control->mol, "Finalize_Out_Controls::out_control->mol" ); if ( control->num_ignored ) @@ -1467,13 +1449,11 @@ static void Finalize_Out_Controls( reax_system *system, control_params *control, } } - /* close electric dipole moment analysis file */ if ( control->dipole_anal ) { sfclose( out_control->dpl, "Finalize_Out_Controls::out_control->dpl" ); } - /* close diffusion coef analysis file */ if ( control->diffusion_coef ) { sfclose( out_control->drft, "Finalize_Out_Controls::out_control->drft" ); @@ -1481,75 +1461,31 @@ static void Finalize_Out_Controls( reax_system *system, control_params *control, #ifdef TEST_ENERGY - /* close bond energy file */ sfclose( out_control->ebond, "Finalize_Out_Controls::out_control->ebond" ); - - /* close lone-pair energy file */ sfclose( out_control->elp, "Finalize_Out_Controls::out_control->elp" ); - - /* close overcoordination energy file */ sfclose( out_control->eov, "Finalize_Out_Controls::out_control->eov" ); - - /* close undercoordination energy file */ sfclose( out_control->eun, "Finalize_Out_Controls::out_control->eun" ); - - /* close angle energy file */ sfclose( out_control->eval, "Finalize_Out_Controls::out_control->eval" ); - - /* close penalty energy file */ sfclose( out_control->epen, "Finalize_Out_Controls::out_control->epen" ); - - /* close coalition energy file */ sfclose( out_control->ecoa, "Finalize_Out_Controls::out_control->ecoa" ); - - /* close hydrogen bond energy file */ sfclose( out_control->ehb, "Finalize_Out_Controls::out_control->ehb" ); - - /* close torsion energy file */ sfclose( out_control->etor, "Finalize_Out_Controls::out_control->etor" ); - - /* close conjugation energy file */ sfclose( out_control->econ, "Finalize_Out_Controls::out_control->econ" ); - - /* close vdWaals energy file */ sfclose( out_control->evdw, "Finalize_Out_Controls::out_control->evdw" ); - - /* close coulomb energy file */ sfclose( out_control->ecou, "Finalize_Out_Controls::out_control->ecou" ); #endif #ifdef TEST_FORCES - /* close bond orders file */ sfclose( out_control->fbo, "Finalize_Out_Controls::out_control->fbo" ); - - /* close bond orders derivatives file */ sfclose( out_control->fdbo, "Finalize_Out_Controls::out_control->fdbo" ); - - /* close bond forces file */ sfclose( out_control->fbond, "Finalize_Out_Controls::out_control->fbond" ); - - /* close lone-pair forces file */ sfclose( out_control->flp, "Finalize_Out_Controls::out_control->flp" ); - - /* close overcoordination forces file */ sfclose( out_control->fatom, "Finalize_Out_Controls::out_control->fatom" ); - - /* close angle forces file */ sfclose( out_control->f3body, "Finalize_Out_Controls::out_control->f3body" ); - - /* close hydrogen bond forces file */ sfclose( out_control->fhb, "Finalize_Out_Controls::out_control->fhb" ); - - /* close torsion forces file */ sfclose( out_control->f4body, "Finalize_Out_Controls::out_control->f4body" ); - - /* close nonbonded forces file */ sfclose( out_control->fnonb, "Finalize_Out_Controls::out_control->fnonb" ); - - /* close total force file */ sfclose( out_control->ftot, "Finalize_Out_Controls::out_control->ftot" ); - - /* close coulomb forces file */ sfclose( out_control->ftot2, "Finalize_Out_Controls::out_control->ftot2" ); #endif } diff --git a/sPuReMD/src/integrate.c b/sPuReMD/src/integrate.c index 2e5c06b2..5698379c 100644 --- a/sPuReMD/src/integrate.c +++ b/sPuReMD/src/integrate.c @@ -70,11 +70,13 @@ void Velocity_Verlet_NVE(reax_system *system, control_params *control, Reallocate( system, control, workspace, lists, renbr ); Reset( system, control, data, workspace, lists ); + if ( renbr ) { Generate_Neighbor_Lists( system, control, data, workspace, lists, out_control ); } + Compute_Forces( system, control, data, workspace, lists, out_control ); for ( i = 0; i < system->N; i++ ) @@ -90,6 +92,104 @@ void Velocity_Verlet_NVE(reax_system *system, control_params *control, } +/* Uses Berendsen-type coupling for both T and P. + * All box dimensions are scaled by the same amount, + * there is no change in the angles between axes. */ +void Velocity_Verlet_Berendsen_NVT( reax_system* system, + control_params* control, simulation_data *data, + static_storage *workspace, reax_list **lists, + output_controls *out_control ) +{ + int i, steps, renbr; + real inv_m, dt, lambda; + rvec dx; + reax_atom *atom; + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "step%d\n", data->step ); +#endif + + dt = control->dt; + steps = data->step - data->prev_steps; + renbr = (steps % control->reneighbor == 0); + + /* velocity verlet, 1st part */ + for ( i = 0; i < system->N; i++ ) + { + atom = &system->atoms[i]; + inv_m = 1.0 / system->reaxprm.sbp[atom->type].mass; + /* Compute x(t + dt) */ + rvec_ScaledSum( dx, dt, atom->v, -0.5 * F_CONV * inv_m * SQR(dt), atom->f ); + + /* bNVT fix - Metin's suggestion */ + /* ORIGINAL CHANGE -- CHECK THE branch serial-bnvt for the fix */ + //rvec_Add( atom->x, dx ); + Inc_on_T3( atom->x, dx, &system->box ); + + /* Compute v(t + dt/2) */ + rvec_ScaledAdd( atom->v, -0.5 * F_CONV * inv_m * dt, atom->f ); + } + +#if defined(DEBUG_FOCUS) + fprintf(stderr, "step%d: verlet1 done\n", data->step); +#endif + + Reallocate( system, control, workspace, lists, renbr ); + Reset( system, control, data, workspace, lists ); + + if ( renbr ) + { + Generate_Neighbor_Lists( system, control, data, workspace, lists, out_control ); + } + + Compute_Forces( system, control, data, workspace, + lists, out_control ); + + /* velocity verlet, 2nd part */ + for ( i = 0; i < system->N; i++ ) + { + atom = &system->atoms[i]; + inv_m = 1.0 / system->reaxprm.sbp[atom->type].mass; + /* Compute v(t + dt) */ + rvec_ScaledAdd( atom->v, -0.5 * dt * F_CONV * inv_m, atom->f ); + } + +#if defined(DEBUG_FOCUS) + fprintf(stderr, "step%d: verlet2 done\n", data->step); +#endif + + /* temperature scaler */ + Compute_Kinetic_Energy( system, data ); + lambda = 1.0 + (dt / control->Tau_T) * (control->T / data->therm.T - 1.0); + if ( lambda < MIN_dT ) + { + lambda = MIN_dT; + } + else if (lambda > MAX_dT ) + { + lambda = MAX_dT; + } + lambda = SQRT( lambda ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "step:%d lambda -> %f \n", data->step, lambda ); +#endif + + /* Scale velocities and positions at t+dt */ + for ( i = 0; i < system->N; ++i ) + { + atom = &system->atoms[i]; + rvec_Scale( atom->v, lambda, atom->v ); + } + Compute_Kinetic_Energy( system, data ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "step%d: scaled velocities\n", + data->step ); +#endif +} + + void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params* control, simulation_data *data, static_storage *workspace, reax_list **lists, output_controls *out_control ) @@ -102,7 +202,7 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params* dt = control->dt; dt_sqr = SQR( dt ); - therm = &( data->therm ); + therm = &data->therm; steps = data->step - data->prev_steps; renbr = (steps % control->reneighbor == 0); @@ -131,11 +231,13 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params* Reallocate( system, control, workspace, lists, renbr ); Reset( system, control, data, workspace, lists ); + if ( renbr ) { Generate_Neighbor_Lists( system, control, data, workspace, lists, out_control ); } + /* Calculate Forces at time (t + dt) */ Compute_Forces( system, control, data, workspace, lists, out_control ); @@ -150,6 +252,7 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params* -0.5 * dt * inv_m * F_CONV, workspace->f_old[i] ); rvec_ScaledAdd( workspace->v_const[i], -0.5 * dt * inv_m * F_CONV, system->atoms[i].f ); + #if defined(DEBUG) fprintf( stderr, "atom%d: inv_m=%f, C1=%f, C2=%f, v_const=%f %f %f\n", i, inv_m, 1.0 - 0.5 * dt * therm->v_xi, @@ -164,26 +267,28 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params* do { itr++; - /* new values become old in this iteration */ v_xi_old = v_xi_new; coef_v = 1.0 / (1.0 + 0.5 * dt * v_xi_old); E_kin_new = 0; + for ( i = 0; i < system->N; ++i ) { rvec_Scale( system->atoms[i].v, coef_v, workspace->v_const[i] ); - E_kin_new += ( 0.5 * system->reaxprm.sbp[system->atoms[i].type].mass * - rvec_Dot( system->atoms[i].v, system->atoms[i].v ) ); + E_kin_new += ( 0.5 * system->reaxprm.sbp[system->atoms[i].type].mass + * rvec_Dot( system->atoms[i].v, system->atoms[i].v ) ); + #if defined(DEBUG) fprintf( stderr, "itr%d-atom%d: coef_v = %f, v_xi_old = %f\n", itr, i, coef_v, v_xi_old ); #endif } - G_xi_new = control->Tau_T * ( 2.0 * E_kin_new - - data->N_f * K_B * control->T ); + G_xi_new = control->Tau_T * ( 2.0 * E_kin_new + - data->N_f * K_B * control->T ); v_xi_new = therm->v_xi + 0.5 * dt * ( therm->G_xi + G_xi_new ); + #if defined(DEBUG) fprintf( stderr, "itr%d: G_xi_new = %f, v_xi_new = %f, v_xi_old = %f\n", itr, G_xi_new, v_xi_new, v_xi_old ); @@ -194,6 +299,7 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params* therm->v_xi_old = therm->v_xi; therm->v_xi = v_xi_new; therm->G_xi = G_xi_new; + #if defined(DEBUG_FOCUS) fprintf( stderr, "vel scale\n" ); #endif @@ -201,8 +307,8 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params* /* uses Berendsen-type coupling for both T and P. - All box dimensions are scaled by the same amount, - there is no change in the angles between axes. */ + * All box dimensions are scaled by the same amount, + * there is no change in the angles between axes. */ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system, control_params* control, simulation_data *data, static_storage *workspace, reax_list **lists, output_controls *out_control ) @@ -227,18 +333,21 @@ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system, rvec_ScaledAdd( system->atoms[i].v, -0.5 * F_CONV * inv_m * dt, system->atoms[i].f ); } + #if defined(DEBUG_FOCUS) fprintf( stderr, "verlet1 - " ); #endif Reallocate( system, control, workspace, lists, renbr ); Reset( system, control, data, workspace, lists ); + if ( renbr ) { Update_Grid( system ); Generate_Neighbor_Lists( system, control, data, workspace, lists, out_control ); } + Compute_Forces( system, control, data, workspace, lists, out_control ); /* velocity verlet, 2nd part */ @@ -249,19 +358,25 @@ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system, rvec_ScaledAdd( system->atoms[i].v, -0.5 * dt * F_CONV * inv_m, system->atoms[i].f ); } + Compute_Kinetic_Energy( system, data ); Compute_Pressure_Isotropic( system, control, data, out_control ); + #if defined(DEBUG_FOCUS) fprintf( stderr, "verlet2 - " ); #endif /* pressure scaler */ - mu = POW( 1.0 + (dt / control->Tau_P[0]) * (data->iso_bar.P - control->P[0]), - 1.0 / 3 ); + mu = POW( 1.0 + (dt / control->Tau_P[0]) + * (data->iso_bar.P - control->P[0]), 1.0 / 3.0 ); if ( mu < MIN_dV ) + { mu = MIN_dV; + } else if ( mu > MAX_dV ) + { mu = MAX_dV; + } /* temperature scaler */ lambda = 1.0 + (dt / control->Tau_T) * (control->T / data->therm.T - 1.0); @@ -286,12 +401,15 @@ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system, are being scaled with mu! We need to discuss this with Adri! */ rvec_Scale( system->atoms[i].x, mu, system->atoms[i].x ); } + Compute_Kinetic_Energy( system, data ); + #if defined(DEBUG_FOCUS) fprintf( stderr, "scaling - " ); #endif Update_Box_Isotropic( &(system->box), mu ); + #if defined(DEBUG_FOCUS) fprintf( stderr, "updated box\n" ); #endif @@ -299,9 +417,9 @@ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system, /* uses Berendsen-type coupling for both T and P. - All box dimensions are scaled by the same amount, - there is no change in the angles between axes. */ -void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system* system, + * All box dimensions are scaled by the same amount, + * there is no change in the angles between axes. */ +void Velocity_Verlet_Berendsen_Semi_Isotropic_NPT( reax_system* system, control_params* control, simulation_data *data, static_storage *workspace, reax_list **lists, output_controls *out_control ) { @@ -312,6 +430,7 @@ void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system* system, dt = control->dt; steps = data->step - data->prev_steps; renbr = (steps % control->reneighbor == 0); + #if defined(DEBUG_FOCUS) //fprintf( out_control->prs, // "tau_t: %g tau_p: %g dt/tau_t: %g dt/tau_p: %g\n", @@ -331,12 +450,14 @@ void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system* system, rvec_ScaledAdd( system->atoms[i].v, -0.5 * F_CONV * inv_m * dt, system->atoms[i].f ); } + #if defined(DEBUG_FOCUS) fprintf( stderr, "verlet1 - " ); #endif Reallocate( system, control, workspace, lists, renbr ); Reset( system, control, data, workspace, lists ); + if ( renbr ) { Update_Grid( system ); @@ -361,47 +482,60 @@ void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system* system, /* pressure scaler */ for ( d = 0; d < 3; ++d ) { - mu[d] = POW( 1.0 + (dt / control->Tau_P[d]) * (data->tot_press[d] - control->P[d]), - 1.0 / 3 ); + mu[d] = POW( 1.0 + (dt / control->Tau_P[d]) + * (data->tot_press[d] - control->P[d]), 1.0 / 3.0 ); if ( mu[d] < MIN_dV ) + { mu[d] = MIN_dV; + } else if ( mu[d] > MAX_dV ) + { mu[d] = MAX_dV; + } } /* temperature scaler */ lambda = 1.0 + (dt / control->Tau_T) * (control->T / data->therm.T - 1.0); if ( lambda < MIN_dT ) + { lambda = MIN_dT; - else if (lambda > MAX_dT ) + } + else if ( lambda > MAX_dT ) + { lambda = MAX_dT; + } lambda = SQRT( lambda ); /* Scale velocities and positions at t+dt */ for ( i = 0; i < system->N; ++i ) { rvec_Scale( system->atoms[i].v, lambda, system->atoms[i].v ); + /* IMPORTANT: What Adri does with scaling positions first to - unit coordinates and then back to cartesian coordinates essentially - is scaling the coordinates with mu^2. However, this causes unphysical - modifications on the system because box dimensions - are being scaled with mu! We need to discuss this with Adri! */ + * unit coordinates and then back to cartesian coordinates essentially + * is scaling the coordinates with mu^2. However, this causes unphysical + * modifications on the system because box dimensions + * are being scaled with mu! We need to discuss this with Adri! */ for ( d = 0; d < 3; ++d ) + { system->atoms[i].x[d] = system->atoms[i].x[d] * mu[d]; + } } + Compute_Kinetic_Energy( system, data ); + #if defined(DEBUG_FOCUS) fprintf( stderr, "scaling - " ); #endif - Update_Box_SemiIsotropic( &(system->box), mu ); + Update_Box_Semi_Isotropic( &system->box, mu ); + #if defined(DEBUG_FOCUS) fprintf( stderr, "updated box & grid\n" ); #endif } - /************************************************/ /* BELOW FUNCTIONS ARE NOT BEING USED ANYMORE! */ /* */ @@ -644,100 +778,3 @@ void Velocity_Verlet_Isotropic_NPT( reax_system* system, therm->G_xi, therm->v_xi, therm->xi); } #endif - - -/* Uses Berendsen-type coupling for both T and P. - * All box dimensions are scaled by the same amount, - * there is no change in the angles between axes. */ -void Velocity_Verlet_Berendsen_NVT( reax_system* system, - control_params* control, simulation_data *data, - static_storage *workspace, reax_list **lists, - output_controls *out_control ) -{ - int i, steps, renbr; - real inv_m, dt, lambda; - rvec dx; - reax_atom *atom; - -#if defined(DEBUG_FOCUS) - fprintf( stderr, "step%d\n", data->step ); -#endif - - dt = control->dt; - steps = data->step - data->prev_steps; - renbr = (steps % control->reneighbor == 0); - - /* velocity verlet, 1st part */ - for ( i = 0; i < system->N; i++ ) - { - atom = &(system->atoms[i]); - inv_m = 1.0 / system->reaxprm.sbp[atom->type].mass; - /* Compute x(t + dt) */ - rvec_ScaledSum( dx, dt, atom->v, -0.5 * F_CONV * inv_m * SQR(dt), atom->f ); - - /* bNVT fix - Metin's suggestion */ - /* ORIGINAL CHANGE -- CHECK THE branch serial-bnvt for the fix */ - //rvec_Add( atom->x, dx ); - Inc_on_T3( atom->x, dx, &system->box ); - - /* Compute v(t + dt/2) */ - rvec_ScaledAdd( atom->v, -0.5 * F_CONV * inv_m * dt, atom->f ); - } - -#if defined(DEBUG_FOCUS) - fprintf(stderr, "step%d: verlet1 done\n", data->step); -#endif - - Reallocate( system, control, workspace, lists, renbr ); - Reset( system, control, data, workspace, lists ); - - if ( renbr ) - { - Generate_Neighbor_Lists( system, control, data, workspace, lists, out_control ); - } - - Compute_Forces( system, control, data, workspace, - lists, out_control ); - - /* velocity verlet, 2nd part */ - for ( i = 0; i < system->N; i++ ) - { - atom = &(system->atoms[i]); - inv_m = 1.0 / system->reaxprm.sbp[atom->type].mass; - /* Compute v(t + dt) */ - rvec_ScaledAdd( atom->v, -0.5 * dt * F_CONV * inv_m, atom->f ); - } -#if defined(DEBUG_FOCUS) - fprintf(stderr, "step%d: verlet2 done\n", data->step); -#endif - - /* temperature scaler */ - Compute_Kinetic_Energy( system, data ); - lambda = 1.0 + (dt / control->Tau_T) * (control->T / data->therm.T - 1.0); - if ( lambda < MIN_dT ) - { - lambda = MIN_dT; - } - else if (lambda > MAX_dT ) - { - lambda = MAX_dT; - } - lambda = SQRT( lambda ); - -#if defined(DEBUG_FOCUS) - fprintf( stderr, "step:%d lambda -> %f \n", data->step, lambda ); -#endif - - /* Scale velocities and positions at t+dt */ - for ( i = 0; i < system->N; ++i ) - { - atom = &(system->atoms[i]); - rvec_Scale( atom->v, lambda, atom->v ); - } - Compute_Kinetic_Energy( system, data ); - -#if defined(DEBUG_FOCUS) - fprintf( stderr, "step%d: scaled velocities\n", - data->step ); -#endif -} diff --git a/sPuReMD/src/integrate.h b/sPuReMD/src/integrate.h index d5f0f3ae..976fc7ac 100644 --- a/sPuReMD/src/integrate.h +++ b/sPuReMD/src/integrate.h @@ -29,26 +29,25 @@ void Velocity_Verlet_NVE( reax_system*, control_params*, simulation_data*, static_storage*, reax_list**, output_controls* ); -void Velocity_Verlet_Nose_Hoover_NVT( reax_system*, control_params*, - simulation_data*, static_storage*, reax_list**, output_controls* ); +void Velocity_Verlet_Berendsen_NVT( reax_system* , control_params* , + simulation_data *, static_storage *, reax_list **, output_controls * ); void Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system*, control_params*, simulation_data*, static_storage*, reax_list**, output_controls* ); -void Velocity_Verlet_Flexible_NPT( reax_system*, control_params*, +void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system*, control_params*, simulation_data*, static_storage*, reax_list**, output_controls* ); -void Velocity_Verlet_Isotropic_NPT( reax_system*, control_params*, +void Velocity_Verlet_Berendsen_Semi_Isotropic_NPT( reax_system*, control_params*, simulation_data*, static_storage*, reax_list**, output_controls* ); -void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system*, control_params*, +#ifdef ANISOTROPIC +void Velocity_Verlet_Nose_Hoover_NVT( reax_system*, control_params*, simulation_data*, static_storage*, reax_list**, output_controls* ); -void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system*, control_params*, +void Velocity_Verlet_Isotropic_NPT( reax_system*, control_params*, simulation_data*, static_storage*, reax_list**, output_controls* ); - -void Velocity_Verlet_Berendsen_NVT( reax_system* , control_params* , - simulation_data *, static_storage *, reax_list **, output_controls * ); +#endif #endif diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c index ac3c2166..3fb32c98 100644 --- a/sPuReMD/src/lin_alg.c +++ b/sPuReMD/src/lin_alg.c @@ -118,8 +118,7 @@ void Sort_Matrix_Rows( sparse_matrix * const A ) // #pragma omp parallel default(none) private(i, j, si, ei, temp) shared(stderr) #endif { - temp = (sparse_matrix_entry *) smalloc( A->n * sizeof(sparse_matrix_entry), - "Sort_Matrix_Rows::temp" ); + temp = smalloc( A->n * sizeof(sparse_matrix_entry), "Sort_Matrix_Rows::temp" ); /* sort each row of A using column indices */ #ifdef _OPENMP @@ -247,7 +246,7 @@ void setup_sparse_approx_inverse( const sparse_matrix * const A, sparse_matrix * /* list: values from the matrix*/ /* left-right: search space of the quick-select */ - list = (real *) smalloc( sizeof(real) * (A->start[A->n]),"setup_sparse_approx_inverse::list" ); + list = smalloc( sizeof(real) * (A->start[A->n]),"setup_sparse_approx_inverse::list" ); left = 0; right = A->start[A->n] - 1; @@ -380,8 +379,8 @@ void Calculate_Droptol( const sparse_matrix * const A, { if ( droptol_local == NULL ) { - droptol_local = (real*) smalloc( omp_get_num_threads() * A->n * sizeof(real), - "Calculate_Droptol::droptol_local" ); + droptol_local = smalloc( omp_get_num_threads() * A->n * sizeof(real), + "Calculate_Droptol::droptol_local" ); } } @@ -492,303 +491,8 @@ int Estimate_LU_Fill( const sparse_matrix * const A, const real * const droptol } -#if defined(HAVE_SUPERLU_MT) -real SuperLU_Factorize( const sparse_matrix * const A, - sparse_matrix * const L, sparse_matrix * const U ) -{ - unsigned int i, pj, count, *Ltop, *Utop, r; - sparse_matrix *A_t; - SuperMatrix A_S, AC_S, L_S, U_S; - NCformat *A_S_store; - SCPformat *L_S_store; - NCPformat *U_S_store; - superlumt_options_t superlumt_options; - pxgstrf_shared_t pxgstrf_shared; - pdgstrf_threadarg_t *pdgstrf_threadarg; - int_t nprocs; - fact_t fact; - trans_t trans; - yes_no_t refact, usepr; - real u, drop_tol; - real *a, *at; - int_t *asub, *atsub, *xa, *xat; - int_t *perm_c; /* column permutation vector */ - int_t *perm_r; /* row permutations from partial pivoting */ - void *work; - int_t info, lwork; - int_t permc_spec, panel_size, relax; - Gstat_t Gstat; - flops_t flopcnt; - - /* Default parameters to control factorization. */ -#ifdef _OPENMP - //TODO: set as global parameter and use - #pragma omp parallel \ - default(none) shared(nprocs) - { - #pragma omp master - { - /* SuperLU_MT spawns threads internally, so set and pass parameter */ - nprocs = omp_get_num_threads(); - } - } -#else - nprocs = 1; -#endif - - // fact = EQUILIBRATE; /* equilibrate A (i.e., scale rows & cols to have unit norm), then factorize */ - fact = DOFACT; /* factor from scratch */ - trans = NOTRANS; - refact = NO; /* first time factorization */ - //TODO: add to control file and use the value there to set these - panel_size = sp_ienv(1); /* # consec. cols treated as unit task */ - relax = sp_ienv(2); /* # cols grouped as relaxed supernode */ - u = 1.0; /* diagonal pivoting threshold */ - usepr = NO; - drop_tol = 0.0; - work = NULL; - lwork = 0; - -#if defined(DEBUG) - fprintf( stderr, "nprocs = %d\n", nprocs ); - fprintf( stderr, "Panel size = %d\n", panel_size ); - fprintf( stderr, "Relax = %d\n", relax ); -#endif - - if ( !(perm_r = intMalloc(A->n)) ) - { - SUPERLU_ABORT("Malloc fails for perm_r[]."); - } - if ( !(perm_c = intMalloc(A->n)) ) - { - SUPERLU_ABORT("Malloc fails for perm_c[]."); - } - if ( !(superlumt_options.etree = intMalloc(A->n)) ) - { - SUPERLU_ABORT("Malloc fails for etree[]."); - } - if ( !(superlumt_options.colcnt_h = intMalloc(A->n)) ) - { - SUPERLU_ABORT("Malloc fails for colcnt_h[]."); - } - if ( !(superlumt_options.part_super_h = intMalloc(A->n)) ) - { - SUPERLU_ABORT("Malloc fails for part_super__h[]."); - } - a = (real*) smalloc( (2 * A->start[A->n] - A->n) * sizeof(real), - "SuperLU_Factorize::a" ); - asub = (int_t*) smalloc( (2 * A->start[A->n] - A->n) * sizeof(int_t), - "SuperLU_Factorize::asub" ); - xa = (int_t*) smalloc( (A->n + 1) * sizeof(int_t), - "SuperLU_Factorize::xa" ); - Ltop = (unsigned int*) smalloc( (A->n + 1) * sizeof(unsigned int), - "SuperLU_Factorize::Ltop" ); - Utop = (unsigned int*) smalloc( (A->n + 1) * sizeof(unsigned int), - "SuperLU_Factorize::Utop" ); - Allocate_Matrix( &A_t, A->n, A->m ); - - /* Set up the sparse matrix data structure for A. */ - Transpose( A, A_t ); - - count = 0; - for ( i = 0; i < A->n; ++i ) - { - xa[i] = count; - for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) - { - a[count] = A->entries[pj].val; - asub[count] = A->entries[pj].j; - ++count; - } - for ( pj = A_t->start[i] + 1; pj < A_t->start[i + 1]; ++pj ) - { - a[count] = A_t->entries[pj].val; - asub[count] = A_t->entries[pj].j; - ++count; - } - } - xa[i] = count; - - dCompRow_to_CompCol( A->n, A->n, 2 * A->start[A->n] - A->n, a, asub, xa, - &at, &atsub, &xat ); - - for ( i = 0; i < (2 * A->start[A->n] - A->n); ++i ) - fprintf( stderr, "%6d", asub[i] ); - fprintf( stderr, "\n" ); - for ( i = 0; i < (2 * A->start[A->n] - A->n); ++i ) - fprintf( stderr, "%6.1f", a[i] ); - fprintf( stderr, "\n" ); - for ( i = 0; i <= A->n; ++i ) - fprintf( stderr, "%6d", xa[i] ); - fprintf( stderr, "\n" ); - for ( i = 0; i < (2 * A->start[A->n] - A->n); ++i ) - fprintf( stderr, "%6d", atsub[i] ); - fprintf( stderr, "\n" ); - for ( i = 0; i < (2 * A->start[A->n] - A->n); ++i ) - fprintf( stderr, "%6.1f", at[i] ); - fprintf( stderr, "\n" ); - for ( i = 0; i <= A->n; ++i ) - fprintf( stderr, "%6d", xat[i] ); - fprintf( stderr, "\n" ); - - A_S.Stype = SLU_NC; /* column-wise, no supernode */ - A_S.Dtype = SLU_D; /* double-precision */ - A_S.Mtype = SLU_GE; /* full (general) matrix -- required for parallel factorization */ - A_S.nrow = A->n; - A_S.ncol = A->n; - A_S.Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) ); - A_S_store = (NCformat *) A_S.Store; - A_S_store->nnz = 2 * A->start[A->n] - A->n; - A_S_store->nzval = at; - A_S_store->rowind = atsub; - A_S_store->colptr = xat; - - /* ------------------------------------------------------------ - Allocate storage and initialize statistics variables. - ------------------------------------------------------------*/ - StatAlloc( A->n, nprocs, panel_size, relax, &Gstat ); - StatInit( A->n, nprocs, &Gstat ); - - /* ------------------------------------------------------------ - Get column permutation vector perm_c[], according to permc_spec: - permc_spec = 0: natural ordering - permc_spec = 1: minimum degree ordering on structure of A'*A - permc_spec = 2: minimum degree ordering on structure of A'+A - permc_spec = 3: approximate minimum degree for unsymmetric matrices - ------------------------------------------------------------*/ - permc_spec = 0; - get_perm_c( permc_spec, &A_S, perm_c ); - - /* ------------------------------------------------------------ - Initialize the option structure superlumt_options using the - user-input parameters; - Apply perm_c to the columns of original A to form AC. - ------------------------------------------------------------*/ - pdgstrf_init( nprocs, fact, trans, refact, panel_size, relax, - u, usepr, drop_tol, perm_c, perm_r, - work, lwork, &A_S, &AC_S, &superlumt_options, &Gstat ); - - for ( i = 0; i < ((NCPformat*)AC_S.Store)->nnz; ++i ) - fprintf( stderr, "%6.1f", ((real*)(((NCPformat*)AC_S.Store)->nzval))[i] ); - fprintf( stderr, "\n" ); - - /* ------------------------------------------------------------ - Compute the LU factorization of A. - The following routine will create nprocs threads. - ------------------------------------------------------------*/ - pdgstrf( &superlumt_options, &AC_S, perm_r, &L_S, &U_S, &Gstat, &info ); - - fprintf( stderr, "INFO: %d\n", info ); - - flopcnt = 0; - for (i = 0; i < nprocs; ++i) - { - flopcnt += Gstat.procstat[i].fcops; - } - Gstat.ops[FACT] = flopcnt; - -#if defined(DEBUG) - printf("\n** Result of sparse LU **\n"); - L_S_store = (SCPformat *) L_S.Store; - U_S_store = (NCPformat *) U_S.Store; - printf( "No of nonzeros in factor L = " IFMT "\n", L_S_store->nnz ); - printf( "No of nonzeros in factor U = " IFMT "\n", U_S_store->nnz ); - fflush( stdout ); -#endif - - /* convert L and R from SuperLU formats to CSR */ - memset( Ltop, 0, (A->n + 1) * sizeof(int) ); - memset( Utop, 0, (A->n + 1) * sizeof(int) ); - memset( L->start, 0, (A->n + 1) * sizeof(int) ); - memset( U->start, 0, (A->n + 1) * sizeof(int) ); - - for ( i = 0; i < 2 * L_S_store->nnz; ++i ) - fprintf( stderr, "%6.1f", ((real*)(L_S_store->nzval))[i] ); - fprintf( stderr, "\n" ); - for ( i = 0; i < 2 * U_S_store->nnz; ++i ) - fprintf( stderr, "%6.1f", ((real*)(U_S_store->nzval))[i] ); - fprintf( stderr, "\n" ); - - printf( "No of supernodes in factor L = " IFMT "\n", L_S_store->nsuper ); - for ( i = 0; i < A->n; ++i ) - { - fprintf( stderr, "nzval_col_beg[%5d] = %d\n", i, L_S_store->nzval_colbeg[i] ); - fprintf( stderr, "nzval_col_end[%5d] = %d\n", i, L_S_store->nzval_colend[i] ); - //TODO: correct for SCPformat for L? - //for( pj = L_S_store->rowind_colbeg[i]; pj < L_S_store->rowind_colend[i]; ++pj ) - // for( pj = 0; pj < L_S_store->rowind_colend[i] - L_S_store->rowind_colbeg[i]; ++pj ) - // { - // ++Ltop[L_S_store->rowind[L_S_store->rowind_colbeg[i] + pj] + 1]; - // } - fprintf( stderr, "col_beg[%5d] = %d\n", i, U_S_store->colbeg[i] ); - fprintf( stderr, "col_end[%5d] = %d\n", i, U_S_store->colend[i] ); - for ( pj = U_S_store->colbeg[i]; pj < U_S_store->colend[i]; ++pj ) - { - ++Utop[U_S_store->rowind[pj] + 1]; - fprintf( stderr, "Utop[%5d] = %d\n", U_S_store->rowind[pj] + 1, Utop[U_S_store->rowind[pj] + 1] ); - } - } - for ( i = 1; i <= A->n; ++i ) - { - // Ltop[i] = L->start[i] = Ltop[i] + Ltop[i - 1]; - Utop[i] = U->start[i] = Utop[i] + Utop[i - 1]; - // fprintf( stderr, "Utop[%5d] = %d\n", i, Utop[i] ); - // fprintf( stderr, "U->start[%5d] = %d\n", i, U->start[i] ); - } - for ( i = 0; i < A->n; ++i ) - { - // for( pj = 0; pj < L_S_store->nzval_colend[i] - L_S_store->nzval_colbeg[i]; ++pj ) - // { - // r = L_S_store->rowind[L_S_store->rowind_colbeg[i] + pj]; - // L->entries[Ltop[r]].j = r; - // L->entries[Ltop[r]].val = ((real*)L_S_store->nzval)[L_S_store->nzval_colbeg[i] + pj]; - // ++Ltop[r]; - // } - for ( pj = U_S_store->colbeg[i]; pj < U_S_store->colend[i]; ++pj ) - { - r = U_S_store->rowind[pj]; - U->entries[Utop[r]].j = i; - U->entries[Utop[r]].val = ((real*)U_S_store->nzval)[pj]; - ++Utop[r]; - } - } - - /* ------------------------------------------------------------ - Deallocate storage after factorization. - ------------------------------------------------------------*/ - pxgstrf_finalize( &superlumt_options, &AC_S ); - Deallocate_Matrix( A_t ); - sfree( xa, "SuperLU_Factorize::xa" ); - sfree( asub, "SuperLU_Factorize::asub" ); - sfree( a, "SuperLU_Factorize::a" ); - SUPERLU_FREE( perm_r ); - SUPERLU_FREE( perm_c ); - SUPERLU_FREE( ((NCformat *)A_S.Store)->rowind ); - SUPERLU_FREE( ((NCformat *)A_S.Store)->colptr ); - SUPERLU_FREE( ((NCformat *)A_S.Store)->nzval ); - SUPERLU_FREE( A_S.Store ); - if ( lwork == 0 ) - { - Destroy_SuperNode_SCP(&L_S); - Destroy_CompCol_NCP(&U_S); - } - else if ( lwork > 0 ) - { - SUPERLU_FREE(work); - } - StatFree(&Gstat); - - sfree( Utop, "SuperLU_Factorize::Utop" ); - sfree( Ltop, "SuperLU_Factorize::Ltop" ); - - //TODO: return iters - return 0.; -} -#endif - - -/* Diagonal (Jacobi) preconditioner computation */ -real diag_pre_comp( const sparse_matrix * const H, real * const Hdia_inv ) +/* Jacobi preconditioner computation */ +real jacobi( const sparse_matrix * const H, real * const Hdia_inv ) { unsigned int i; real start; @@ -827,12 +531,9 @@ real ICHOLT( const sparse_matrix * const A, const real * const droptol, start = Get_Time( ); - Utop = (unsigned int*) smalloc( (A->n + 1) * sizeof(unsigned int), - "ICHOLT::Utop" ); - tmp_j = (int*) smalloc( A->n * sizeof(int), - "ICHOLT::Utop" ); - tmp_val = (real*) smalloc( A->n * sizeof(real), - "ICHOLT::Utop" ); + Utop = smalloc( (A->n + 1) * sizeof(unsigned int), "ICHOLT::Utop" ); + tmp_j = smalloc( A->n * sizeof(int), "ICHOLT::Utop" ); + tmp_val = smalloc( A->n * sizeof(real), "ICHOLT::Utop" ); // clear variables Ltop = 0; @@ -961,6 +662,133 @@ real ICHOLT( const sparse_matrix * const A, const real * const droptol, } +/* Compute incomplete LU factorization with thresholding + * + * Reference: + * Iterative Methods for Sparse Linear System, Second Edition, 2003, + * Yousef Saad + * + * A: symmetric, half-stored (lower triangular), CSR format + * droptol: row-wise tolerances used for dropping + * L / U: (output) triangular matrices, A \approx LU, CSR format */ +real ILUT( const sparse_matrix * const A, const real * const droptol, + sparse_matrix * const L, sparse_matrix * const U ) +{ + int *tmp_j; + real *tmp_val; + int i, j, pj, k1, k2, tmptop, Ltop; + real val, start; + unsigned int *Utop; + + start = Get_Time( ); + + Utop = smalloc( (A->n + 1) * sizeof(unsigned int), "ILUT::Utop" ); + tmp_j = smalloc( A->n * sizeof(int), "ILUT::Utop" ); + tmp_val = smalloc( A->n * sizeof(real), "ILUT::Utop" ); + + Ltop = 0; + tmptop = 0; + memset( L->start, 0, (A->n + 1) * sizeof(unsigned int) ); + memset( U->start, 0, (A->n + 1) * sizeof(unsigned int) ); + memset( Utop, 0, A->n * sizeof(unsigned int) ); + + for ( i = 0; i < A->n; ++i ) + { + L->start[i] = Ltop; + tmptop = 0; + + for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj ) + { + j = A->j[pj]; + val = A->val[pj]; + + if ( FABS(val) > droptol[i] ) + { + k1 = 0; + k2 = L->start[j]; + while ( k1 < tmptop && k2 < L->start[j + 1] ) + { + if ( tmp_j[k1] < L->j[k2] ) + { + ++k1; + } + else if ( tmp_j[k1] > L->j[k2] ) + { + ++k2; + } + else + { + val -= (tmp_val[k1++] * L->val[k2++]); + } + } + + /* L matrix is lower triangular, + * so right before the start of next row comes jth diagonal */ + val /= L->val[L->start[j + 1] - 1]; + + tmp_j[tmptop] = j; + tmp_val[tmptop] = val; + ++tmptop; + } + } + + /* sanity check */ + if ( A->j[pj] != i ) + { + fprintf( stderr, "[ERROR] badly built input matrix in ILUT (i = %d)\n", i ); + exit( NUMERIC_BREAKDOWN ); + } + + /* compute the ith diagonal in L */ + val = A->val[pj]; + for ( k1 = 0; k1 < tmptop; ++k1 ) + { + val -= (tmp_val[k1] * tmp_val[k1]); + } + +#if defined(DEBUG) + if ( val < 0.0 ) + { + fprintf( stderr, "[ERROR] Numeric breakdown in ILUT (SQRT of negative on diagonal i = %d). Terminating.\n", i ); + exit( NUMERIC_BREAKDOWN ); + + } +#endif + + tmp_j[tmptop] = i; + tmp_val[tmptop] = SQRT( val ); + + /* apply the dropping rule once again */ + for ( k1 = 0; k1 < tmptop; ++k1 ) + { + if ( FABS(tmp_val[k1]) > droptol[i] / tmp_val[tmptop] ) + { + L->j[Ltop] = tmp_j[k1]; + L->val[Ltop] = tmp_val[k1]; + U->start[tmp_j[k1] + 1]++; + ++Ltop; + } + } + + /* keep the diagonal in any case */ + L->j[Ltop] = tmp_j[k1]; + L->val[Ltop] = tmp_val[k1]; + ++Ltop; + } + + L->start[i] = Ltop; + + /* U = L^T (Cholesky factorization) */ + Transpose( L, U ); + + sfree( tmp_val, "ILUT::tmp_val" ); + sfree( tmp_j, "ILUT::tmp_j" ); + sfree( Utop, "ILUT::Utop" ); + + return Get_Timing_Info( start ); +} + + /* Fine-grained (parallel) incomplete Cholesky factorization * * Reference: @@ -968,7 +796,7 @@ real ICHOLT( const sparse_matrix * const A, const real * const droptol, * Fine-Grained Parallel Incomplete LU Factorization * SIAM J. Sci. Comp. */ #if defined(TESTING) -real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, +real FG_ICHOL( const sparse_matrix * const A, const unsigned int sweeps, sparse_matrix * const U_t, sparse_matrix * const U ) { unsigned int i, j, k, pj, x = 0, y = 0, ei_x, ei_y; @@ -978,9 +806,9 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, start = Get_Time( ); - D = (real*) smalloc( A->n * sizeof(real), "ICHOL_PAR::D" ); - D_inv = (real*) smalloc( A->n * sizeof(real), "ICHOL_PAR::D_inv" ); - Utop = (int*) smalloc( (A->n + 1) * sizeof(int), "ICHOL_PAR::Utop" ); + D = smalloc( A->n * sizeof(real), "FG_ICHOL::D" ); + D_inv = smalloc( A->n * sizeof(real), "FG_ICHOL::D_inv" ); + Utop = smalloc( (A->n + 1) * sizeof(int), "FG_ICHOL::Utop" ); Allocate_Matrix( &DAD, A->n, A->m ); #ifdef _OPENMP @@ -1078,7 +906,7 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, /* sanity check */ if ( sum < ZERO ) { - fprintf( stderr, "Numeric breakdown in ICHOL_PAR. Terminating.\n"); + fprintf( stderr, "Numeric breakdown in FG_ICHOL. Terminating.\n"); #if defined(DEBUG_FOCUS) fprintf( stderr, "A(%5d,%5d) = %10.3f\n", k - 1, A->entries[j].j, A->entries[j].val ); @@ -1124,225 +952,15 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, #endif Deallocate_Matrix( DAD ); - sfree( D_inv, "ICHOL_PAR::D_inv" ); - sfree( D, "ICHOL_PAR::D" ); - sfree( Utop, "ICHOL_PAR::Utop" ); + sfree( D_inv, "FG_ICHOL::D_inv" ); + sfree( D, "FG_ICHOL::D" ); + sfree( Utop, "FG_ICHOL::Utop" ); return Get_Timing_Info( start ); } #endif -/* Fine-grained (parallel) incomplete LU factorization - * - * Reference: - * Edmond Chow and Aftab Patel - * Fine-Grained Parallel Incomplete LU Factorization - * SIAM J. Sci. Comp. - * - * A: symmetric, half-stored (lower triangular), CSR format - * sweeps: number of loops over non-zeros for computation - * L / U: factorized triangular matrices (A \approx LU), CSR format */ -real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, - sparse_matrix * const L, sparse_matrix * const U ) -{ - unsigned int i, j, k, pj, x, y, ei_x, ei_y; - real *D, *D_inv, sum, start; - sparse_matrix *DAD; - - start = Get_Time( ); - - D = (real*) smalloc( A->n * sizeof(real), "ILU_PAR::D" ); - D_inv = (real*) smalloc( A->n * sizeof(real), "ILU_PAR::D_inv" ); - Allocate_Matrix( &DAD, A->n, A->m ); - -#ifdef _OPENMP - #pragma omp parallel for schedule(static) \ - default(none) shared(D, D_inv) private(i) -#endif - for ( i = 0; i < A->n; ++i ) - { - D_inv[i] = SQRT( FABS( A->val[A->start[i + 1] - 1] ) ); - D[i] = 1.0 / D_inv[i]; - // printf( "A->val[%8d] = %f, D[%4d] = %f, D_inv[%4d] = %f\n", A->start[i + 1] - 1, A->val[A->start[i + 1] - 1], i, D[i], i, D_inv[i] ); - } - - /* to get convergence, A must have unit diagonal, so apply - * transformation DAD, where D = D(1./SQRT(abs(D(A)))) */ - memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) ); -#ifdef _OPENMP - #pragma omp parallel for schedule(static) \ - default(none) shared(DAD, D) private(i, pj) -#endif - for ( i = 0; i < A->n; ++i ) - { - /* non-diagonals */ - for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj ) - { - DAD->j[pj] = A->j[pj]; - DAD->val[pj] = D[i] * A->val[pj] * D[A->j[pj]]; - } - /* diagonal */ - DAD->j[pj] = A->j[pj]; - DAD->val[pj] = 1.0; - } - - /* initial guesses for L and U, - * assume: A and DAD symmetric and stored lower triangular */ - memcpy( L->start, DAD->start, sizeof(int) * (DAD->n + 1) ); - memcpy( L->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) ); - memcpy( L->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) ); - /* store U^T in CSR for row-wise access and tranpose later */ - memcpy( U->start, DAD->start, sizeof(int) * (DAD->n + 1) ); - memcpy( U->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) ); - memcpy( U->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) ); - - /* L has unit diagonal, by convention */ -#ifdef _OPENMP - #pragma omp parallel for schedule(static) default(none) private(i) -#endif - for ( i = 0; i < A->n; ++i ) - { - L->val[L->start[i + 1] - 1] = 1.0; - } - - for ( i = 0; i < sweeps; ++i ) - { - /* for each nonzero in L */ -#ifdef _OPENMP - #pragma omp parallel for schedule(static) \ - default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum) -#endif - for ( j = 0; j < DAD->start[DAD->n]; ++j ) - { - sum = ZERO; - - /* determine row bounds of current nonzero */ - x = 0; - ei_x = 0; - for ( k = 1; k <= DAD->n; ++k ) - { - if ( DAD->start[k] > j ) - { - x = DAD->start[k - 1]; - ei_x = DAD->start[k]; - break; - } - } - /* determine column bounds of current nonzero */ - y = DAD->start[DAD->j[j]]; - ei_y = DAD->start[DAD->j[j] + 1]; - - /* sparse dot product: - * dot( L(i,1:j-1), U(1:j-1,j) ) */ - while ( L->j[x] < L->j[j] && - L->j[y] < L->j[j] && - x < ei_x && y < ei_y ) - { - if ( L->j[x] == L->j[y] ) - { - sum += (L->val[x] * U->val[y]); - ++x; - ++y; - } - else if ( L->j[x] < L->j[y] ) - { - ++x; - } - else - { - ++y; - } - } - - if ( j != ei_x - 1 ) - { - L->val[j] = ( DAD->val[j] - sum ) / U->val[ei_y - 1]; - } - } - -#ifdef _OPENMP - #pragma omp parallel for schedule(static) \ - default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum) -#endif - for ( j = 0; j < DAD->start[DAD->n]; ++j ) - { - sum = ZERO; - - /* determine row bounds of current nonzero */ - x = 0; - ei_x = 0; - for ( k = 1; k <= DAD->n; ++k ) - { - if ( DAD->start[k] > j ) - { - x = DAD->start[k - 1]; - ei_x = DAD->start[k]; - break; - } - } - /* determine column bounds of current nonzero */ - y = DAD->start[DAD->j[j]]; - ei_y = DAD->start[DAD->j[j] + 1]; - - /* sparse dot product: - * dot( L(i,1:i-1), U(1:i-1,j) ) */ - while ( U->j[x] < U->j[j] && - U->j[y] < U->j[j] && - x < ei_x && y < ei_y ) - { - if ( U->j[x] == U->j[y] ) - { - sum += (L->val[y] * U->val[x]); - ++x; - ++y; - } - else if ( U->j[x] < U->j[y] ) - { - ++x; - } - else - { - ++y; - } - } - - U->val[j] = DAD->val[j] - sum; - } - } - - /* apply inverse transformation: - * since DAD \approx LU, then - * D^{-1}DADD^{-1} = A \approx D^{-1}LUD^{-1} */ -#ifdef _OPENMP - #pragma omp parallel for schedule(static) \ - default(none) shared(DAD, D_inv) private(i, pj) -#endif - for ( i = 0; i < DAD->n; ++i ) - { - for ( pj = DAD->start[i]; pj < DAD->start[i + 1]; ++pj ) - { - L->val[pj] = D_inv[i] * L->val[pj]; - /* currently storing U^T, so use row index instead of column index */ - U->val[pj] = U->val[pj] * D_inv[i]; - } - } - - Transpose_I( U ); - -#if defined(DEBUG_FOCUS) - fprintf( stderr, "nnz(L): %d, max: %d\n", L->start[L->n], L->n * 50 ); - fprintf( stderr, "nnz(U): %d, max: %d\n", Utop[U->n], U->n * 50 ); -#endif - - Deallocate_Matrix( DAD ); - sfree( D_inv, "ILU_PAR::D_inv" ); - sfree( D, "ILU_PAR::D_inv" ); - - return Get_Timing_Info( start ); -} - - /* Fine-grained (parallel) incomplete LU factorization with thresholding * * Reference: @@ -1354,7 +972,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, * droptol: row-wise tolerances used for dropping * sweeps: number of loops over non-zeros for computation * L / U: factorized triangular matrices (A \approx LU), CSR format */ -real ILUT_PAR( const sparse_matrix * const A, const real * droptol, +real FG_ILUT( const sparse_matrix * const A, const real * droptol, const unsigned int sweeps, sparse_matrix * const L, sparse_matrix * const U ) { unsigned int i, j, k, pj, x, y, ei_x, ei_y, Ltop, Utop; @@ -1367,8 +985,8 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol, Allocate_Matrix( &L_temp, A->n, A->m ); Allocate_Matrix( &U_temp, A->n, A->m ); - D = (real*) smalloc( A->n * sizeof(real), "ILUT_PAR::D" ); - D_inv = (real*) smalloc( A->n * sizeof(real), "ILUT_PAR::D_inv" ); + D = smalloc( A->n * sizeof(real), "FG_ILUT::D" ); + D_inv = smalloc( A->n * sizeof(real), "FG_ILUT::D_inv" ); #ifdef _OPENMP #pragma omp parallel for schedule(static) \ @@ -1594,8 +1212,8 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol, Deallocate_Matrix( U_temp ); Deallocate_Matrix( L_temp ); Deallocate_Matrix( DAD ); - sfree( D_inv, "ILUT_PAR::D_inv" ); - sfree( D, "ILUT_PAR::D_inv" ); + sfree( D_inv, "FG_ILUT::D_inv" ); + sfree( D, "FG_ILUT::D_inv" ); return Get_Timing_Info( start ); } @@ -1651,10 +1269,10 @@ real sparse_approx_inverse( const sparse_matrix * const A, shared(A_app_inv, stderr) #endif { - X = (char *) smalloc( sizeof(char) * A->n, "sparse_approx_inverse::X" ); - Y = (char *) smalloc( sizeof(char) * A->n, "sparse_approx_inverse::Y" ); - pos_x = (int *) smalloc( sizeof(int) * A->n, "sparse_approx_inverse::pos_x" ); - pos_y = (int *) smalloc( sizeof(int) * A->n, "sparse_approx_inverse::pos_y" ); + X = smalloc( sizeof(char) * A->n, "sparse_approx_inverse::X" ); + Y = smalloc( sizeof(char) * A->n, "sparse_approx_inverse::Y" ); + pos_x = smalloc( sizeof(int) * A->n, "sparse_approx_inverse::pos_x" ); + pos_y = smalloc( sizeof(int) * A->n, "sparse_approx_inverse::pos_y" ); for ( i = 0; i < A->n; ++i ) { @@ -1707,8 +1325,8 @@ real sparse_approx_inverse( const sparse_matrix * const A, } // allocate memory for NxM dense matrix - dense_matrix = (real *) smalloc( sizeof(real) * N * M, - "sparse_approx_inverse::dense_matrix" ); + dense_matrix = smalloc( sizeof(real) * N * M, + "sparse_approx_inverse::dense_matrix" ); // fill in the entries of dense matrix for ( d_i = 0; d_i < M; ++d_i) @@ -1732,8 +1350,7 @@ real sparse_approx_inverse( const sparse_matrix * const A, /* create the right hand side of the linear equation that is the full column of the identity matrix*/ - e_j = (real *) smalloc( sizeof(real) * M, - "sparse_approx_inverse::e_j" ); + e_j = smalloc( sizeof(real) * M, "sparse_approx_inverse::e_j" ); for ( k = 0; k < M; ++k ) { @@ -2259,10 +1876,8 @@ void graph_coloring( const control_params * const control, } } - fb_color = (int*) smalloc( sizeof(int) * A->n, - "graph_coloring::fb_color" ); - conflict_local = (unsigned int*) smalloc( sizeof(unsigned int) * A->n, - "graph_coloring::fb_color" ); + fb_color = smalloc( sizeof(int) * A->n, "graph_coloring::fb_color" ); + conflict_local = smalloc( sizeof(unsigned int) * A->n, "graph_coloring::fb_color" ); while ( workspace->recolor_cnt > 0 ) { @@ -2742,12 +2357,12 @@ static void apply_preconditioner( const static_storage * const workspace, const case TRI_SOLVE_PA: switch ( control->cm_solver_pre_comp_type ) { - case DIAG_PC: + case JACOBI_PC: diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); break; case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: + case ILUT_PC: + case FG_ILUT_PC: tri_solve( workspace->L, y, x, workspace->L->n, LOWER ); break; case SAI_PC: @@ -2762,12 +2377,12 @@ static void apply_preconditioner( const static_storage * const workspace, const case TRI_SOLVE_LEVEL_SCHED_PA: switch ( control->cm_solver_pre_comp_type ) { - case DIAG_PC: + case JACOBI_PC: diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); break; case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: + case ILUT_PC: + case FG_ILUT_PC: tri_solve_level_sched( (static_storage *) workspace, workspace->L, y, x, workspace->L->n, LOWER, fresh_pre ); break; @@ -2783,14 +2398,14 @@ static void apply_preconditioner( const static_storage * const workspace, const case TRI_SOLVE_GC_PA: switch ( control->cm_solver_pre_comp_type ) { - case DIAG_PC: + case JACOBI_PC: case SAI_PC: fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); exit( INVALID_INPUT ); break; case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: + case ILUT_PC: + case FG_ILUT_PC: #ifdef _OPENMP #pragma omp for schedule(static) #endif @@ -2812,14 +2427,14 @@ static void apply_preconditioner( const static_storage * const workspace, const case JACOBI_ITER_PA: switch ( control->cm_solver_pre_comp_type ) { - case DIAG_PC: + case JACOBI_PC: case SAI_PC: fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); exit( INVALID_INPUT ); break; case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: + case ILUT_PC: + case FG_ILUT_PC: /* construct D^{-1}_L */ if ( fresh_pre == TRUE ) { @@ -2856,7 +2471,7 @@ static void apply_preconditioner( const static_storage * const workspace, const case TRI_SOLVE_PA: switch ( control->cm_solver_pre_comp_type ) { - case DIAG_PC: + case JACOBI_PC: case SAI_PC: if ( x != y ) { @@ -2864,8 +2479,8 @@ static void apply_preconditioner( const static_storage * const workspace, const } break; case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: + case ILUT_PC: + case FG_ILUT_PC: tri_solve( workspace->U, y, x, workspace->U->n, UPPER ); break; default: @@ -2877,7 +2492,7 @@ static void apply_preconditioner( const static_storage * const workspace, const case TRI_SOLVE_LEVEL_SCHED_PA: switch ( control->cm_solver_pre_comp_type ) { - case DIAG_PC: + case JACOBI_PC: case SAI_PC: if ( x != y ) { @@ -2885,8 +2500,8 @@ static void apply_preconditioner( const static_storage * const workspace, const } break; case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: + case ILUT_PC: + case FG_ILUT_PC: tri_solve_level_sched( (static_storage *) workspace, workspace->U, y, x, workspace->U->n, UPPER, fresh_pre ); break; @@ -2899,14 +2514,14 @@ static void apply_preconditioner( const static_storage * const workspace, const case TRI_SOLVE_GC_PA: switch ( control->cm_solver_pre_comp_type ) { - case DIAG_PC: + case JACOBI_PC: case SAI_PC: fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); exit( INVALID_INPUT ); break; case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: + case ILUT_PC: + case FG_ILUT_PC: tri_solve_level_sched( (static_storage *) workspace, workspace->U, y, x, workspace->U->n, UPPER, fresh_pre ); permute_vector( workspace, x, workspace->H->n, TRUE, UPPER ); @@ -2920,14 +2535,14 @@ static void apply_preconditioner( const static_storage * const workspace, const case JACOBI_ITER_PA: switch ( control->cm_solver_pre_comp_type ) { - case DIAG_PC: + case JACOBI_PC: case SAI_PC: fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); exit( INVALID_INPUT ); break; case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: + case ILUT_PC: + case FG_ILUT_PC: /* construct D^{-1}_U */ if ( fresh_pre == TRUE ) { @@ -3736,7 +3351,7 @@ real condest( const sparse_matrix * const L, const sparse_matrix * const U ) N = L->n; - e = (real*) smalloc( sizeof(real) * N, "condest::e" ); + e = smalloc( sizeof(real) * N, "condest::e" ); memset( e, 1., N * sizeof(real) ); diff --git a/sPuReMD/src/lin_alg.h b/sPuReMD/src/lin_alg.h index 429db79c..5b9f0992 100644 --- a/sPuReMD/src/lin_alg.h +++ b/sPuReMD/src/lin_alg.h @@ -40,25 +40,20 @@ int Estimate_LU_Fill( const sparse_matrix * const, const real * const ); void Calculate_Droptol( const sparse_matrix * const, real * const, const real ); -#if defined(HAVE_SUPERLU_MT) -real SuperLU_Factorize( const sparse_matrix * const, - sparse_matrix * const, sparse_matrix * const ); -#endif - -real diag_pre_comp( const sparse_matrix * const, real * const ); +real jacobi( const sparse_matrix * const, real * const ); real ICHOLT( const sparse_matrix * const, const real * const, sparse_matrix * const, sparse_matrix * const ); -#if defined(TESTING) -real ICHOL_PAR( const sparse_matrix * const, const unsigned int, +real ILUT( const sparse_matrix * const, const real * const, sparse_matrix * const, sparse_matrix * const ); -#endif -real ILU_PAR( const sparse_matrix * const, const unsigned int, +#if defined(TESTING) +real FG_ICHOL( const sparse_matrix * const, const unsigned int, sparse_matrix * const, sparse_matrix * const ); +#endif -real ILUT_PAR( const sparse_matrix * const, const real *, +real FG_ILUT( const sparse_matrix * const, const real *, const unsigned int, sparse_matrix * const, sparse_matrix * const ); #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) diff --git a/sPuReMD/src/neighbors.c b/sPuReMD/src/neighbors.c index d52388d6..8a6d5a42 100644 --- a/sPuReMD/src/neighbors.c +++ b/sPuReMD/src/neighbors.c @@ -354,7 +354,7 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, far_nbrs = lists[FAR_NBRS]; if ( control->ensemble == iNPT || control->ensemble == sNPT - || control->ensemble == NPT ) + || control->ensemble == aNPT ) { Update_Grid( system ); } diff --git a/sPuReMD/src/print_utils.c b/sPuReMD/src/print_utils.c index c837063d..302eef02 100644 --- a/sPuReMD/src/print_utils.c +++ b/sPuReMD/src/print_utils.c @@ -656,7 +656,7 @@ void Output_Results( reax_system *system, control_params *control, fflush( out_control->log ); /* output pressure */ - if ( control->ensemble == NPT || control->ensemble == iNPT || + if ( control->ensemble == aNPT || control->ensemble == iNPT || control->ensemble == sNPT ) { fprintf( out_control->prs, "%-8d%13.6f%13.6f%13.6f", diff --git a/sPuReMD/src/reax_types.h b/sPuReMD/src/reax_types.h index cff4f2cf..26cd3aca 100644 --- a/sPuReMD/src/reax_types.h +++ b/sPuReMD/src/reax_types.h @@ -145,7 +145,7 @@ enum ensemble nhNVT = 2, sNPT = 3, iNPT = 4, - NPT = 5, + aNPT = 5, ens_N = 6, }; @@ -220,6 +220,7 @@ enum geo_formats GF_N = 5, }; +/* method used for computing atomic charges */ enum charge_method { QEQ_CM = 0, @@ -227,6 +228,7 @@ enum charge_method ACKS2_CM = 2, }; +/* iterative linear solver used for computing atomic charges */ enum solver { GMRES_S = 0, @@ -236,17 +238,18 @@ enum solver BiCGStab_S = 4, }; +/* preconditioner used with iterative linear solver */ enum pre_comp { NONE_PC = 0, - DIAG_PC = 1, + JACOBI_PC = 1, ICHOLT_PC = 2, - ILU_PAR_PC = 3, - ILUT_PAR_PC = 4, - ILU_SUPERLU_MT_PC = 5, + ILUT_PC = 3, + FG_ILUT_PC = 5, SAI_PC = 6, }; +/* method used to apply preconditioner for 2-side incomplete factorizations (ICHOLT, ILU) */ enum pre_app { TRI_SOLVE_PA = 0, @@ -621,11 +624,12 @@ struct control_params int reposition_atoms; /* ensemble values: - 0 : NVE - 1 : NVT (Nose-Hoover) - 2 : NPT (Parrinello-Rehman-Nose-Hoover) Anisotropic - 3 : sNPT (Parrinello-Rehman-Nose-Hoover) semiisotropic - 4 : iNPT (Parrinello-Rehman-Nose-Hoover) isotropic */ + * 0 : NVE + * 1 : NVT (Berendsen) + * 2 : NVT (Nose-Hoover) + * 3 : sNPT (Parrinello-Rehman-Nose-Hoover) semi-isotropic + * 4 : iNPT (Parrinello-Rehman-Nose-Hoover) isotropic + * 5 : aNPT (Parrinello-Rehman-Nose-Hoover) anisotropic */ int ensemble; int nsteps; int periodic_boundaries; diff --git a/sPuReMD/src/traj.c b/sPuReMD/src/traj.c index 2acb1bc5..43cd6c70 100644 --- a/sPuReMD/src/traj.c +++ b/sPuReMD/src/traj.c @@ -284,7 +284,7 @@ int Append_Custom_Frame( reax_system *system, control_params *control, } /* get correct pressure */ - if ( control->ensemble == NPT || control->ensemble == sNPT ) + if ( control->ensemble == aNPT || control->ensemble == sNPT ) { P = data->flex_bar.P_scalar; } diff --git a/sPuReMD/src/two_body_interactions.c b/sPuReMD/src/two_body_interactions.c index eca3d5a0..2a179cab 100644 --- a/sPuReMD/src/two_body_interactions.c +++ b/sPuReMD/src/two_body_interactions.c @@ -316,7 +316,7 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, +(CEvd + CEclmb), nbr_pj->dvec ); #endif } - /* NPT, iNPT or sNPT */ + /* aNPT, iNPT or sNPT */ else { /* for pressure coupling, terms not related to bond order @@ -620,7 +620,8 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control, +(CEvd + CEclmb), nbr_pj->dvec ); #endif } - else // NPT, iNPT or sNPT + /* aNPT, iNPT or sNPT */ + else { /* for pressure coupling, terms not related to bond order derivatives are added directly into pressure vector/tensor */ -- GitLab