diff --git a/sPuReMD/Makefile.am b/sPuReMD/Makefile.am index 7c986471c7759f44ea3cd9aea126c246929f1647..4102573deee8d6fefac290cce49f0755bd726200 100644 --- a/sPuReMD/Makefile.am +++ b/sPuReMD/Makefile.am @@ -4,7 +4,7 @@ bin_PROGRAMS = bin/spuremd bin_spuremd_SOURCES = src/ffield.c src/grid.c src/list.c src/lookup.c src/print_utils.c \ src/reset_utils.c src/restart.c src/random.c src/tool_box.c src/traj.c \ src/vector.c src/allocate.c src/analyze.c src/box.c src/system_props.c src/control.c \ - src/geo_tools.c src/neighbors.c src/lin_alg.c src/QEq.c src/bond_orders.c \ + src/geo_tools.c src/neighbors.c src/lin_alg.c src/charges.c src/bond_orders.c \ src/single_body_interactions.c src/two_body_interactions.c \ src/three_body_interactions.c src/four_body_interactions.c src/forces.c \ src/integrate.c src/init_md.c src/testmd.c @@ -12,7 +12,7 @@ bin_spuremd_SOURCES = src/ffield.c src/grid.c src/list.c src/lookup.c src/print_ include_HEADERS = src/mytypes.h src/ffield.h src/grid.h src/list.h src/lookup.h src/print_utils.h \ src/reset_utils.h src/restart.h src/random.h src/tool_box.h src/traj.h \ src/vector.h src/allocate.h src/analyze.h src/box.h src/system_props.h src/control.h \ - src/geo_tools.h src/neighbors.h src/lin_alg.h src/QEq.h src/bond_orders.h \ + src/geo_tools.h src/neighbors.h src/lin_alg.h src/charges.h src/bond_orders.h \ src/single_body_interactions.h src/two_body_interactions.h \ src/three_body_interactions.h src/four_body_interactions.h src/forces.h \ src/integrate.h src/init_md.h diff --git a/sPuReMD/src/QEq.c b/sPuReMD/src/charges.c similarity index 85% rename from sPuReMD/src/QEq.c rename to sPuReMD/src/charges.c index 972886fd2938d63ac9ef91063ddbe59b96e9e1fa..8ac55b476f34193ec5b328a5bf0810cc2ccf2aff 100644 --- a/sPuReMD/src/QEq.c +++ b/sPuReMD/src/charges.c @@ -19,7 +19,7 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#include "QEq.h" +#include "charges.h" #include "allocate.h" #include "list.h" @@ -145,8 +145,8 @@ static void Sort_Matrix_Rows( sparse_matrix * const A ) } -static void Calculate_Droptol( const sparse_matrix * const A, real * const droptol, - const real dtol ) +static void Calculate_Droptol( const sparse_matrix * const A, + real * const droptol, const real dtol ) { int i, j, k; real val; @@ -275,7 +275,7 @@ static int Estimate_LU_Fill( const sparse_matrix * const A, const real * const d #if defined(HAVE_SUPERLU_MT) static real SuperLU_Factorize( const sparse_matrix * const A, - sparse_matrix * const L, sparse_matrix * const U ) + sparse_matrix * const L, sparse_matrix * const U ) { unsigned int i, pj, count, *Ltop, *Utop, r; sparse_matrix *A_t; @@ -581,7 +581,7 @@ static real diag_pre_comp( const reax_system * const system, real * const Hdia_i #pragma omp parallel for schedule(static) \ default(none) private(i) - for ( i = 0; i < system->N; ++i ) + for ( i = 0; i < system->N_cm; ++i ) { Hdia_inv[i] = 1.0 / system->reaxprm.sbp[system->atoms[i].type].eta; } @@ -592,7 +592,7 @@ static real diag_pre_comp( const reax_system * const system, real * const Hdia_i /* Incomplete Cholesky factorization with dual thresholding */ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, - sparse_matrix * const L, sparse_matrix * const U ) + sparse_matrix * const L, sparse_matrix * const U ) { int *tmp_j; real *tmp_val; @@ -738,7 +738,7 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, * Fine-Grained Parallel Incomplete LU Factorization * SIAM J. Sci. Comp. */ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, - sparse_matrix * const U_t, sparse_matrix * const U ) + sparse_matrix * const U_t, sparse_matrix * const U ) { unsigned int i, j, k, pj, x = 0, y = 0, ei_x, ei_y; real *D, *D_inv, sum, start; @@ -757,7 +757,7 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, } #pragma omp parallel for schedule(static) \ - default(none) shared(D_inv, D) private(i) + default(none) shared(D_inv, D) private(i) for ( i = 0; i < A->n; ++i ) { D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] ); @@ -771,7 +771,7 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, * transformation DAD, where D = D(1./sqrt(D(A))) */ memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) ); #pragma omp parallel for schedule(guided) \ - default(none) shared(DAD, D_inv, D) private(i, pj) + default(none) shared(DAD, D_inv, D) private(i, pj) for ( i = 0; i < A->n; ++i ) { /* non-diagonals */ @@ -795,7 +795,7 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, { /* for each nonzero */ #pragma omp parallel for schedule(static) \ - default(none) shared(DAD, stderr) private(sum, ei_x, ei_y, k) firstprivate(x, y) + default(none) shared(DAD, stderr) private(sum, ei_x, ei_y, k) firstprivate(x, y) for ( j = 0; j < A->start[A->n]; ++j ) { sum = ZERO; @@ -868,7 +868,7 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, * since DAD \approx U^{T}U, so * D^{-1}DADD^{-1} = A \approx D^{-1}U^{T}UD^{-1} */ #pragma omp parallel for schedule(guided) \ - default(none) shared(D_inv) private(i, pj) + default(none) shared(D_inv) private(i, pj) for ( i = 0; i < A->n; ++i ) { for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) @@ -925,7 +925,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, } #pragma omp parallel for schedule(static) \ - default(none) shared(D, D_inv) private(i) + default(none) shared(D, D_inv) private(i) for ( i = 0; i < A->n; ++i ) { D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] ); @@ -936,7 +936,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, * transformation DAD, where D = D(1./sqrt(D(A))) */ memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) ); #pragma omp parallel for schedule(static) \ - default(none) shared(DAD, D) private(i, pj) + default(none) shared(DAD, D) private(i, pj) for ( i = 0; i < A->n; ++i ) { /* non-diagonals */ @@ -971,7 +971,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, { /* for each nonzero in L */ #pragma omp parallel for schedule(static) \ - default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum) + default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum) for ( j = 0; j < DAD->start[DAD->n]; ++j ) { sum = ZERO; @@ -1021,7 +1021,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, } #pragma omp parallel for schedule(static) \ - default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum) + default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum) for ( j = 0; j < DAD->start[DAD->n]; ++j ) { sum = ZERO; @@ -1072,7 +1072,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, * since DAD \approx LU, then * D^{-1}DADD^{-1} = A \approx D^{-1}LUD^{-1} */ #pragma omp parallel for schedule(static) \ - default(none) shared(DAD, D_inv) private(i, pj) + default(none) shared(DAD, D_inv) private(i, pj) for ( i = 0; i < DAD->n; ++i ) { for ( pj = DAD->start[i]; pj < DAD->start[i + 1]; ++pj ) @@ -1110,7 +1110,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, * sweeps: number of loops over non-zeros for computation * L / U: factorized triangular matrices (A \approx LU), CSR format */ static real ILUT_PAR( const sparse_matrix * const A, const real * droptol, - const unsigned int sweeps, sparse_matrix * const L, sparse_matrix * const U ) + const unsigned int sweeps, sparse_matrix * const L, sparse_matrix * const U ) { unsigned int i, j, k, pj, x, y, ei_x, ei_y, Ltop, Utop; real *D, *D_inv, sum, start; @@ -1134,7 +1134,7 @@ static real ILUT_PAR( const sparse_matrix * const A, const real * droptol, } #pragma omp parallel for schedule(static) \ - default(none) shared(D, D_inv) private(i) + default(none) shared(D, D_inv) private(i) for ( i = 0; i < A->n; ++i ) { D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] ); @@ -1145,7 +1145,7 @@ static real ILUT_PAR( const sparse_matrix * const A, const real * droptol, * transformation DAD, where D = D(1./sqrt(D(A))) */ memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) ); #pragma omp parallel for schedule(static) \ - default(none) shared(DAD, D) private(i, pj) + default(none) shared(DAD, D) private(i, pj) for ( i = 0; i < A->n; ++i ) { /* non-diagonals */ @@ -1171,7 +1171,7 @@ static real ILUT_PAR( const sparse_matrix * const A, const real * droptol, /* L has unit diagonal, by convention */ #pragma omp parallel for schedule(static) \ - default(none) private(i) shared(L_temp) + default(none) private(i) shared(L_temp) for ( i = 0; i < A->n; ++i ) { L_temp->val[L_temp->start[i + 1] - 1] = 1.0; @@ -1231,7 +1231,7 @@ static real ILUT_PAR( const sparse_matrix * const A, const real * droptol, } #pragma omp parallel for schedule(static) \ - default(none) shared(DAD, L_temp, U_temp) private(j, k, x, y, ei_x, ei_y, sum) + default(none) shared(DAD, L_temp, U_temp) private(j, k, x, y, ei_x, ei_y, sum) for ( j = 0; j < DAD->start[DAD->n]; ++j ) { sum = ZERO; @@ -1352,13 +1352,106 @@ static real ILUT_PAR( const sparse_matrix * const A, const real * droptol, } +static void Extrapolate_Charges_QEq( const reax_system * const system, + const control_params * const control, + simulation_data * const data, static_storage * const workspace ) +{ + int i; + real s_tmp, t_tmp; + + /* extrapolation for s & t */ + //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer + #pragma omp parallel for schedule(static) \ + default(none) private(i, s_tmp, t_tmp) + for ( i = 0; i < system->N_cm; ++i ) + { + // no extrapolation + //s_tmp = workspace->s[0][i]; + //t_tmp = workspace->t[0][i]; + + // linear + //s_tmp = 2 * workspace->s[0][i] - workspace->s[1][i]; + //t_tmp = 2 * workspace->t[0][i] - workspace->t[1][i]; + + // quadratic + //s_tmp = workspace->s[2][i] + 3 * (workspace->s[0][i]-workspace->s[1][i]); + t_tmp = workspace->t[2][i] + 3 * (workspace->t[0][i] - workspace->t[1][i]); + + // cubic + s_tmp = 4 * (workspace->s[0][i] + workspace->s[2][i]) - + (6 * workspace->s[1][i] + workspace->s[3][i] ); + //t_tmp = 4 * (workspace->t[0][i] + workspace->t[2][i]) - + // (6 * workspace->t[1][i] + workspace->t[3][i] ); + + // 4th order + //s_tmp = 5 * (workspace->s[0][i] - workspace->s[3][i]) + + // 10 * (-workspace->s[1][i] + workspace->s[2][i] ) + workspace->s[4][i]; + //t_tmp = 5 * (workspace->t[0][i] - workspace->t[3][i]) + + // 10 * (-workspace->t[1][i] + workspace->t[2][i] ) + workspace->t[4][i]; + + workspace->s[4][i] = workspace->s[3][i]; + workspace->s[3][i] = workspace->s[2][i]; + workspace->s[2][i] = workspace->s[1][i]; + workspace->s[1][i] = workspace->s[0][i]; + workspace->s[0][i] = s_tmp; + + workspace->t[4][i] = workspace->t[3][i]; + workspace->t[3][i] = workspace->t[2][i]; + workspace->t[2][i] = workspace->t[1][i]; + workspace->t[1][i] = workspace->t[0][i]; + workspace->t[0][i] = t_tmp; + } +} + + +static void Extrapolate_Charges_EEM( const reax_system * const system, + const control_params * const control, + simulation_data * const data, static_storage * const workspace ) +{ + int i; + real s_tmp; + + /* extrapolation for s */ + //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer + #pragma omp parallel for schedule(static) \ + default(none) private(i, s_tmp) + for ( i = 0; i < system->N_cm; ++i ) + { + // no extrapolation + //s_tmp = workspace->s[0][i]; + + // linear + //s_tmp = 2 * workspace->s[0][i] - workspace->s[1][i]; + + // quadratic + //s_tmp = workspace->s[2][i] + 3 * (workspace->s[0][i]-workspace->s[1][i]); + + // cubic + s_tmp = 4 * (workspace->s[0][i] + workspace->s[2][i]) - + (6 * workspace->s[1][i] + workspace->s[3][i] ); + + // 4th order + //s_tmp = 5 * (workspace->s[0][i] - workspace->s[3][i]) + + // 10 * (-workspace->s[1][i] + workspace->s[2][i] ) + workspace->s[4][i]; + + workspace->s[4][i] = workspace->s[3][i]; + workspace->s[3][i] = workspace->s[2][i]; + workspace->s[2][i] = workspace->s[1][i]; + workspace->s[1][i] = workspace->s[0][i]; + workspace->s[0][i] = s_tmp; + } +} + + /* Setup routine which performs the following: * 1) init storage for QEq matrices and other dependent routines * 2) compute preconditioner (if sim. step matches refactor step) * 3) extrapolate ficticious charges s and t */ -static void Init_MatVec( const reax_system * const system, const control_params * const control, - simulation_data * const data, static_storage * const workspace, const list * const far_nbrs ) +static void Init_Charges( const reax_system * const system, + const control_params * const control, + simulation_data * const data, static_storage * const workspace, + const list * const far_nbrs ) { int i, fillin; real s_tmp, t_tmp, time; @@ -1379,9 +1472,12 @@ static void Init_MatVec( const reax_system * const system, const control_params #endif if (control->cm_solver_pre_comp_refactor > 0 && - ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 || workspace->L == NULL)) + ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 + || workspace->L == NULL)) { - //Print_Linear_System( system, control, workspace, data->step ); +// Print_Linear_System( system, control, workspace, data->step ); + Print_Sparse_Matrix2( workspace->H, "H_0.out" ); + exit(0); time = Get_Time( ); if ( control->cm_solver_pre_comp_type != DIAG_PC ) @@ -1406,7 +1502,7 @@ static void Init_MatVec( const reax_system * const system, const control_params Sort_Matrix_Rows( Hptr ); } } - data->timing.QEq_sort_mat_rows += Get_Timing_Info( time ); + data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); #if defined(DEBUG) fprintf( stderr, "H matrix sorted\n" ); @@ -1417,13 +1513,14 @@ static void Init_MatVec( const reax_system * const system, const control_params case DIAG_PC: if ( workspace->Hdia_inv == NULL ) { - if ( ( workspace->Hdia_inv = (real *) calloc( system->N, sizeof( real ) ) ) == NULL ) + if ( ( workspace->Hdia_inv = (real *) calloc( system->N_cm, sizeof( real ) ) ) == NULL ) { fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); } } - data->timing.pre_comp += diag_pre_comp( system, workspace->Hdia_inv ); + + data->timing.cm_solver_pre_comp += diag_pre_comp( system, workspace->Hdia_inv ); break; case ICHOLT_PC: @@ -1450,7 +1547,8 @@ static void Init_MatVec( const reax_system * const system, const control_params #endif } - data->timing.pre_comp += ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U ); + data->timing.cm_solver_pre_comp += + ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U ); break; case ILU_PAR_PC: @@ -1465,11 +1563,13 @@ static void Init_MatVec( const reax_system * const system, const control_params } } - data->timing.pre_comp += ILU_PAR( Hptr, control->cm_solver_pre_comp_sweeps, workspace->L, workspace->U ); + data->timing.cm_solver_pre_comp += + ILU_PAR( Hptr, control->cm_solver_pre_comp_sweeps, workspace->L, workspace->U ); break; case ILUT_PAR_PC: Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); + #if defined(DEBUG_FOCUS) fprintf( stderr, "drop tolerances calculated\n" ); #endif @@ -1485,8 +1585,9 @@ static void Init_MatVec( const reax_system * const system, const control_params } } - data->timing.pre_comp += ILUT_PAR( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, - workspace->L, workspace->U ); + data->timing.cm_solver_pre_comp += + ILUT_PAR( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, + workspace->L, workspace->U ); break; case ILU_SUPERLU_MT_PC: @@ -1502,7 +1603,7 @@ static void Init_MatVec( const reax_system * const system, const control_params } #if defined(HAVE_SUPERLU_MT) - data->timing.pre_comp += SuperLU_Factorize( Hptr, workspace->L, workspace->U ); + data->timing.cm_solver_pre_comp += SuperLU_Factorize( Hptr, workspace->L, workspace->U ); #else fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" ); exit( INVALID_INPUT ); @@ -1531,68 +1632,26 @@ static void Init_MatVec( const reax_system * const system, const control_params //Print_Sparse_Matrix( U ); #endif } - - /* extrapolation for s & t */ - //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer - #pragma omp parallel for schedule(static) \ - default(none) private(i, s_tmp, t_tmp) - for ( i = 0; i < system->N; ++i ) - { - // no extrapolation - //s_tmp = workspace->s[0][i]; - //t_tmp = workspace->t[0][i]; - - // linear - //s_tmp = 2 * workspace->s[0][i] - workspace->s[1][i]; - //t_tmp = 2 * workspace->t[0][i] - workspace->t[1][i]; - - // quadratic - //s_tmp = workspace->s[2][i] + 3 * (workspace->s[0][i]-workspace->s[1][i]); - t_tmp = workspace->t[2][i] + 3 * (workspace->t[0][i] - workspace->t[1][i]); - - // cubic - s_tmp = 4 * (workspace->s[0][i] + workspace->s[2][i]) - - (6 * workspace->s[1][i] + workspace->s[3][i] ); - //t_tmp = 4 * (workspace->t[0][i] + workspace->t[2][i]) - - // (6 * workspace->t[1][i] + workspace->t[3][i] ); - - // 4th order - //s_tmp = 5 * (workspace->s[0][i] - workspace->s[3][i]) + - // 10 * (-workspace->s[1][i] + workspace->s[2][i] ) + workspace->s[4][i]; - //t_tmp = 5 * (workspace->t[0][i] - workspace->t[3][i]) + - // 10 * (-workspace->t[1][i] + workspace->t[2][i] ) + workspace->t[4][i]; - - workspace->s[4][i] = workspace->s[3][i]; - workspace->s[3][i] = workspace->s[2][i]; - workspace->s[2][i] = workspace->s[1][i]; - workspace->s[1][i] = workspace->s[0][i]; - workspace->s[0][i] = s_tmp; - - workspace->t[4][i] = workspace->t[3][i]; - workspace->t[3][i] = workspace->t[2][i]; - workspace->t[2][i] = workspace->t[1][i]; - workspace->t[1][i] = workspace->t[0][i]; - workspace->t[0][i] = t_tmp; - } } -/* Combine ficticious charges s and t to get atomic charge q +/* Combine ficticious charges s and t to get atomic charge q for QEq method */ -static void Calculate_Charges( const reax_system * const system, static_storage * const workspace ) +static void Calculate_Charges_QEq( const reax_system * const system, + static_storage * const workspace ) { int i; real u, s_sum, t_sum; s_sum = t_sum = 0.; - for ( i = 0; i < system->N; ++i ) + for ( i = 0; i < system->N_cm; ++i ) { s_sum += workspace->s[0][i]; t_sum += workspace->t[0][i]; } u = s_sum / t_sum; - for ( i = 0; i < system->N; ++i ) + for ( i = 0; i < system->N_cm; ++i ) { system->atoms[i].q = workspace->s[0][i] - u * workspace->t[0][i]; } @@ -1606,13 +1665,15 @@ static void Calculate_Charges( const reax_system * const system, static_storage * 2) perform 2 linear solves * 3) compute atomic charges based on output of 2) */ -void QEq( reax_system * const system, control_params * const control, simulation_data * const data, - static_storage * const workspace, const list * const far_nbrs, - const output_controls * const out_control ) +static void QEq( reax_system * const system, control_params * const control, + simulation_data * const data, static_storage * const workspace, + const list * const far_nbrs, const output_controls * const out_control ) { int iters; - Init_MatVec( system, control, data, workspace, far_nbrs ); + Init_Charges( system, control, data, workspace, far_nbrs ); + + Extrapolate_Charges_QEq( system, control, data, workspace ); // if( data->step == 0 || data->step == 100 ) // { @@ -1622,17 +1683,21 @@ void QEq( reax_system * const system, control_params * const control, simulation switch ( control->cm_solver_type ) { case GMRES_S: - iters = GMRES( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, - workspace->s[0], out_control->log, - ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); - iters += GMRES( workspace, control, data, workspace->H, workspace->b_t, control->cm_solver_q_err, - workspace->t[0], out_control->log, FALSE ); + iters = GMRES( workspace, control, data, workspace->H, + workspace->b_s, control->cm_solver_q_err, workspace->s[0], + out_control->log, + ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); + iters += GMRES( workspace, control, data, workspace->H, + workspace->b_t, control->cm_solver_q_err, workspace->t[0], + out_control->log, FALSE ); break; case GMRES_H_S: - iters = GMRES_HouseHolder( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, - workspace->s[0], out_control->log, (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ); - iters += GMRES_HouseHolder( workspace, control, data, workspace->H, workspace->b_t, control->cm_solver_q_err, - workspace->t[0], out_control->log, 0 ); + iters = GMRES_HouseHolder( workspace, control, data, workspace->H, + workspace->b_s, control->cm_solver_q_err, workspace->s[0], + out_control->log, (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ); + iters += GMRES_HouseHolder( workspace, control, data, workspace->H, + workspace->b_t, control->cm_solver_q_err, workspace->t[0], + out_control->log, 0 ); break; case CG_S: iters = CG( workspace, workspace->H, workspace->b_s, control->cm_solver_q_err, @@ -1664,13 +1729,13 @@ void QEq( reax_system * const system, control_params * const control, simulation break; } - data->timing.solver_iters += iters; + data->timing.cm_solver_iters += iters; #if defined(DEBUG_FOCUS) fprintf( stderr, "linsolve-" ); #endif - Calculate_Charges( system, workspace ); + Calculate_Charges_QEq( system, workspace ); //fprintf( stderr, "%d %.9f %.9f %.9f %.9f %.9f %.9f\n", // data->step, @@ -1680,3 +1745,104 @@ void QEq( reax_system * const system, control_params * const control, simulation // if( data->step == control->nsteps ) //Print_Charges( system, control, workspace, data->step ); } + + +/* Get atomic charge q for EEM method + */ +static void Calculate_Charges_EEM( const reax_system * const system, + static_storage * const workspace ) +{ + int i; +// real s_sum; + +// s_sum = 0.0; +// for ( i = 0; i < system->N_cm; ++i ) +// { +// s_sum += workspace->s[0][i]; +// } + + for ( i = 0; i < system->N_cm; ++i ) + { + system->atoms[i].q = workspace->s[0][i]; + } +} + + +/* Main driver method for EEM kernel + * + * Rough outline: + * 1) init / setup routines + * 2) perform 1 linear solve + */ +static void EEM( reax_system * const system, control_params * const control, + simulation_data * const data, static_storage * const workspace, + const list * const far_nbrs, const output_controls * const out_control ) +{ + int iters; + + Init_Charges( system, control, data, workspace, far_nbrs ); + + Extrapolate_Charges_EEM( system, control, data, workspace ); + + switch ( control->cm_solver_type ) + { + case GMRES_S: + iters = GMRES( workspace, control, data, workspace->H, + workspace->b_s, control->cm_solver_q_err, workspace->s[0], + out_control->log, + ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); + break; + case GMRES_H_S: + iters = GMRES_HouseHolder( workspace, control, data,workspace->H, + workspace->b_s, control->cm_solver_q_err, workspace->s[0], + out_control->log, + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ); + break; + case CG_S: + iters = CG( workspace, workspace->H, workspace->b_s, control->cm_solver_q_err, + workspace->s[0], out_control->log ) + 1; + break; + case SDM_S: + iters = SDM( workspace, workspace->H, workspace->b_s, control->cm_solver_q_err, + workspace->s[0], out_control->log ) + 1; + break; + default: + fprintf( stderr, "Unrecognized QEq solver selection. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + + data->timing.cm_solver_iters += iters; + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "linsolve-" ); +#endif + + Calculate_Charges_EEM( system, workspace ); + + // if( data->step == control->nsteps ) + //Print_Charges( system, control, workspace, data->step ); +} + + +void Compute_Charges( reax_system * const system, control_params * const control, + simulation_data * const data, static_storage * const workspace, + const list * const far_nbrs, const output_controls * const out_control ) +{ + switch ( control->charge_method ) + { + case QEQ_CM: + QEq( system, control, data, workspace, far_nbrs, out_control ); + break; + case EEM_CM: + EEM( system, control, data, workspace, far_nbrs, out_control ); + break; + case ACKS2_CM: + //TODO + break; + default: + fprintf( stderr, "Invalid charge method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } +} diff --git a/sPuReMD/src/QEq.h b/sPuReMD/src/charges.h similarity index 88% rename from sPuReMD/src/QEq.h rename to sPuReMD/src/charges.h index c7186aaee64b35ab67a6fe590de525d99db08c32..50f563c3e94fc5e8a5c324d2d3467572c6ea930d 100644 --- a/sPuReMD/src/QEq.h +++ b/sPuReMD/src/charges.h @@ -19,12 +19,12 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#ifndef __QEq_H_ -#define __QEq_H_ +#ifndef __CHARGES_H_ +#define __CHARGES_H_ #include "mytypes.h" -void QEq( reax_system* const, control_params* const, simulation_data* const, +void Compute_Charges( reax_system* const, control_params* const, simulation_data* const, static_storage* const, const list* const, const output_controls* const ); diff --git a/sPuReMD/src/control.c b/sPuReMD/src/control.c index a10ee4f206c92c2765e89dbc45b41276168f4730..e9470449fba1397482b82b91bef0f88f3a83d346 100644 --- a/sPuReMD/src/control.c +++ b/sPuReMD/src/control.c @@ -71,6 +71,7 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control, control->tabulate = 0; control->charge_method = QEQ_CM; + control->cm_q_net = 0.0; control->cm_solver_type = GMRES_S; control->cm_solver_q_err = 0.000001; control->cm_domain_sparsify_enabled = FALSE; @@ -246,6 +247,11 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control, ival = atoi( tmp[1] ); control->charge_method = ival; } + else if ( strcmp(tmp[0], "cm_q_net") == 0 ) + { + val = atof( tmp[1] ); + control->cm_q_net = val; + } else if ( strcmp(tmp[0], "cm_solver_type") == 0 ) { ival = atoi( tmp[1] ); diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c index 8c21776a7250e8fa428f6fdf6f33903f868c0a7f..91e3430f21245bf624e672be2b72dd44077c222f 100644 --- a/sPuReMD/src/forces.c +++ b/sPuReMD/src/forces.c @@ -29,7 +29,7 @@ #include "list.h" #include "print_utils.h" #include "system_props.h" -#include "QEq.h" +#include "charges.h" #include "vector.h" @@ -132,9 +132,10 @@ void Compute_NonBonded_Forces( reax_system *system, control_params *control, #endif t_start = Get_Time( ); - QEq( system, control, data, workspace, lists[FAR_NBRS], out_control ); + Compute_Charges( system, control, data, workspace, lists[FAR_NBRS], out_control ); t_elapsed = Get_Timing_Info( t_start ); - data->timing.QEq += t_elapsed; + data->timing.cm += t_elapsed; + #if defined(DEBUG_FOCUS) fprintf( stderr, "qeq - " ); #endif @@ -352,7 +353,7 @@ static inline real Init_Charge_Matrix_Entry( reax_system *system, control_params *control, int i, int j, real r_ij, MATRIX_ENTRY_POSITION pos ) { - real Tap, dr3gamij_1, dr3gamij_3, ret = 0.0; + real Tap, gamij, dr3gamij_1, dr3gamij_3, ret = 0.0; switch ( control->charge_method ) { @@ -368,15 +369,18 @@ static inline real Init_Charge_Matrix_Entry( reax_system *system, Tap = Tap * r_ij + control->Tap1; Tap = Tap * r_ij + control->Tap0; + /* shielding */ dr3gamij_1 = ( r_ij * r_ij * r_ij + system->reaxprm.tbp[system->atoms[i].type][system->atoms[j].type].gamma ); dr3gamij_3 = POW( dr3gamij_1 , 0.33333333333333 ); ret = ((i == j) ? 0.5 : 1.0) * Tap * EV_to_KCALpMOL / dr3gamij_3; break; + case DIAGONAL: ret = system->reaxprm.sbp[system->atoms[i].type].eta; break; + default: fprintf( stderr, "[Init_forces] Invalid matrix position. Terminating...\n" ); exit( INVALID_INPUT ); @@ -388,9 +392,35 @@ static inline real Init_Charge_Matrix_Entry( reax_system *system, switch ( pos ) { case OFF_DIAGONAL: + Tap = control->Tap7 * r_ij + control->Tap6; + Tap = Tap * r_ij + control->Tap5; + Tap = Tap * r_ij + control->Tap4; + Tap = Tap * r_ij + control->Tap3; + Tap = Tap * r_ij + control->Tap2; + Tap = Tap * r_ij + control->Tap1; + Tap = Tap * r_ij + control->Tap0; + +// gamij = system->reaxprm.sbp[system->atoms[i].type].gamma +// + system->reaxprm.sbp[system->atoms[j].type].gamma; +// gamij = SQRT( gamij ); +// dr3gamij_1 = ( r_ij * r_ij * r_ij + +// 1.0 / ( gamij * gamij * gamij ) ); +// dr3gamij_3 = POW( dr3gamij_1 , 0.33333333333333 ); +// +// ret = Tap * EV_to_KCALpMOL / dr3gamij_3; + + /* shielding */ + dr3gamij_1 = ( r_ij * r_ij * r_ij + + system->reaxprm.tbp[system->atoms[i].type][system->atoms[j].type].gamma ); + dr3gamij_3 = POW( dr3gamij_1 , 0.33333333333333 ); + + ret = ((i == j) ? 0.5 : 1.0) * Tap * EV_to_KCALpMOL / dr3gamij_3; break; + case DIAGONAL: + ret = 2.0 * system->reaxprm.sbp[system->atoms[i].type].eta; break; + default: fprintf( stderr, "[Init_forces] Invalid matrix position. Terminating...\n" ); exit( INVALID_INPUT ); @@ -423,6 +453,42 @@ static inline real Init_Charge_Matrix_Entry( reax_system *system, } +static void Init_Charge_Matrix_Remaining_Entries( reax_system *system, + control_params *control, sparse_matrix * H, sparse_matrix * H_sp, + int * Htop, int * H_sp_top ) +{ + int i; + + switch ( control->charge_method ) + { + case QEQ_CM: + break; + + case EEM_CM: + H->start[system->N] = *Htop; + H_sp->start[system->N] = *H_sp_top; + + for ( i = 0; i < system->N_cm; ++i ) + { + H->j[*Htop] = i; + H->val[*Htop] = 1.0; + ++(*Htop); + + H_sp->j[*H_sp_top] = i; + H_sp->val[*H_sp_top] = 1.0; + ++(*H_sp_top); + } + break; + + case ACKS2_CM: + break; + + default: + break; + } +} + + void Init_Forces( reax_system *system, control_params *control, simulation_data *data, static_storage *workspace, list **lists, output_controls *out_control ) @@ -696,7 +762,9 @@ void Init_Forces( reax_system *system, control_params *control, Set_End_Index( i, btop_i, bonds ); if ( ihb == 1 ) + { Set_End_Index( workspace->hbond_index[i], ihb_top, hbonds ); + } //fprintf( stderr, "%d bonds start: %d, end: %d\n", // i, Start_Index( i, bonds ), End_Index( i, bonds ) ); } @@ -705,8 +773,10 @@ void Init_Forces( reax_system *system, control_params *control, // printf("H_sp_top = %d\n", H_sp_top); // mark the end of j list - H->start[i] = Htop; - H_sp->start[i] = H_sp_top; + Init_Charge_Matrix_Remaining_Entries( system, control, H, H_sp, &Htop, &H_sp_top ); + H->start[system->N_cm] = Htop; + H_sp->start[system->N_cm] = H_sp_top; + /* validate lists - decide if reallocation is required! */ Validate_Lists( workspace, lists, data->step, system->N, H->m, Htop, num_bonds, num_hbonds ); diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c index af3e234b65c8cd763399646ba014a3bfcf0113f9..aed0674020ee136db3b978a641277826865764ad 100644 --- a/sPuReMD/src/init_md.c +++ b/sPuReMD/src/init_md.c @@ -209,15 +209,15 @@ void Init_Simulation_Data( reax_system *system, control_params *control, data->timing.init_forces = 0; data->timing.bonded = 0; data->timing.nonb = 0; - data->timing.QEq = ZERO; - data->timing.QEq_sort_mat_rows = ZERO; - data->timing.pre_comp = ZERO; - data->timing.pre_app = ZERO; - data->timing.solver_iters = 0; - data->timing.solver_spmv = ZERO; - data->timing.solver_vector_ops = ZERO; - data->timing.solver_orthog = ZERO; - data->timing.solver_tri_solve = ZERO; + data->timing.cm = ZERO; + data->timing.cm_sort_mat_rows = ZERO; + data->timing.cm_solver_pre_comp = ZERO; + data->timing.cm_solver_pre_app = ZERO; + data->timing.cm_solver_iters = 0; + data->timing.cm_solver_spmv = ZERO; + data->timing.cm_solver_vector_ops = ZERO; + data->timing.cm_solver_orthog = ZERO; + data->timing.cm_solver_tri_solve = ZERO; } @@ -231,8 +231,10 @@ void Init_Taper( control_params *control ) swa = control->r_low; swb = control->r_cut; - if ( fabs( swa ) > 0.01 ) + if ( FABS( swa ) > 0.01 ) + { fprintf( stderr, "Warning: non-zero value for lower Taper-radius cutoff\n" ); + } if ( swb < 0 ) { @@ -240,8 +242,10 @@ void Init_Taper( control_params *control ) exit( INVALID_INPUT ); } else if ( swb < 5 ) + { fprintf( stderr, "Warning: low value for upper Taper-radius cutoff:%f\n", - swb ); + swb ); + } d1 = swb - swa; d7 = POW( d1, 7.0 ); @@ -258,7 +262,7 @@ void Init_Taper( control_params *control ) control->Tap2 = -210.0 * (swa3 * swb2 + swa2 * swb3) / d7; control->Tap1 = 140.0 * swa3 * swb3 / d7; control->Tap0 = (-35.0 * swa3 * swb2 * swb2 + 21.0 * swa2 * swb3 * swb2 + - 7.0 * swa * swb3 * swb3 + swb3 * swb3 * swb ) / d7; + 7.0 * swa * swb3 * swb3 + swb3 * swb3 * swb ) / d7; } @@ -289,71 +293,136 @@ void Init_Workspace( reax_system *system, control_params *control, workspace->CdDelta = (real *) malloc( system->N * sizeof( real ) ); workspace->vlpex = (real *) malloc( system->N * sizeof( real ) ); - /* QEq storage */ - workspace->H = NULL; - workspace->H_sp = NULL; - workspace->L = NULL; - workspace->U = NULL; + /* charge method storage */ + switch ( control->charge_method ) + { + case QEQ_CM: + system->N_cm = system->N; + break; + case EEM_CM: + system->N_cm = system->N + 1; + break; + case ACKS2_CM: + system->N_cm = 2 * system->N + 2; + break; + default: + fprintf( stderr, "Unknown charge method type. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + + workspace->H = NULL; + workspace->H_sp = NULL; + workspace->L = NULL; + workspace->U = NULL; workspace->Hdia_inv = NULL; - workspace->droptol = (real *) calloc( system->N, sizeof( real ) ); - workspace->w = (real *) calloc( system->N, sizeof( real ) ); - workspace->b = (real *) calloc( system->N * 2, sizeof( real ) ); - workspace->b_s = (real *) calloc( system->N, sizeof( real ) ); - workspace->b_t = (real *) calloc( system->N, sizeof( real ) ); - workspace->b_prc = (real *) calloc( system->N * 2, sizeof( real ) ); - workspace->b_prm = (real *) calloc( system->N * 2, sizeof( real ) ); - workspace->s_t = (real *) calloc( system->N * 2, sizeof( real ) ); + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + workspace->droptol = (real *) calloc( system->N_cm, sizeof( real ) ); + } + //TODO: check if unused + //workspace->w = (real *) calloc( cm_lin_sys_size, sizeof( real ) ); + //TODO: check if unused + workspace->b = (real *) calloc( system->N_cm * 2, sizeof( real ) ); + workspace->b_s = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->b_t = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->b_prc = (real *) calloc( system->N_cm * 2, sizeof( real ) ); + workspace->b_prm = (real *) calloc( system->N_cm * 2, sizeof( real ) ); + //TODO: check if unused + //workspace->s_t = (real *) calloc( cm_lin_sys_size * 2, sizeof( real ) ); workspace->s = (real**) calloc( 5, sizeof( real* ) ); workspace->t = (real**) calloc( 5, sizeof( real* ) ); for ( i = 0; i < 5; ++i ) { - workspace->s[i] = (real *) calloc( system->N, sizeof( real ) ); - workspace->t[i] = (real *) calloc( system->N, sizeof( real ) ); + workspace->s[i] = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->t[i] = (real *) calloc( system->N_cm, sizeof( real ) ); } - // workspace->s_old = (real *) calloc( system->N, sizeof( real ) ); - // workspace->t_old = (real *) calloc( system->N, sizeof( real ) ); - // workspace->s_oldest = (real *) calloc( system->N, sizeof( real ) ); - // workspace->t_oldest = (real *) calloc( system->N, sizeof( real ) ); - for ( i = 0; i < system->N; ++i ) + switch ( control->charge_method ) { - workspace->b_s[i] = -system->reaxprm.sbp[ system->atoms[i].type ].chi; - workspace->b_t[i] = -1.0; - - workspace->b[i] = -system->reaxprm.sbp[ system->atoms[i].type ].chi; - workspace->b[i + system->N] = -1.0; + case QEQ_CM: + for ( i = 0; i < system->N; ++i ) + { + workspace->b_s[i] = -system->reaxprm.sbp[ system->atoms[i].type ].chi; + workspace->b_t[i] = -1.0; + + //TODO: check if unused (redundant) + workspace->b[i] = -system->reaxprm.sbp[ system->atoms[i].type ].chi; + workspace->b[i + system->N] = -1.0; + } + break; + case EEM_CM: + for ( i = 0; i < system->N; ++i ) + { + workspace->b_s[i] = -system->reaxprm.sbp[ system->atoms[i].type ].chi; + + //TODO: check if unused (redundant) + workspace->b[i] = -system->reaxprm.sbp[ system->atoms[i].type ].chi; + } + + workspace->b_s[system->N] = control->cm_q_net; + workspace->b[system->N] = control->cm_q_net; + break; + case ACKS2_CM: + //TODO + break; + default: + fprintf( stderr, "Unknown charge method type. Terminating...\n" ); + exit( INVALID_INPUT ); + break; } - /* GMRES storage */ - workspace->y = (real *) calloc( RESTART + 1, sizeof( real ) ); - workspace->z = (real *) calloc( RESTART + 1, sizeof( real ) ); - workspace->g = (real *) calloc( RESTART + 1, sizeof( real ) ); - workspace->h = (real **) calloc( RESTART + 1, sizeof( real*) ); - workspace->hs = (real *) calloc( RESTART + 1, sizeof( real ) ); - workspace->hc = (real *) calloc( RESTART + 1, sizeof( real ) ); - workspace->rn = (real **) calloc( RESTART + 1, sizeof( real*) ); - workspace->v = (real **) calloc( RESTART + 1, sizeof( real*) ); - - for ( i = 0; i < RESTART + 1; ++i ) + switch ( control->cm_solver_type ) { - workspace->h[i] = (real *) calloc( RESTART + 1, sizeof( real ) ); - workspace->rn[i] = (real *) calloc( system->N * 2, sizeof( real ) ); - workspace->v[i] = (real *) calloc( system->N, sizeof( real ) ); + /* GMRES storage */ + case GMRES_S: + case GMRES_H_S: + workspace->y = (real *) calloc( RESTART + 1, sizeof( real ) ); + workspace->z = (real *) calloc( RESTART + 1, sizeof( real ) ); + workspace->g = (real *) calloc( RESTART + 1, sizeof( real ) ); + workspace->h = (real **) calloc( RESTART + 1, sizeof( real*) ); + workspace->hs = (real *) calloc( RESTART + 1, sizeof( real ) ); + workspace->hc = (real *) calloc( RESTART + 1, sizeof( real ) ); + workspace->rn = (real **) calloc( RESTART + 1, sizeof( real*) ); + workspace->v = (real **) calloc( RESTART + 1, sizeof( real*) ); + + for ( i = 0; i < RESTART + 1; ++i ) + { + workspace->h[i] = (real *) calloc( RESTART + 1, sizeof( real ) ); + workspace->rn[i] = (real *) calloc( system->N_cm * 2, sizeof( real ) ); + workspace->v[i] = (real *) calloc( system->N_cm, sizeof( real ) ); + } + + workspace->r = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->d = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->q = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->p = (real *) calloc( system->N_cm, sizeof( real ) ); + break; + + /* CG storage */ + case CG_S: + workspace->r = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->d = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->q = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->p = (real *) calloc( system->N_cm, sizeof( real ) ); + break; + + case SDM_S: + //TODO + break; + + default: + fprintf( stderr, "Unknown charge method linear solver type. Terminating...\n" ); + exit( INVALID_INPUT ); + break; } - /* CG storage */ - workspace->r = (real *) calloc( system->N, sizeof( real ) ); - workspace->d = (real *) calloc( system->N, sizeof( real ) ); - workspace->q = (real *) calloc( system->N, sizeof( real ) ); - workspace->p = (real *) calloc( system->N, sizeof( real ) ); - - /* integrator storage */ workspace->a = (rvec *) malloc( system->N * sizeof( rvec ) ); workspace->f_old = (rvec *) malloc( system->N * sizeof( rvec ) ); workspace->v_const = (rvec *) malloc( system->N * sizeof( rvec ) ); - /* storage for analysis */ if ( control->molec_anal || control->diffusion_coef ) { @@ -361,12 +430,18 @@ void Init_Workspace( reax_system *system, control_params *control, workspace->old_mark = (int *) calloc( system->N, sizeof(int) ); } else + { workspace->mark = workspace->old_mark = NULL; + } if ( control->diffusion_coef ) + { workspace->x_old = (rvec *) calloc( system->N, sizeof( rvec ) ); - else workspace->x_old = NULL; - + } + else + { + workspace->x_old = NULL; + } #ifdef TEST_FORCES workspace->dDelta = (rvec *) malloc( system->N * sizeof( rvec ) ); @@ -407,11 +482,13 @@ void Init_Lists( reax_system *system, control_params *control, int *hb_top, *bond_top; num_nbrs = Estimate_NumNeighbors( system, control, workspace, lists ); + if ( !Make_List(system->N, num_nbrs, TYP_FAR_NEIGHBOR, (*lists) + FAR_NBRS) ) { fprintf(stderr, "Problem in initializing far nbrs list. Terminating!\n"); exit( CANNOT_INITIALIZE ); } + #if defined(DEBUG_FOCUS) fprintf( stderr, "memory allocated: far_nbrs = %ldMB\n", num_nbrs * sizeof(far_neighbor_data) / (1024 * 1024) ); @@ -422,10 +499,10 @@ void Init_Lists( reax_system *system, control_params *control, hb_top = (int*) calloc( system->N, sizeof(int) ); bond_top = (int*) calloc( system->N, sizeof(int) ); num_3body = 0; - Estimate_Storage_Sizes( system, control, lists, - &Htop, hb_top, bond_top, &num_3body ); + Estimate_Storage_Sizes( system, control, lists, &Htop, + hb_top, bond_top, &num_3body ); - if ( Allocate_Matrix( &(workspace->H), system->N, Htop ) == FAILURE ) + if ( Allocate_Matrix( &(workspace->H), system->N_cm, Htop ) == FAILURE ) { fprintf( stderr, "Not enough space for init matrices. Terminating...\n" ); exit( INSUFFICIENT_MEMORY ); @@ -434,15 +511,16 @@ void Init_Lists( reax_system *system, control_params *control, * If so, need to refactor Estimate_Storage_Sizes * to use various cut-off distances as parameters * (non-bonded, hydrogen, 3body, etc.) */ - if ( Allocate_Matrix( &(workspace->H_sp), system->N, Htop ) == FAILURE ) + if ( Allocate_Matrix( &(workspace->H_sp), system->N_cm, Htop ) == FAILURE ) { fprintf( stderr, "Not enough space for init matrices. Terminating...\n" ); exit( INSUFFICIENT_MEMORY ); } + #if defined(DEBUG_FOCUS) fprintf( stderr, "estimated storage - Htop: %d\n", Htop ); fprintf( stderr, "memory allocated: H = %ldMB\n", - Htop * sizeof(sparse_matrix_entry) / (1024 * 1024) ); + Htop * sizeof(sparse_matrix_entry) / (1024 * 1024) ); #endif workspace->num_H = 0; @@ -450,13 +528,22 @@ void Init_Lists( reax_system *system, control_params *control, { /* init H indexes */ for ( i = 0; i < system->N; ++i ) - if ( system->reaxprm.sbp[ system->atoms[i].type ].p_hbond == 1 ) // H atom + { + // H atom + if ( system->reaxprm.sbp[ system->atoms[i].type ].p_hbond == 1 ) + { workspace->hbond_index[i] = workspace->num_H++; - else workspace->hbond_index[i] = -1; + } + else + { + workspace->hbond_index[i] = -1; + } + } Allocate_HBond_List( system->N, workspace->num_H, workspace->hbond_index, - hb_top, (*lists) + HBONDS ); + hb_top, (*lists) + HBONDS ); num_hbonds = hb_top[system->N - 1]; + #if defined(DEBUG_FOCUS) fprintf( stderr, "estimated storage - num_hbonds: %d\n", num_hbonds ); fprintf( stderr, "memory allocated: hbonds = %ldMB\n", @@ -467,6 +554,7 @@ void Init_Lists( reax_system *system, control_params *control, /* bonds list */ Allocate_Bond_List( system->N, bond_top, (*lists) + BONDS ); num_bonds = bond_top[system->N - 1]; + #if defined(DEBUG_FOCUS) fprintf( stderr, "estimated storage - num_bonds: %d\n", num_bonds ); fprintf( stderr, "memory allocated: bonds = %ldMB\n", @@ -483,11 +571,13 @@ void Init_Lists( reax_system *system, control_params *control, fprintf( stderr, "Problem in initializing angles list. Terminating!\n" ); exit( CANNOT_INITIALIZE ); } + #if defined(DEBUG_FOCUS) fprintf( stderr, "estimated storage - num_3body: %d\n", num_3body ); fprintf( stderr, "memory allocated: 3-body = %ldMB\n", num_3body * sizeof(three_body_interaction_data) / (1024 * 1024) ); #endif + #ifdef TEST_FORCES if (!Make_List( system->N, num_bonds * 8, TYP_DDELTA, (*lists) + DDELTA )) { @@ -548,7 +638,7 @@ void Init_Out_Controls(reax_system *system, control_params *control, out_control->log = fopen( temp, "w" ); fprintf( out_control->log, "%-6s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n", "step", "total", "neighbors", "init", "bonded", - "nonbonded", "QEq", "QEq Sort", "S iters", "Pre Comp", "Pre App", + "nonbonded", "CM", "CM Sort", "S iters", "Pre Comp", "Pre App", "S spmv", "S vec ops", "S orthog", "S tsolve" ); } diff --git a/sPuReMD/src/integrate.c b/sPuReMD/src/integrate.c index 142863cad18291a04f4f56fc364c73cf8fdabd3b..681ec92e97131544b616e7c71d4751effacbd59b 100644 --- a/sPuReMD/src/integrate.c +++ b/sPuReMD/src/integrate.c @@ -26,7 +26,7 @@ #include "grid.h" #include "neighbors.h" #include "print_utils.h" -#include "QEq.h" +#include "charges.h" #include "reset_utils.h" #include "restart.h" #include "system_props.h" @@ -483,7 +483,7 @@ void Velocity_Verlet_Nose_Hoover_NVT(reax_system* system, fprintf(out_control->log, "nbrs-"); fflush( out_control->log ); - /* QEq( system, control, workspace, lists[FAR_NBRS], out_control ); + /* Compute_Charges( system, control, workspace, lists[FAR_NBRS], out_control ); fprintf(out_control->log,"qeq-"); fflush( out_control->log ); */ Compute_Forces( system, control, data, workspace, lists, out_control ); @@ -589,7 +589,7 @@ void Velocity_Verlet_Isotropic_NPT( reax_system* system, fprintf(out_control->log, "nbrs-"); fflush( out_control->log ); - /* QEq( system, control, workspace, lists[FAR_NBRS], out_control ); + /* Compute_Charges( system, control, workspace, lists[FAR_NBRS], out_control ); fprintf(out_control->log,"qeq-"); fflush( out_control->log ); */ Compute_Forces( system, control, data, workspace, lists, out_control ); diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c index 4d37ae6f55c99c507e1c239dd8f20f59c3c11a75..727de37e4b78fb4b3d27820faa82a2828b440e66 100644 --- a/sPuReMD/src/lin_alg.c +++ b/sPuReMD/src/lin_alg.c @@ -1200,7 +1200,7 @@ int GMRES( const static_storage * const workspace, const control_params * const bnorm = Norm( b, N ); #pragma omp master { - data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); } if ( control->cm_solver_pre_comp_type == DIAG_PC ) @@ -1213,7 +1213,7 @@ int GMRES( const static_storage * const workspace, const control_params * const apply_preconditioner( workspace, control, b, workspace->b_prc, fresh_pre ); #pragma omp master { - data->timing.pre_app += Get_Timing_Info( time_start ); + data->timing.cm_solver_pre_app += Get_Timing_Info( time_start ); } } @@ -1228,7 +1228,7 @@ int GMRES( const static_storage * const workspace, const control_params * const Sparse_MatVec( H, x, workspace->b_prm ); #pragma omp master { - data->timing.solver_spmv += Get_Timing_Info( time_start ); + data->timing.cm_solver_spmv += Get_Timing_Info( time_start ); } if ( control->cm_solver_pre_comp_type == DIAG_PC ) @@ -1240,7 +1240,7 @@ int GMRES( const static_storage * const workspace, const control_params * const apply_preconditioner( workspace, control, workspace->b_prm, workspace->b_prm, FALSE ); #pragma omp master { - data->timing.pre_app += Get_Timing_Info( time_start ); + data->timing.cm_solver_pre_app += Get_Timing_Info( time_start ); } } @@ -1253,7 +1253,7 @@ int GMRES( const static_storage * const workspace, const control_params * const Vector_Sum( workspace->v[0], 1., workspace->b_prc, -1., workspace->b_prm, N ); #pragma omp master { - data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); } } else @@ -1265,7 +1265,7 @@ int GMRES( const static_storage * const workspace, const control_params * const Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N ); #pragma omp master { - data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); } } @@ -1279,7 +1279,7 @@ int GMRES( const static_storage * const workspace, const control_params * const itr == 0 ? fresh_pre : FALSE ); #pragma omp master { - data->timing.pre_app += Get_Timing_Info( time_start ); + data->timing.cm_solver_pre_app += Get_Timing_Info( time_start ); } } @@ -1295,7 +1295,7 @@ int GMRES( const static_storage * const workspace, const control_params * const Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N ); #pragma omp master { - data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); } /* GMRES inner-loop */ @@ -1309,7 +1309,7 @@ int GMRES( const static_storage * const workspace, const control_params * const Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] ); #pragma omp master { - data->timing.solver_spmv += Get_Timing_Info( time_start ); + data->timing.cm_solver_spmv += Get_Timing_Info( time_start ); } #pragma omp master @@ -1319,7 +1319,7 @@ int GMRES( const static_storage * const workspace, const control_params * const apply_preconditioner( workspace, control, workspace->v[j + 1], workspace->v[j + 1], FALSE ); #pragma omp master { - data->timing.pre_app += Get_Timing_Info( time_start ); + data->timing.cm_solver_pre_app += Get_Timing_Info( time_start ); } if ( control->cm_solver_pre_comp_type == DIAG_PC ) @@ -1336,7 +1336,7 @@ int GMRES( const static_storage * const workspace, const control_params * const } #pragma omp master { - data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); } } else @@ -1363,7 +1363,7 @@ int GMRES( const static_storage * const workspace, const control_params * const } #pragma omp master { - data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); } } @@ -1380,7 +1380,7 @@ int GMRES( const static_storage * const workspace, const control_params * const 1. / workspace->h[j + 1][j], workspace->v[j + 1], N ); #pragma omp master { - data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); } #if defined(DEBUG) fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j ); @@ -1438,7 +1438,7 @@ int GMRES( const static_storage * const workspace, const control_params * const tmp2 = -workspace->hs[j] * workspace->g[j]; workspace->g[j] = tmp1; workspace->g[j + 1] = tmp2; - data->timing.solver_orthog += Get_Timing_Info( time_start ); + data->timing.cm_solver_orthog += Get_Timing_Info( time_start ); } #pragma omp barrier @@ -1464,7 +1464,7 @@ int GMRES( const static_storage * const workspace, const control_params * const workspace->y[i] = temp / workspace->h[i][i]; } - data->timing.solver_tri_solve += Get_Timing_Info( time_start ); + data->timing.cm_solver_tri_solve += Get_Timing_Info( time_start ); /* update x = x_0 + Vy */ time_start = Get_Time( ); @@ -1478,7 +1478,7 @@ int GMRES( const static_storage * const workspace, const control_params * const Vector_Add( x, 1., workspace->p, N ); #pragma omp master { - data->timing.solver_vector_ops += Get_Timing_Info( time_start ); + data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); } /* stopping condition */ diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h index 23653bb3445c683cb62d501a781d9ae07384de35..a6e5a1666308225f2a0980f4e50e9e02357ad5c7 100644 --- a/sPuReMD/src/mytypes.h +++ b/sPuReMD/src/mytypes.h @@ -215,7 +215,7 @@ enum pre_app }; -/* Global params mapping */ +/* Force field global params mapping */ /* l[0] = p_boc1 l[1] = p_boc2 @@ -456,10 +456,17 @@ typedef struct typedef struct { + /* number of atoms */ int N; + /* dimension of the N x N sparse charge method matrix H */ + int N_cm; + /* atom info */ reax_atom *atoms; + /* atomic interaction parameters */ reax_interaction reaxprm; + /* simulation space (a.k.a. box) parameters */ simulation_box box; + /* grid structure used for binning atoms and tracking neighboring bins */ grid g; } reax_system; @@ -521,6 +528,7 @@ typedef struct unsigned int charge_method; unsigned int cm_solver_type; + real cm_q_net; real cm_solver_q_err; real cm_domain_sparsity; unsigned int cm_domain_sparsify_enabled; @@ -589,15 +597,15 @@ typedef struct real init_forces; real bonded; real nonb; - real QEq; - real QEq_sort_mat_rows; - real pre_comp; - real pre_app; - int solver_iters; - real solver_spmv; - real solver_vector_ops; - real solver_orthog; - real solver_tri_solve; + real cm; + real cm_sort_mat_rows; + real cm_solver_pre_comp; + real cm_solver_pre_app; + int cm_solver_iters; + real cm_solver_spmv; + real cm_solver_vector_ops; + real cm_solver_orthog; + real cm_solver_tri_solve; } reax_timing; diff --git a/sPuReMD/src/print_utils.c b/sPuReMD/src/print_utils.c index 579ba6290a5c9812cd8b1342f96f959f597d6709..015e25731801c993fabbf2765140a2fd420016e7 100644 --- a/sPuReMD/src/print_utils.c +++ b/sPuReMD/src/print_utils.c @@ -622,30 +622,30 @@ void Output_Results( reax_system *system, control_params *control, data->timing.init_forces / f_update, data->timing.bonded / f_update, data->timing.nonb / f_update, - data->timing.QEq / f_update, - data->timing.QEq_sort_mat_rows / f_update, - (double)data->timing.solver_iters / f_update, - data->timing.pre_comp / f_update, - data->timing.pre_app / f_update, - data->timing.solver_spmv / f_update, - data->timing.solver_vector_ops / f_update, - data->timing.solver_orthog / f_update, - data->timing.solver_tri_solve / f_update ); + data->timing.cm / f_update, + data->timing.cm_sort_mat_rows / f_update, + (double)data->timing.cm_solver_iters / f_update, + data->timing.cm_solver_pre_comp / f_update, + data->timing.cm_solver_pre_app / f_update, + data->timing.cm_solver_spmv / f_update, + data->timing.cm_solver_vector_ops / f_update, + data->timing.cm_solver_orthog / f_update, + data->timing.cm_solver_tri_solve / f_update ); data->timing.total = Get_Time( ); data->timing.nbrs = 0; data->timing.init_forces = 0; data->timing.bonded = 0; data->timing.nonb = 0; - data->timing.QEq = ZERO; - data->timing.QEq_sort_mat_rows = ZERO; - data->timing.pre_comp = ZERO; - data->timing.pre_app = ZERO; - data->timing.solver_iters = 0; - data->timing.solver_spmv = ZERO; - data->timing.solver_vector_ops = ZERO; - data->timing.solver_orthog = ZERO; - data->timing.solver_tri_solve = ZERO; + data->timing.cm = ZERO; + data->timing.cm_sort_mat_rows = ZERO; + data->timing.cm_solver_pre_comp = ZERO; + data->timing.cm_solver_pre_app = ZERO; + data->timing.cm_solver_iters = 0; + data->timing.cm_solver_spmv = ZERO; + data->timing.cm_solver_vector_ops = ZERO; + data->timing.cm_solver_orthog = ZERO; + data->timing.cm_solver_tri_solve = ZERO; fflush( out_control->out ); fflush( out_control->pot ); @@ -694,17 +694,17 @@ void Output_Results( reax_system *system, control_params *control, void Print_Linear_System( reax_system *system, control_params *control, - static_storage *workspace, int step ) + static_storage *workspace, int step ) { - int i, j; - char fname[100]; + int i, j; + char fname[100]; sparse_matrix *H; FILE *out; sprintf( fname, "%s.state%d.out", control->sim_name, step ); out = fopen( fname, "w" ); - for ( i = 0; i < system->N; i++ ) + for ( i = 0; i < system->N_cm; i++ ) fprintf( out, "%6d%2d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", workspace->orig_id[i], system->atoms[i].type, system->atoms[i].x[0], system->atoms[i].x[1], @@ -719,12 +719,11 @@ void Print_Linear_System( reax_system *system, control_params *control, // fprintf( out, "%g\n", workspace->s_t[i+system->N] ); // fclose( out ); - sprintf( fname, "%s.H%d.out", control->sim_name, step ); out = fopen( fname, "w" ); H = workspace->H; - for ( i = 0; i < system->N; ++i ) + for ( i = 0; i < system->N_cm; ++i ) { for ( j = H->start[i]; j < H->start[i + 1] - 1; ++j ) { @@ -747,7 +746,7 @@ void Print_Linear_System( reax_system *system, control_params *control, out = fopen( fname, "w" ); H = workspace->H_sp; - for ( i = 0; i < system->N; ++i ) + for ( i = 0; i < system->N_cm; ++i ) { for ( j = H->start[i]; j < H->start[i + 1] - 1; ++j ) { diff --git a/sPuReMD/src/testmd.c b/sPuReMD/src/testmd.c index c4c6645375434287bc670b0317ed116254502d46..075a49b1910787a0eadf0c9e9d1bbf133f3f5869 100644 --- a/sPuReMD/src/testmd.c +++ b/sPuReMD/src/testmd.c @@ -47,7 +47,7 @@ static void Post_Evolve( reax_system * const system, /* if velocity dependent force then { Generate_Neighbor_Lists( &system, &control, &lists ); - QEq(system, control, workspace, lists[FAR_NBRS]); + Compute_Charges(system, control, workspace, lists[FAR_NBRS]); Introduce compute_force here if we are using velocity dependent forces Compute_Forces(system,control,data,workspace,lists); } */