From 21aca7ac2c19d68959748bac2e6343ef62dfaef2 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnku@cse.msu.edu> Date: Sat, 25 Feb 2017 18:34:26 -0500 Subject: [PATCH] Complete ACKS2 implementation. Fix ILU_PAR/ILUT_PAR diagonal scalings bugs (absolute value). --- sPuReMD/src/allocate.c | 4 +- sPuReMD/src/charges.c | 426 +++++++++++++++++++++++++++++++++----- sPuReMD/src/control.c | 5 +- sPuReMD/src/ffield.c | 1 + sPuReMD/src/forces.c | 158 +++++++++++++- sPuReMD/src/init_md.c | 41 +++- sPuReMD/src/mytypes.h | 3 +- sPuReMD/src/print_utils.c | 2 +- sPuReMD/src/testmd.c | 7 +- 9 files changed, 581 insertions(+), 66 deletions(-) diff --git a/sPuReMD/src/allocate.c b/sPuReMD/src/allocate.c index ed2c07a4..ab075f94 100644 --- a/sPuReMD/src/allocate.c +++ b/sPuReMD/src/allocate.c @@ -87,8 +87,8 @@ int Allocate_Matrix( sparse_matrix **pH, int n, int m ) H->n = n; H->m = m; - if ( (H->start = (unsigned int*) malloc(sizeof(int) * (n + 1))) == NULL - || (H->j = (unsigned int*) malloc(sizeof(int) * m)) == NULL + if ( (H->start = (unsigned int*) malloc(sizeof(unsigned int) * (n + 1))) == NULL + || (H->j = (unsigned int*) malloc(sizeof(unsigned int) * m)) == NULL || (H->val = (real*) malloc(sizeof(real) * m)) == NULL ) { return FAILURE; diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c index 2f6c5710..ecd2827e 100644 --- a/sPuReMD/src/charges.c +++ b/sPuReMD/src/charges.c @@ -582,12 +582,18 @@ static real diag_pre_comp( const reax_system * const system, real * const Hdia_i start = Get_Time( ); #pragma omp parallel for schedule(static) \ - default(none) private(i) + default(none) private(i) for ( i = 0; i < system->N; ++i ) { Hdia_inv[i] = 1.0 / system->reaxprm.sbp[system->atoms[i].type].eta; } - Hdia_inv[system->N_cm - 1] = 1.0; + + #pragma omp parallel for schedule(static) \ + default(none) private(i) + for ( i = system->N; i < system->N_cm; ++i ) + { + Hdia_inv[i] = 1.0; + } return Get_Timing_Info( start ); } @@ -595,21 +601,21 @@ static real diag_pre_comp( const reax_system * const system, real * const Hdia_i /* Incomplete Cholesky factorization with dual thresholding */ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, - sparse_matrix * const L, sparse_matrix * const U ) + sparse_matrix * const L, sparse_matrix * const U ) { int *tmp_j; real *tmp_val; int i, j, pj, k1, k2, tmptop, Ltop; real val, start; - int *Utop; + unsigned int *Utop; start = Get_Time( ); - if ( ( Utop = (int*) malloc((A->n + 1) * sizeof(int)) ) == NULL || + if ( ( Utop = (unsigned int*) malloc((A->n + 1) * sizeof(unsigned int)) ) == NULL || ( tmp_j = (int*) malloc(A->n * sizeof(int)) ) == NULL || ( tmp_val = (real*) malloc(A->n * sizeof(real)) ) == NULL ) { - fprintf( stderr, "not enough memory for ICHOLT preconditioning matrices. terminating.\n" ); + fprintf( stderr, "[ICHOLT] Not enough memory for preconditioning matrices. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); } @@ -620,9 +626,6 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, memset( U->start, 0, (A->n + 1) * sizeof(unsigned int) ); memset( Utop, 0, A->n * sizeof(unsigned int) ); -// fprintf( stderr, "n: %d\n", A->n ); -// fflush( stderr ); - for ( i = 0; i < A->n; ++i ) { L->start[i] = Ltop; @@ -633,9 +636,6 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, j = A->j[pj]; val = A->val[pj]; -// fprintf( stderr, " i: %d, j: %d\n", i, j ); -// fflush( stderr ); - if ( FABS(val) > droptol[i] ) { k1 = 0; @@ -664,15 +664,12 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, tmp_val[tmptop] = val; ++tmptop; } - -// fprintf( stderr, " -- done\n" ); -// fflush( stderr ); } // sanity check if ( A->j[pj] != i ) { - fprintf( stderr, "i=%d, badly built A matrix!\n", i ); + fprintf( stderr, "[ICHOLT] badly built A matrix!\n (i = %d) ", i ); exit( NUMERIC_BREAKDOWN ); } @@ -683,8 +680,17 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, val -= (tmp_val[k1] * tmp_val[k1]); } +#if defined(DEBUG) + if ( val < 0.0 ) + { + fprintf( stderr, "[ICHOLT] Numeric breakdown (SQRT of negative on diagonal i = %d). Terminating.\n", i ); + exit( NUMERIC_BREAKDOWN ); + + } +#endif + tmp_j[tmptop] = i; - tmp_val[tmptop] = SQRT(val); + tmp_val[tmptop] = SQRT( val ); // apply the dropping rule once again //fprintf( stderr, "row%d: tmptop: %d\n", i, tmptop ); @@ -854,7 +860,7 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, /* sanity check */ if ( sum < ZERO ) { - fprintf( stderr, "Numeric breakdown in ICHOL Terminating.\n"); + fprintf( stderr, "Numeric breakdown in ICHOL_PAR. Terminating.\n"); #if defined(DEBUG_FOCUS) fprintf( stderr, "A(%5d,%5d) = %10.3f\n", k - 1, A->entries[j].j, A->entries[j].val ); @@ -929,7 +935,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, ( D = (real*) malloc(A->n * sizeof(real)) ) == NULL || ( D_inv = (real*) malloc(A->n * sizeof(real)) ) == NULL ) { - fprintf( stderr, "not enough memory for ILU_PAR preconditioning matrices. terminating.\n" ); + fprintf( stderr, "[ILU_PAR] Not enough memory for preconditioning matrices. Terminating.\n" ); exit( INSUFFICIENT_MEMORY ); } @@ -937,12 +943,13 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, default(none) shared(D, D_inv) private(i) for ( i = 0; i < A->n; ++i ) { - D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] ); + D_inv[i] = SQRT( FABS( A->val[A->start[i + 1] - 1] ) ); D[i] = 1.0 / D_inv[i]; +// printf( "A->val[%8d] = %f, D[%4d] = %f, D_inv[%4d] = %f\n", A->start[i + 1] - 1, A->val[A->start[i + 1] - 1], i, D[i], i, D_inv[i] ); } /* to get convergence, A must have unit diagonal, so apply - * transformation DAD, where D = D(1./sqrt(D(A))) */ + * transformation DAD, where D = D(1./sqrt(abs(D(A)))) */ memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) ); #pragma omp parallel for schedule(static) \ default(none) shared(DAD, D) private(i, pj) @@ -1146,7 +1153,7 @@ static real ILUT_PAR( const sparse_matrix * const A, const real * droptol, default(none) shared(D, D_inv) private(i) for ( i = 0; i < A->n; ++i ) { - D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] ); + D_inv[i] = SQRT( FABS( A->val[A->start[i + 1] - 1] ) ); D[i] = 1.0 / D_inv[i]; } @@ -1461,7 +1468,7 @@ static void Compute_Preconditioner_QEq( const reax_system * const system, { sparse_matrix *Hptr; - if ( control->cm_domain_sparsify_enabled ) + if ( control->cm_domain_sparsify_enabled == TRUE ) { Hptr = workspace->H_sp; } @@ -1611,9 +1618,9 @@ static void Compute_Preconditioner_EEM( const reax_system * const system, switch ( control->cm_solver_pre_app_type ) { case TRI_SOLVE_PA: - tri_solve( workspace->L_EEM, ones, x, system->N, LOWER ); + tri_solve( workspace->L_EEM, ones, x, workspace->L_EEM->n, LOWER ); Transpose_I( workspace->U_EEM ); - tri_solve( workspace->U_EEM, ones, y, system->N, LOWER ); + tri_solve( workspace->U_EEM, ones, y, workspace->U_EEM->n, LOWER ); Transpose_I( workspace->U_EEM ); memcpy( workspace->L->start, workspace->L_EEM->start, sizeof(unsigned int) * (system->N + 1) ); @@ -1659,9 +1666,9 @@ static void Compute_Preconditioner_EEM( const reax_system * const system, break; case TRI_SOLVE_LEVEL_SCHED_PA: - tri_solve_level_sched( workspace->L_EEM, ones, x, system->N, LOWER, TRUE ); + tri_solve_level_sched( workspace->L_EEM, ones, x, workspace->L_EEM->n, LOWER, TRUE ); Transpose_I( workspace->U_EEM ); - tri_solve_level_sched( workspace->U_EEM, ones, y, system->N, LOWER, TRUE ); + tri_solve_level_sched( workspace->U_EEM, ones, y, workspace->U_EEM->n, LOWER, TRUE ); Transpose_I( workspace->U_EEM ); memcpy( workspace->L->start, workspace->L_EEM->start, sizeof(unsigned int) * (system->N + 1) ); @@ -1708,9 +1715,9 @@ static void Compute_Preconditioner_EEM( const reax_system * const system, //TODO: add Jacobi iter, etc.? default: - tri_solve( workspace->L_EEM, ones, x, system->N, LOWER ); + tri_solve( workspace->L_EEM, ones, x, workspace->L_EEM->n, LOWER ); Transpose_I( workspace->U_EEM ); - tri_solve( workspace->U_EEM, ones, y, system->N, LOWER ); + tri_solve( workspace->U_EEM, ones, y, workspace->U_EEM->n, LOWER ); Transpose_I( workspace->U_EEM ); memcpy( workspace->L->start, workspace->L_EEM->start, sizeof(unsigned int) * (system->N + 1) ); @@ -1778,6 +1785,99 @@ static void Compute_Preconditioner_EEM( const reax_system * const system, } +/* Compute preconditioner for ACKS2 + */ +static void Compute_Preconditioner_ACKS2( const reax_system * const system, + const control_params * const control, + simulation_data * const data, static_storage * const workspace, + const list * const far_nbrs ) +{ + sparse_matrix *Hptr; + + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Hptr = workspace->H_sp; + } + else + { + Hptr = workspace->H; + } + +#if defined(TEST_MAT) + Hptr = create_test_mat( ); +#endif + +// fprintf( stderr, "(%4d, %4d): %f\n", system->N, Hptr->j[Hptr->start[system->N + 1] - 1], Hptr->val[Hptr->start[system->N + 1] - 1] ); +// fprintf( stderr, "(%4d, %4d): %f\n", system->N_cm - 1, Hptr->j[Hptr->start[system->N_cm] - 1], Hptr->val[Hptr->start[system->N_cm] - 1] ); + Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; + Hptr->val[Hptr->start[system->N_cm] - 1] = 1.0; +// fprintf( stderr, "(%4d, %4d): %f\n", system->N, Hptr->j[Hptr->start[system->N + 1] - 1], Hptr->val[Hptr->start[system->N + 1] - 1] ); +// fprintf( stderr, "(%4d, %4d): %f\n", system->N_cm - 1, Hptr->j[Hptr->start[system->N_cm] - 1], Hptr->val[Hptr->start[system->N_cm] - 1] ); +// fflush( stderr ); + + switch ( control->cm_solver_pre_comp_type ) + { + case DIAG_PC: + data->timing.cm_solver_pre_comp += + diag_pre_comp( system, workspace->Hdia_inv ); + break; + + case ICHOLT_PC: + data->timing.cm_solver_pre_comp += + ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U ); + break; + + case ILU_PAR_PC: + data->timing.cm_solver_pre_comp += + ILU_PAR( Hptr, control->cm_solver_pre_comp_sweeps, workspace->L, workspace->U ); + break; + + case ILUT_PAR_PC: + data->timing.cm_solver_pre_comp += + ILUT_PAR( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps, + workspace->L, workspace->U ); + break; + + case ILU_SUPERLU_MT_PC: +#if defined(HAVE_SUPERLU_MT) + data->timing.cm_solver_pre_comp += + SuperLU_Factorize( Hptr, workspace->L, workspace->U ); +#else + fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" ); + exit( INVALID_INPUT ); +#endif + break; + + default: + fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + + Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; + Hptr->val[Hptr->start[system->N_cm] - 1] = 0.0; + +#if defined(DEBUG) + if ( control->cm_solver_pre_comp_type != DIAG_PC ) + { + fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) ); + +#if defined(DEBUG_FOCUS) + sprintf( fname, "%s.L%d.out", control->sim_name, data->step ); + Print_Sparse_Matrix2( workspace->L, fname ); + sprintf( fname, "%s.U%d.out", control->sim_name, data->step ); + Print_Sparse_Matrix2( workspace->U, fname ); + + fprintf( stderr, "icholt-" ); + //sprintf( fname, "%s.L%d.out", control->sim_name, data->step ); + //Print_Sparse_Matrix2( workspace->L, fname ); + //Print_Sparse_Matrix( U ); +#endif + } +#endif +} + + /* Setup routines before computing the preconditioner for QEq */ static void Setup_Preconditioner_QEq( const reax_system * const system, @@ -1786,10 +1886,10 @@ static void Setup_Preconditioner_QEq( const reax_system * const system, const list * const far_nbrs ) { int i, fillin; - real s_tmp, t_tmp, time; + real time; sparse_matrix *Hptr; - if (control->cm_domain_sparsify_enabled) + if ( control->cm_domain_sparsify_enabled == TRUE ) { Hptr = workspace->H_sp; } @@ -1830,7 +1930,7 @@ static void Setup_Preconditioner_QEq( const reax_system * const system, case DIAG_PC: if ( workspace->Hdia_inv == NULL ) { - if ( ( workspace->Hdia_inv = (real *) calloc( system->N_cm, sizeof( real ) ) ) == NULL ) + if ( ( workspace->Hdia_inv = (real *) calloc( Hptr->n, sizeof( real ) ) ) == NULL ) { fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); @@ -1850,13 +1950,13 @@ static void Setup_Preconditioner_QEq( const reax_system * const system, #if defined(DEBUG) fprintf( stderr, "fillin = %d\n", fillin ); fprintf( stderr, "allocated memory: L = U = %ldMB\n", - fillin * sizeof(sparse_matrix_entry) / (1024 * 1024) ); + fillin * (sizeof(real) + sizeof(unsigned int)) / (1024 * 1024) ); #endif if ( workspace->L == NULL ) { - if ( Allocate_Matrix( &(workspace->L), far_nbrs->n, fillin ) == FAILURE || - Allocate_Matrix( &(workspace->U), far_nbrs->n, fillin ) == FAILURE ) + if ( Allocate_Matrix( &(workspace->L), Hptr->n, fillin ) == FAILURE || + Allocate_Matrix( &(workspace->U), Hptr->n, fillin ) == FAILURE ) { fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); @@ -1942,10 +2042,10 @@ static void Setup_Preconditioner_EEM( const reax_system * const system, const list * const far_nbrs ) { int i, fillin; - real s_tmp, t_tmp, time; + real time; sparse_matrix *Hptr; - if (control->cm_domain_sparsify_enabled) + if ( control->cm_domain_sparsify_enabled == TRUE ) { Hptr = workspace->H_sp; } @@ -2025,7 +2125,7 @@ static void Setup_Preconditioner_EEM( const reax_system * const system, #if defined(DEBUG) fprintf( stderr, "fillin = %d\n", fillin ); fprintf( stderr, "allocated memory: L = U = %ldMB\n", - fillin * sizeof(sparse_matrix_entry) / (1024 * 1024) ); + fillin * (sizeof(real) + sizeof(unsigned int)) / (1024 * 1024) ); #endif if ( workspace->L == NULL ) @@ -2117,6 +2217,167 @@ static void Setup_Preconditioner_EEM( const reax_system * const system, } +/* Setup routines before computing the preconditioner for ACKS2 + */ +static void Setup_Preconditioner_ACKS2( const reax_system * const system, + const control_params * const control, + simulation_data * const data, static_storage * const workspace, + const list * const far_nbrs ) +{ + int i, fillin; + real time; + sparse_matrix *Hptr; + + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Hptr = workspace->H_sp; + } + else + { + Hptr = workspace->H; + } + + /* sort H needed for SpMV's in linear solver, H or H_sp needed for preconditioning */ + time = Get_Time( ); + Sort_Matrix_Rows( workspace->H ); + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Sort_Matrix_Rows( workspace->H_sp ); + } + + Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; + Hptr->val[Hptr->start[system->N_cm] - 1] = 1.0; + + if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) + { + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Hptr = setup_graph_coloring( workspace->H_sp ); + } + else + { + Hptr = setup_graph_coloring( workspace->H ); + } + + Sort_Matrix_Rows( Hptr ); + } + data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); + +#if defined(DEBUG) + fprintf( stderr, "H matrix sorted\n" ); +#endif + + switch ( control->cm_solver_pre_comp_type ) + { + case DIAG_PC: + if ( workspace->Hdia_inv == NULL ) + { + if ( ( workspace->Hdia_inv = (real *) calloc( Hptr->n, sizeof( real ) ) ) == NULL ) + { + fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + break; + + case ICHOLT_PC: + Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "drop tolerances calculated\n" ); +#endif + + fillin = Estimate_LU_Fill( Hptr, workspace->droptol ); + +#if defined(DEBUG) + fprintf( stderr, "fillin = %d\n", fillin ); + fprintf( stderr, "allocated memory: L = U = %ldMB\n", + fillin * (sizeof(real) + sizeof(unsigned int)) / (1024 * 1024) ); +#endif + + if ( workspace->L == NULL ) + { + if ( Allocate_Matrix( &(workspace->L), Hptr->n, fillin ) == FAILURE || + Allocate_Matrix( &(workspace->U), Hptr->n, fillin ) == FAILURE ) + { + fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + else + { + //TODO: reallocate + } + break; + + case ILU_PAR_PC: + if ( workspace->L == NULL ) + { + /* factors have sparsity pattern as H */ + if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == FAILURE || + Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == FAILURE ) + { + fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + else + { + //TODO: reallocate + } + break; + + case ILUT_PAR_PC: + Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "drop tolerances calculated\n" ); +#endif + + if ( workspace->L == NULL ) + { + /* TODO: safest storage estimate is ILU(0) (same as lower triangular portion of H), could improve later */ + if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == FAILURE || + Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == FAILURE ) + { + fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + else + { + //TODO: reallocate + } + break; + + case ILU_SUPERLU_MT_PC: + if ( workspace->L == NULL ) + { + /* factors have sparsity pattern as H */ + if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == FAILURE || + Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == FAILURE ) + { + fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + else + { + //TODO: reallocate + } + break; + + default: + fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + + Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; + Hptr->val[Hptr->start[system->N_cm] - 1] = 0.0; +} + + /* Combine ficticious charges s and t to get atomic charge q for QEq method */ static void Calculate_Charges_QEq( const reax_system * const system, @@ -2147,7 +2408,7 @@ static void Calculate_Charges_EEM( const reax_system * const system, { int i; - for ( i = 0; i < system->N_cm; ++i ) + for ( i = 0; i < system->N; ++i ) { system->atoms[i].q = workspace->s[0][i]; } @@ -2280,13 +2541,6 @@ static void EEM( reax_system * const system, control_params * const control, } // Print_Linear_System( system, control, workspace, data->step ); -// Print_Sparse_Matrix2( workspace->H, "H_0.out" ); -// Print_Sparse_Matrix2( workspace->L, "L_0.out" ); -// Print_Sparse_Matrix2( workspace->U, "U_0.out" ); -// Print_Sparse_Matrix2( workspace->H_EEM, "H_EEM_0.out" ); -// Print_Sparse_Matrix2( workspace->L_EEM, "L_EEM_0.out" ); -// Print_Sparse_Matrix2( workspace->U_EEM, "U_EEM_0.out" ); -// exit(0); Extrapolate_Charges_EEM( system, control, data, workspace ); @@ -2335,10 +2589,88 @@ static void EEM( reax_system * const system, control_params * const control, } +/* Main driver method for ACKS2 kernel + * + * Rough outline: + * 1) init / setup routines for preconditioning of linear solver + * 2) compute preconditioner + * 3) extrapolate charges + * 4) perform 1 linear solve + * 5) compute atomic charges based on output of (4) + */ +static void ACKS2( reax_system * const system, control_params * const control, + simulation_data * const data, static_storage * const workspace, + const list * const far_nbrs, const output_controls * const out_control ) +{ + int iters; + + if ( control->cm_solver_pre_comp_refactor > 0 && + ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ) + { + Setup_Preconditioner_ACKS2( system, control, data, workspace, far_nbrs ); + + Compute_Preconditioner_ACKS2( system, control, data, workspace, far_nbrs ); + } + +// Print_Linear_System( system, control, workspace, data->step ); +// Print_Sparse_Matrix2( workspace->H, "H_0.out" ); +// Print_Sparse_Matrix2( workspace->L, "L_0.out" ); +// Print_Sparse_Matrix2( workspace->U, "U_0.out" ); +// FILE * fp = fopen( "b.out", "w" ); +// Vector_Print( fp, "b.out", workspace->b_s, system->N_cm ); +// fclose( fp ); +// exit(0); + + Extrapolate_Charges_EEM( system, control, data, workspace ); + + switch ( control->cm_solver_type ) + { + case GMRES_S: + iters = GMRES( workspace, control, data, workspace->H, + workspace->b_s, control->cm_solver_q_err, workspace->s[0], + out_control->log, + ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); + break; + + case GMRES_H_S: + iters = GMRES_HouseHolder( workspace, control, data,workspace->H, + workspace->b_s, control->cm_solver_q_err, workspace->s[0], + out_control->log, + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ); + break; + + case CG_S: + iters = CG( workspace, workspace->H, workspace->b_s, control->cm_solver_q_err, + workspace->s[0], out_control->log ) + 1; + break; + + case SDM_S: + iters = SDM( workspace, workspace->H, workspace->b_s, control->cm_solver_q_err, + workspace->s[0], out_control->log ) + 1; + break; + + default: + fprintf( stderr, "Unrecognized ACKS2 solver selection. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + + data->timing.cm_solver_iters += iters; + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "linsolve-" ); +#endif + + Calculate_Charges_EEM( system, workspace ); +} + + void Compute_Charges( reax_system * const system, control_params * const control, simulation_data * const data, static_storage * const workspace, const list * const far_nbrs, const output_controls * const out_control ) { + int i; + switch ( control->charge_method ) { case QEQ_CM: @@ -2350,7 +2682,7 @@ void Compute_Charges( reax_system * const system, control_params * const control break; case ACKS2_CM: - //TODO + ACKS2( system, control, data, workspace, far_nbrs, out_control ); break; default: diff --git a/sPuReMD/src/control.c b/sPuReMD/src/control.c index e9470449..7f0134f3 100644 --- a/sPuReMD/src/control.c +++ b/sPuReMD/src/control.c @@ -266,7 +266,10 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control, { val = atof( tmp[1] ); control->cm_domain_sparsity = val; - control->cm_domain_sparsify_enabled = TRUE; + if ( val < 1.0 ) + { + control->cm_domain_sparsify_enabled = TRUE; + } } else if ( strcmp(tmp[0], "cm_solver_pre_comp_type") == 0 ) { diff --git a/sPuReMD/src/ffield.c b/sPuReMD/src/ffield.c index 088e1acb..0fa59d4e 100644 --- a/sPuReMD/src/ffield.c +++ b/sPuReMD/src/ffield.c @@ -206,6 +206,7 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) val = atof(tmp[5]); reax->sbp[i].b_o_133 = val; val = atof(tmp[6]); + reax->sbp[i].b_s_acks2 = val; val = atof(tmp[7]); /* line 4 */ diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c index 5e602253..e6c82684 100644 --- a/sPuReMD/src/forces.c +++ b/sPuReMD/src/forces.c @@ -424,13 +424,33 @@ static inline real Init_Charge_Matrix_Entry( reax_system *system, break; case ACKS2_CM: - //TODO switch ( pos ) { case OFF_DIAGONAL: + if ( r_ij < control->r_cut && r_ij > 0.001 ) + { + Tap = control->Tap7 * r_ij + control->Tap6; + Tap = Tap * r_ij + control->Tap5; + Tap = Tap * r_ij + control->Tap4; + Tap = Tap * r_ij + control->Tap3; + Tap = Tap * r_ij + control->Tap2; + Tap = Tap * r_ij + control->Tap1; + Tap = Tap * r_ij + control->Tap0; + + gamij = SQRT( system->reaxprm.sbp[system->atoms[i].type].gamma + * system->reaxprm.sbp[system->atoms[j].type].gamma ); + /* shielding */ + dr3gamij_1 = POW( r_ij, 3.0 ) + 1.0 / POW( gamij, 3.0 ); + dr3gamij_3 = POW( dr3gamij_1 , 1.0 / 3.0 ); + + ret = Tap * EV_to_KCALpMOL / dr3gamij_3; + } break; + case DIAGONAL: + ret = system->reaxprm.sbp[system->atoms[i].type].eta; break; + default: fprintf( stderr, "[Init_forces] Invalid matrix position. Terminating...\n" ); exit( INVALID_INPUT ); @@ -449,10 +469,12 @@ static inline real Init_Charge_Matrix_Entry( reax_system *system, static void Init_Charge_Matrix_Remaining_Entries( reax_system *system, - control_params *control, sparse_matrix * H, sparse_matrix * H_sp, + control_params *control, list *far_nbrs, + sparse_matrix * H, sparse_matrix * H_sp, int * Htop, int * H_sp_top ) { - int i; + int i, j, pj; + real d, xcut, bond_softness, * X_diag; switch ( control->charge_method ) { @@ -484,6 +506,130 @@ static void Init_Charge_Matrix_Remaining_Entries( reax_system *system, break; case ACKS2_CM: + if ( (X_diag = (real*) calloc(system->N, sizeof(real))) == NULL ) + { + fprintf( stderr, "not enough memory for charge matrix. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + H->start[system->N] = *Htop; + H_sp->start[system->N] = *H_sp_top; + + for ( i = 0; i < system->N; ++i ) + { + H->j[*Htop] = i; + H->val[*Htop] = 1.0; + *Htop = *Htop + 1; + + H_sp->j[*H_sp_top] = i; + H_sp->val[*H_sp_top] = 1.0; + *H_sp_top = *H_sp_top + 1; + } + + H->j[*Htop] = system->N; + H->val[*Htop] = 0.0; + *Htop = *Htop + 1; + + H_sp->j[*H_sp_top] = system->N; + H_sp->val[*H_sp_top] = 0.0; + *H_sp_top = *H_sp_top + 1; + + for ( i = 0; i < system->N; ++i ) + { + H->start[system->N + i + 1] = *Htop; + H_sp->start[system->N + i + 1] = *H_sp_top; + + H->j[*Htop] = i; + H->val[*Htop] = 1.0; + *Htop = *Htop + 1; + + H_sp->j[*H_sp_top] = i; + H_sp->val[*H_sp_top] = 1.0; + *H_sp_top = *H_sp_top + 1; + + for ( pj = Start_Index(i, far_nbrs); pj < End_Index(i, far_nbrs); ++pj ) + { + if ( far_nbrs->select.far_nbr_list[pj].d <= control->r_cut ) + { + j = far_nbrs->select.far_nbr_list[pj].nbr; + + xcut = ( system->reaxprm.sbp[ system->atoms[i].type ].b_s_acks2 + + system->reaxprm.sbp[ system->atoms[j].type ].b_s_acks2 ) + / 2.0; + + if ( far_nbrs->select.far_nbr_list[pj].d < xcut && + far_nbrs->select.far_nbr_list[pj].d > 0.001 ) + { + d = far_nbrs->select.far_nbr_list[pj].d / xcut; + bond_softness = system->reaxprm.gp.l[34] * POW( d, 3.0 ) * POW( 1.0 - d, 6.0 ); + + H->j[*Htop] = system->N + j + 1; + H->val[*Htop] = MAX( 0.0, bond_softness ); + *Htop = *Htop + 1; + + H_sp->j[*H_sp_top] = system->N + j + 1; + H_sp->val[*H_sp_top] = MAX( 0.0, bond_softness ); + *H_sp_top = *H_sp_top + 1; + + X_diag[i] -= bond_softness; + X_diag[j] -= bond_softness; + } + } + } + + H->j[*Htop] = system->N + i + 1; + H->val[*Htop] = 0.0; + *Htop = *Htop + 1; + + H_sp->j[*H_sp_top] = system->N + i + 1; + H_sp->val[*H_sp_top] = 0.0; + *H_sp_top = *H_sp_top + 1; + } + + H->start[system->N_cm - 1] = *Htop; + H_sp->start[system->N_cm - 1] = *H_sp_top; + + for ( i = system->N + 1; i < system->N_cm - 1; ++i ) + { + for ( pj = H->start[i]; pj < H->start[i + 1]; ++pj ) + { + if ( H->j[pj] == i ) + { + H->val[pj] = X_diag[i - system->N - 1]; + break; + } + } + + for ( pj = H_sp->start[i]; pj < H_sp->start[i + 1]; ++pj ) + { + if ( H_sp->j[pj] == i ) + { + H_sp->val[pj] = X_diag[i - system->N - 1]; + break; + } + } + } + + for ( i = system->N + 1; i < system->N_cm - 1; ++i ) + { + H->j[*Htop] = i; + H->val[*Htop] = 1.0; + *Htop = *Htop + 1; + + H_sp->j[*H_sp_top] = i; + H_sp->val[*H_sp_top] = 1.0; + *H_sp_top = *H_sp_top + 1; + } + + H->j[*Htop] = system->N_cm - 1; + H->val[*Htop] = 0.0; + *Htop = *Htop + 1; + + H_sp->j[*H_sp_top] = system->N_cm - 1; + H_sp->val[*H_sp_top] = 0.0; + *H_sp_top = *H_sp_top + 1; + + free( X_diag ); break; default: @@ -775,7 +921,8 @@ void Init_Forces( reax_system *system, control_params *control, // i, Start_Index( i, bonds ), End_Index( i, bonds ) ); } - Init_Charge_Matrix_Remaining_Entries( system, control, H, H_sp, &Htop, &H_sp_top ); + Init_Charge_Matrix_Remaining_Entries( system, control, far_nbrs, + H, H_sp, &Htop, &H_sp_top ); // mark the end of j list H->start[system->N_cm] = Htop; @@ -1062,8 +1209,7 @@ void Init_Forces_Tab( reax_system *system, control_params *control, void Estimate_Storage_Sizes( reax_system *system, control_params *control, - list **lists, int *Htop, int *hb_top, - int *bond_top, int *num_3body ) + list **lists, int *Htop, int *hb_top, int *bond_top, int *num_3body ) { int i, j, pj; int start_i, end_i; diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c index 58c5c98c..2ddcbeb6 100644 --- a/sPuReMD/src/init_md.c +++ b/sPuReMD/src/init_md.c @@ -375,7 +375,24 @@ void Init_Workspace( reax_system *system, control_params *control, break; case ACKS2_CM: - //TODO + for ( i = 0; i < system->N; ++i ) + { + workspace->b_s[i] = -system->reaxprm.sbp[ system->atoms[i].type ].chi; + + //TODO: check if unused (redundant) + workspace->b[i] = -system->reaxprm.sbp[ system->atoms[i].type ].chi; + } + + workspace->b_s[system->N] = control->cm_q_net; + workspace->b[system->N] = control->cm_q_net; + + for ( i = system->N + 1; i < system->N_cm; ++i ) + { + workspace->b_s[i] = 0.0; + + //TODO: check if unused (redundant) + workspace->b[i] = 0.0; + } break; default: @@ -489,7 +506,7 @@ void Init_Lists( reax_system *system, control_params *control, simulation_data *data, static_storage *workspace, list **lists, output_controls *out_control ) { - int i, num_nbrs, num_hbonds, num_bonds, num_3body, Htop; + int i, num_nbrs, num_hbonds, num_bonds, num_3body, Htop, max_nnz; int *hb_top, *bond_top; num_nbrs = Estimate_NumNeighbors( system, control, workspace, lists ); @@ -513,7 +530,23 @@ void Init_Lists( reax_system *system, control_params *control, Estimate_Storage_Sizes( system, control, lists, &Htop, hb_top, bond_top, &num_3body ); - if ( Allocate_Matrix( &(workspace->H), system->N_cm, Htop ) == FAILURE ) + switch ( control->charge_method ) + { + case QEQ_CM: + max_nnz = Htop; + break; + case EEM_CM: + max_nnz = Htop + system->N_cm; + break; + case ACKS2_CM: + max_nnz = 2 * Htop + 3 * system->N + 2; + break; + default: + max_nnz = Htop; + break; + } + + if ( Allocate_Matrix( &(workspace->H), system->N_cm, max_nnz ) == FAILURE ) { fprintf( stderr, "Not enough space for init matrices. Terminating...\n" ); exit( INSUFFICIENT_MEMORY ); @@ -522,7 +555,7 @@ void Init_Lists( reax_system *system, control_params *control, * If so, need to refactor Estimate_Storage_Sizes * to use various cut-off distances as parameters * (non-bonded, hydrogen, 3body, etc.) */ - if ( Allocate_Matrix( &(workspace->H_sp), system->N_cm, Htop ) == FAILURE ) + if ( Allocate_Matrix( &(workspace->H_sp), system->N_cm, max_nnz ) == FAILURE ) { fprintf( stderr, "Not enough space for init matrices. Terminating...\n" ); exit( INSUFFICIENT_MEMORY ); diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h index 94aeeaf0..826dee38 100644 --- a/sPuReMD/src/mytypes.h +++ b/sPuReMD/src/mytypes.h @@ -251,7 +251,7 @@ l[30] = p_coa4 l[31] = p_ovun4 l[32] = p_ovun3 l[33] = p_val8 -l[34] = N/A +l[34] = ACKS2 bond softness l[35] = N/A l[36] = N/A l[37] = version number @@ -298,6 +298,7 @@ typedef struct real b_o_131; real b_o_132; real b_o_133; + real b_s_acks2; /* bond softness for ACKS2 */ /* Line four in the field file */ real p_ovun2; diff --git a/sPuReMD/src/print_utils.c b/sPuReMD/src/print_utils.c index 015e2573..8a595276 100644 --- a/sPuReMD/src/print_utils.c +++ b/sPuReMD/src/print_utils.c @@ -838,7 +838,7 @@ void Print_Sparse_Matrix2( sparse_matrix *A, char *fname ) { //fprintf( f, "%d%d %.15e\n", A->entries[j].j, i, A->entries[j].val ); //Convert 0-based to 1-based (for Matlab) - fprintf( f, "%6d%6d %24.15e\n", i+1, A->j[j]+1, A->val[j] ); + fprintf( f, "%6d %6d %24.15e\n", i+1, A->j[j]+1, A->val[j] ); } } diff --git a/sPuReMD/src/testmd.c b/sPuReMD/src/testmd.c index 075a49b1..9ff007a9 100644 --- a/sPuReMD/src/testmd.c +++ b/sPuReMD/src/testmd.c @@ -176,8 +176,8 @@ int main(int argc, char* argv[]) /* compute f_0 */ //if( control.restart == 0 ) { Reset( &system, &control, &data, &workspace, &lists ); - Generate_Neighbor_Lists( &system, &control, &data, &workspace, - &lists, &out_control ); + Generate_Neighbor_Lists( &system, &control, &data, &workspace, + &lists, &out_control ); //fprintf( stderr, "total: %.2f secs\n", data.timing.nbrs); Compute_Forces(&system, &control, &data, &workspace, &lists, &out_control); @@ -186,8 +186,7 @@ int main(int argc, char* argv[]) ++data.step; //} // - - + for ( ; data.step <= control.nsteps; data.step++ ) { if ( control.T_mode ) -- GitLab