From fa9e5d7edaae7aa46efbc4f27f9eef5a4fa49214 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnku@cse.msu.edu> Date: Wed, 7 Sep 2016 17:24:24 -0400 Subject: [PATCH] sPuReMD: expand domain-based QEq matrix cut-off and add control file parameter. Update control file. --- environ/param.gpu.water | 1 + sPuReMD/src/QEq.c | 46 +++++++++++++++++++++++-------------- sPuReMD/src/control.c | 14 ++++++++++-- sPuReMD/src/forces.c | 50 +++++++++++++++++++++++++++++++++-------- sPuReMD/src/mytypes.h | 2 ++ 5 files changed, 85 insertions(+), 28 deletions(-) diff --git a/environ/param.gpu.water b/environ/param.gpu.water index 7b3d00e9..d5d3900d 100644 --- a/environ/param.gpu.water +++ b/environ/param.gpu.water @@ -16,6 +16,7 @@ hbond_cutoff 7.50 ! cutoff distance for hydrogen b qeq_solver_type 0 ! method used to solve charge equilibration phase (QEq) qeq_solver_q_err 1e-6 ! relative residual norm threshold used in solver +qeq_domain_sparsity 1.0 ! scalar for scaling cut-off distance, used to sparsify QEq matrix (between 0.0 and 1.0) pre_comp_type 0 ! method used to compute QEq preconditioner, if applicable pre_comp_refactor 100 ! nsteps to recompute preconditioner pre_comp_droptol 0.0 ! threshold tolerance for dropping values in preconditioner computation, if applicable diff --git a/sPuReMD/src/QEq.c b/sPuReMD/src/QEq.c index 514330bd..ddf0000d 100644 --- a/sPuReMD/src/QEq.c +++ b/sPuReMD/src/QEq.c @@ -262,7 +262,7 @@ static int Estimate_LU_Fill( const sparse_matrix * const A, const real * const d fillin = 0; #pragma omp parallel for schedule(static) \ - default(none) private(i, j, pj, val) reduction(+: fillin) + default(none) private(i, j, pj, val) reduction(+: fillin) for ( i = 0; i < A->n; ++i ) { for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj ) @@ -1018,7 +1018,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, { /* for each nonzero in L */ #pragma omp parallel for schedule(static) \ - default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum) + default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum) for ( j = 0; j < DAD->start[DAD->n]; ++j ) { sum = ZERO; @@ -1405,9 +1405,19 @@ static void Init_MatVec( const reax_system * const system, const control_params { int i, fillin; real s_tmp, t_tmp, time; + sparse_matrix *Hptr; sparse_matrix *H_test, *L_test, *U_test; // char fname[100]; + if (control->qeq_domain_sparsify_enabled) + { + Hptr = workspace->H_sp; + } + else + { + Hptr = workspace->H; + } + if (control->pre_comp_refactor > 0 && ((data->step - data->prev_steps) % control->pre_comp_refactor == 0 || workspace->L == NULL)) { @@ -1417,7 +1427,10 @@ static void Init_MatVec( const reax_system * const system, const control_params if ( control->pre_comp_type != DIAG_PC ) { Sort_Matrix_Rows( workspace->H ); - Sort_Matrix_Rows( workspace->H_sp ); + if (control->qeq_domain_sparsify_enabled) + { + Sort_Matrix_Rows( workspace->H_sp ); + } } data->timing.QEq_sort_mat_rows += Get_Timing_Info( time ); #if defined(DEBUG) @@ -1438,13 +1451,13 @@ static void Init_MatVec( const reax_system * const system, const control_params data->timing.pre_comp += diag_pre_comp( system, workspace->Hdia_inv ); break; case ICHOLT_PC: - Calculate_Droptol( workspace->H, workspace->droptol, control->pre_comp_droptol ); + Calculate_Droptol( Hptr, workspace->droptol, control->pre_comp_droptol ); #if defined(DEBUG_FOCUS) fprintf( stderr, "drop tolerances calculated\n" ); #endif if ( workspace->L == NULL ) { - fillin = Estimate_LU_Fill( workspace->H, workspace->droptol ); + fillin = Estimate_LU_Fill( Hptr, workspace->droptol ); if ( Allocate_Matrix( &(workspace->L), far_nbrs->n, fillin ) == 0 || Allocate_Matrix( &(workspace->U), far_nbrs->n, fillin ) == 0 ) { @@ -1458,15 +1471,14 @@ static void Init_MatVec( const reax_system * const system, const control_params #endif } - data->timing.pre_comp += ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U ); -// data->timing.pre_comp += ICHOLT( workspace->H_sp, workspace->droptol, workspace->L, workspace->U ); + data->timing.pre_comp += ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U ); break; case ILU_PAR_PC: if ( workspace->L == NULL ) { /* factors have sparsity pattern as H */ - if ( Allocate_Matrix( &(workspace->L), workspace->H->n, workspace->H->m ) == 0 || - Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 ) + if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == 0 || + Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == 0 ) { fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); @@ -1474,7 +1486,7 @@ static void Init_MatVec( const reax_system * const system, const control_params } // data->timing.pre_comp += ICHOL_PAR( workspace->H, control->pre_comp_sweeps, workspace->L, workspace->U ); - data->timing.pre_comp += ILU_PAR( workspace->H, control->pre_comp_sweeps, workspace->L, workspace->U ); + data->timing.pre_comp += ILU_PAR( Hptr, control->pre_comp_sweeps, workspace->L, workspace->U ); // Print_Sparse_Matrix2( workspace->H, "H.out" ); // Print_Sparse_Matrix2( workspace->L, "L.out" ); @@ -1516,7 +1528,7 @@ static void Init_MatVec( const reax_system * const system, const control_params // exit( 0 ); break; case ILUT_PAR_PC: - Calculate_Droptol( workspace->H, workspace->droptol, control->pre_comp_droptol ); + Calculate_Droptol( Hptr, workspace->droptol, control->pre_comp_droptol ); #if defined(DEBUG_FOCUS) fprintf( stderr, "drop tolerances calculated\n" ); #endif @@ -1524,30 +1536,30 @@ static void Init_MatVec( const reax_system * const system, const control_params if ( workspace->L == NULL ) { /* TODO: safest storage estimate is ILU(0) (same as lower triangular portion of H), could improve later */ - if ( Allocate_Matrix( &(workspace->L), workspace->H->n, workspace->H->m ) == 0 || - Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 ) + if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == 0 || + Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == 0 ) { fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); } } - data->timing.pre_comp += ILUT_PAR( workspace->H, workspace->droptol, control->pre_comp_sweeps, + data->timing.pre_comp += ILUT_PAR( Hptr, workspace->droptol, control->pre_comp_sweeps, workspace->L, workspace->U ); break; case ILU_SUPERLU_MT_PC: if ( workspace->L == NULL ) { /* factors have sparsity pattern as H */ - if ( Allocate_Matrix( &(workspace->L), workspace->H->n, workspace->H->m ) == 0 || - Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 ) + if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == 0 || + Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == 0 ) { fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); } } #if defined(HAVE_SUPERLU_MT) - data->timing.pre_comp += SuperLU_Factorize( workspace->H, workspace->L, workspace->U ); + data->timing.pre_comp += SuperLU_Factorize( Hptr, workspace->L, workspace->U ); #else fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" ); exit( INVALID_INPUT ); diff --git a/sPuReMD/src/control.c b/sPuReMD/src/control.c index 62311c59..41f74496 100644 --- a/sPuReMD/src/control.c +++ b/sPuReMD/src/control.c @@ -61,7 +61,8 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control, control->reneighbor = 1; control->vlist_cut = 0; control->nbr_cut = 4.; - control->r_cut = 10; + control->r_cut = 10.; + control->r_sp_cut = 10.; control->max_far_nbrs = 1000; control->bo_cut = 0.01; control->thb_cut = 0.001; @@ -71,6 +72,8 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control, control->qeq_solver_type = GMRES_S; control->qeq_solver_q_err = 0.000001; + control->qeq_domain_sparsify_enabled = FALSE; + control->qeq_domain_sparsity = 1.0; control->pre_comp_type = ICHOLT_PC; control->pre_comp_sweeps = 3; control->pre_comp_refactor = 100; @@ -247,6 +250,12 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control, val = atof( tmp[1] ); control->qeq_solver_q_err = val; } + else if ( strcmp(tmp[0], "qeq_domain_sparsity") == 0 ) + { + val = atof( tmp[1] ); + control->qeq_domain_sparsity = val; + control->qeq_domain_sparsify_enabled = TRUE; + } else if ( strcmp(tmp[0], "pre_comp_type") == 0 ) { ival = atoi( tmp[1] ); @@ -501,7 +510,7 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control, else { fprintf( stderr, "WARNING: unknown parameter %s\n", tmp[0] ); - exit( 15 ); + exit( UNKNOWN_OPTION ); } } @@ -521,6 +530,7 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control, control->bo_cut = 0.01 * system->reaxprm.gp.l[29]; control->r_low = system->reaxprm.gp.l[11]; control->r_cut = system->reaxprm.gp.l[12]; + control->r_sp_cut = control->r_cut * control->qeq_domain_sparsity; control->vlist_cut += control->r_cut; system->g.cell_size = control->vlist_cut / 2.; diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c index e6e63707..9108a8dd 100644 --- a/sPuReMD/src/forces.c +++ b/sPuReMD/src/forces.c @@ -304,9 +304,6 @@ void Init_Forces( reax_system *system, control_params *control, p_boc1 = system->reaxprm.gp.l[0]; p_boc2 = system->reaxprm.gp.l[1]; - //TODO: add to control file, remove - control->r_sp_cut = 8.0; - for ( i = 0; i < system->N; ++i ) { atom_i = &(system->atoms[i]); @@ -384,7 +381,7 @@ void Init_Forces( reax_system *system, control_params *control, if ( flag_sp ) { H_sp->j[H_sp_top] = j; - H_sp->val[H_sp_top] = self_coef * Tap * EV_to_KCALpMOL / dr3gamij_3; + H_sp->val[H_sp_top] = H->val[Htop - 1]; ++H_sp_top; } @@ -542,12 +539,14 @@ void Init_Forces( reax_system *system, control_params *control, } } + /* diagonal entry */ H->j[Htop] = i; H->val[Htop] = system->reaxprm.sbp[type_i].eta; ++Htop; + /* diagonal entry */ H_sp->j[H_sp_top] = i; - H_sp->val[H_sp_top] = system->reaxprm.sbp[type_i].eta; + H_sp->val[H_sp_top] = H->val[Htop - 1]; ++H_sp_top; Set_End_Index( i, btop_i, bonds ); @@ -582,17 +581,17 @@ void Init_Forces_Tab( reax_system *system, control_params *control, int i, j, pj; int start_i, end_i; int type_i, type_j; - int Htop, btop_i, btop_j, num_bonds, num_hbonds; + int Htop, H_sp_top, btop_i, btop_j, num_bonds, num_hbonds; int tmin, tmax, r; int ihb, jhb, ihb_top, jhb_top; - int flag; + int flag, flag_sp; real r_ij, r2, self_coef; real val, dif, base; real C12, C34, C56; real Cln_BOp_s, Cln_BOp_pi, Cln_BOp_pi2; real BO, BO_s, BO_pi, BO_pi2; real p_boc1, p_boc2; - sparse_matrix *H; + sparse_matrix *H, *H_sp; list *far_nbrs, *bonds, *hbonds; single_body_parameters *sbp_i, *sbp_j; two_body_parameters *twbp; @@ -607,7 +606,9 @@ void Init_Forces_Tab( reax_system *system, control_params *control, hbonds = *lists + HBONDS; H = workspace->H; + H_sp = workspace->H_sp; Htop = 0; + H_sp_top = 0; num_bonds = 0; num_hbonds = 0; btop_i = btop_j = 0; @@ -621,6 +622,7 @@ void Init_Forces_Tab( reax_system *system, control_params *control, start_i = Start_Index(i, far_nbrs); end_i = End_Index(i, far_nbrs); H->start[i] = Htop; + H_sp->start[i] = H_sp_top; btop_i = End_Index( i, bonds ); sbp_i = &(system->reaxprm.sbp[type_i]); ihb = ihb_top = -1; @@ -634,15 +636,30 @@ void Init_Forces_Tab( reax_system *system, control_params *control, atom_j = &(system->atoms[j]); flag = 0; + flag_sp = 0; if ((data->step - data->prev_steps) % control->reneighbor == 0) { if (nbr_pj->d <= control->r_cut) + { flag = 1; - else flag = 0; + if ( nbr_pj->d <= control->r_sp_cut ) + { + flag_sp = 1; + } + } + else + { + flag = 0; + flag_sp = 0; + } } else if ((nbr_pj->d = Sq_Distance_on_T3(atom_i->x, atom_j->x, &(system->box), nbr_pj->dvec)) <= SQR(control->r_cut)) { + if ( nbr_pj->d <= SQR(control->r_sp_cut)) + { + flag_sp = 1; + } nbr_pj->d = sqrt(nbr_pj->d); flag = 1; } @@ -671,6 +688,14 @@ void Init_Forces_Tab( reax_system *system, control_params *control, H->val[Htop] = self_coef * val; ++Htop; + /* H_sp matrix entry */ + if ( flag_sp ) + { + H_sp->j[H_sp_top] = j; + H_sp->val[H_sp_top] = H->val[Htop - 1]; + ++H_sp_top; + } + /* hydrogen bond lists */ if ( control->hb_cut > 0 && (ihb == 1 || ihb == 2) && nbr_pj->d <= control->hb_cut ) @@ -797,10 +822,16 @@ void Init_Forces_Tab( reax_system *system, control_params *control, } } + /* diagonal entry */ H->j[Htop] = i; H->val[Htop] = system->reaxprm.sbp[type_i].eta; ++Htop; + /* diagonal entry */ + H_sp->j[H_sp_top] = i; + H_sp->val[H_sp_top] = H->val[Htop - 1]; + ++H_sp_top; + Set_End_Index( i, btop_i, bonds ); if ( ihb == 1 ) Set_End_Index( workspace->hbond_index[i], ihb_top, hbonds ); @@ -808,6 +839,7 @@ void Init_Forces_Tab( reax_system *system, control_params *control, // mark the end of j list H->start[i] = Htop; + H_sp->start[i] = H_sp_top; /* validate lists - decide if reallocation is required! */ Validate_Lists( workspace, lists, data->step, system->N, H->m, Htop, num_bonds, num_hbonds ); diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h index 9be7cf75..d3cd8b79 100644 --- a/sPuReMD/src/mytypes.h +++ b/sPuReMD/src/mytypes.h @@ -511,6 +511,8 @@ typedef struct unsigned int qeq_solver_type; real qeq_solver_q_err; + real qeq_domain_sparsity; + unsigned int qeq_domain_sparsify_enabled; unsigned int pre_comp_type; unsigned int pre_comp_refactor; real pre_comp_droptol; -- GitLab