From f14972a49d808172f8f6be2b7d8acf37b6aa1546 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnk@msu.edu> Date: Thu, 24 Aug 2017 00:11:41 -0400 Subject: [PATCH] sPuReMD: update SDM with preconditioning methods and OpenMP support. --- sPuReMD/src/charges.c | 21 ++++++++------- sPuReMD/src/init_md.c | 4 ++- sPuReMD/src/lin_alg.c | 63 +++++++++++++++++++++++-------------------- sPuReMD/src/lin_alg.h | 3 ++- sPuReMD/src/vector.c | 13 +++------ 5 files changed, 54 insertions(+), 50 deletions(-) diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c index d2fddf36..f81fba69 100644 --- a/sPuReMD/src/charges.c +++ b/sPuReMD/src/charges.c @@ -2011,7 +2011,7 @@ static void Setup_Preconditioner_QEq( const reax_system * const system, simulation_data * const data, static_storage * const workspace, const list * const far_nbrs ) { - int i, fillin; + int fillin; real time; sparse_matrix *Hptr; @@ -2156,7 +2156,7 @@ static void Setup_Preconditioner_EE( const reax_system * const system, simulation_data * const data, static_storage * const workspace, const list * const far_nbrs ) { - int i, fillin; + int fillin; real time; sparse_matrix *Hptr; @@ -2305,7 +2305,7 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, simulation_data * const data, static_storage * const workspace, const list * const far_nbrs ) { - int i, fillin; + int fillin; real time; sparse_matrix *Hptr; @@ -2547,9 +2547,10 @@ static void QEq( reax_system * const system, control_params * const control, case SDM_S: iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, - workspace->s[0] ) + 1; + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; iters += SDM( workspace,control, workspace->H, workspace->b_t, control->cm_solver_q_err, - workspace->t[0] ) + 1; + workspace->t[0], FALSE ) + 1; break; default: @@ -2628,7 +2629,8 @@ static void EE( reax_system * const system, control_params * const control, case SDM_S: iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, - workspace->s[0] ) + 1; + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; break; default: @@ -2689,8 +2691,8 @@ static void ACKS2( reax_system * const system, control_params * const control, case GMRES_H_S: iters = GMRES_HouseHolder( workspace, control, data,workspace->H, workspace->b_s, control->cm_solver_q_err, workspace->s[0], - (control->cm_solver_pre_comp_refactor > 0 && - data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ); + control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ); break; case CG_S: @@ -2701,7 +2703,8 @@ static void ACKS2( reax_system * const system, control_params * const control, case SDM_S: iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, - workspace->s[0] ) + 1; + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; break; default: diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c index 4228f8bd..3cbf575f 100644 --- a/sPuReMD/src/init_md.c +++ b/sPuReMD/src/init_md.c @@ -428,7 +428,9 @@ void Init_Workspace( reax_system *system, control_params *control, break; case SDM_S: - //TODO + workspace->r = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->d = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->q = (real *) calloc( system->N_cm, sizeof( real ) ); break; default: diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c index 631b543a..28b7c956 100644 --- a/sPuReMD/src/lin_alg.c +++ b/sPuReMD/src/lin_alg.c @@ -1764,53 +1764,58 @@ int CG( const static_storage * const workspace, const control_params * const con /* Steepest Descent */ int SDM( const static_storage * const workspace, const control_params * const control, const sparse_matrix * const H, const real * const b, const real tol, - real * const x ) + real * const x, const int fresh_pre ) { - int i, j, N; - real tmp, alpha, beta, b_norm; - real sig0, sig; + int i, itr, N; + real tmp, alpha, b_norm; + real sig; N = H->n; - b_norm = Norm( b, N ); - Sparse_MatVec( H, x, workspace->q ); - Vector_Sum( workspace->r , 1., b, -1., workspace->q, N ); - for ( j = 0; j < N; ++j ) + #pragma omp parallel default(none) private(i, tmp, alpha, b_norm, sig) \ + shared(itr, N) { - workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; - } + b_norm = Norm( b, N ); - sig = Dot( workspace->r, workspace->d, N ); - sig0 = sig; + Sparse_MatVec( H, x, workspace->q ); + Vector_Sum( workspace->r , 1.0, b, -1.0, workspace->q, N ); - for ( i = 0; i < control->cm_solver_max_iters && SQRT(sig) / b_norm > tol; ++i ) - { - Sparse_MatVec( H, workspace->d, workspace->q ); + apply_preconditioner( workspace, control, workspace->r, workspace->d, fresh_pre ); sig = Dot( workspace->r, workspace->d, N ); - tmp = Dot( workspace->d, workspace->q, N ); - alpha = sig / tmp; - Vector_Add( x, alpha, workspace->d, N ); - Vector_Add( workspace->r, -alpha, workspace->q, N ); - for ( j = 0; j < N; ++j ) + for ( i = 0; i < control->cm_solver_max_iters && SQRT(sig) / b_norm > tol; ++i ) { - workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; + Sparse_MatVec( H, workspace->d, workspace->q ); + + sig = Dot( workspace->r, workspace->d, N ); + + /* ensure each thread gets a local copy of + * the function return value + * (which is stored as global inside the function) + * before proceeding */ + #pragma omp barrier + + tmp = Dot( workspace->d, workspace->q, N ); + alpha = sig / tmp; + + Vector_Add( x, alpha, workspace->d, N ); + Vector_Add( workspace->r, -alpha, workspace->q, N ); + + apply_preconditioner( workspace, control, workspace->r, workspace->d, FALSE ); } - //fprintf( stderr, "d_norm:%24.15e, q_norm:%24.15e, tmp:%24.15e\n", - //Norm(workspace->d,N), Norm(workspace->q,N), tmp ); + #pragma omp single + itr = i; } - fprintf( stderr, "SDM took %d iterations\n", i ); - - if ( i >= 300 ) + if ( itr >= control->cm_solver_max_iters ) { - fprintf( stderr, "SDM convergence failed!\n" ); - return i; + fprintf( stderr, "[WARNING] SDM convergence failed (%d iters)\n", itr ); + return itr; } - return i; + return itr; } diff --git a/sPuReMD/src/lin_alg.h b/sPuReMD/src/lin_alg.h index f62742a2..a3148514 100644 --- a/sPuReMD/src/lin_alg.h +++ b/sPuReMD/src/lin_alg.h @@ -63,7 +63,8 @@ int CG( const static_storage * const, const control_params * const, real * const, const int ); int SDM( const static_storage * const, const control_params * const, - const sparse_matrix * const, const real * const, const real, real * const ); + const sparse_matrix * const, const real * const, const real, + real * const, const int ); real condest( const sparse_matrix * const, const sparse_matrix * const ); diff --git a/sPuReMD/src/vector.c b/sPuReMD/src/vector.c index 24e05de8..5329aea7 100644 --- a/sPuReMD/src/vector.c +++ b/sPuReMD/src/vector.c @@ -32,13 +32,11 @@ inline int Vector_isZero( const real * const v, const unsigned int k ) { unsigned int i; - #pragma omp master + #pragma omp single { ret = TRUE; } - #pragma omp barrier - #pragma omp for reduction(&&: ret) schedule(static) for ( i = 0; i < k; ++i ) { @@ -134,14 +132,11 @@ inline real Dot( const real * const v1, const real * const v2, const unsigned in { unsigned int i; - #pragma omp master + #pragma omp single { ret2 = ZERO; } - #pragma omp barrier - - #pragma omp for reduction(+: ret2) schedule(static) for ( i = 0; i < k; ++i ) { @@ -156,13 +151,11 @@ inline real Norm( const real * const v1, const unsigned int k ) { unsigned int i; - #pragma omp master + #pragma omp single { ret2 = ZERO; } - #pragma omp barrier - #pragma omp for reduction(+: ret2) schedule(static) for ( i = 0; i < k; ++i ) { -- GitLab