From e7f161fb69da32a770736fcf598c0f48bf490248 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnk@msu.edu> Date: Wed, 23 Aug 2017 22:49:37 -0400 Subject: [PATCH] sPuReMD: update CG with preconditioning methods and OpenMP support. --- sPuReMD/src/charges.c | 80 +++++++++---------- sPuReMD/src/control.c | 6 +- sPuReMD/src/lin_alg.c | 179 +++++++++++------------------------------- sPuReMD/src/lin_alg.h | 16 ++-- sPuReMD/src/vector.c | 2 +- 5 files changed, 96 insertions(+), 187 deletions(-) diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c index 6e663d91..d2fddf36 100644 --- a/sPuReMD/src/charges.c +++ b/sPuReMD/src/charges.c @@ -2522,49 +2522,34 @@ static void QEq( reax_system * const system, control_params * const control, case GMRES_S: iters = GMRES( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, workspace->s[0], - out_control->log, (control->cm_solver_pre_comp_refactor > 0 && (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); iters += GMRES( workspace, control, data, workspace->H, - workspace->b_t, control->cm_solver_q_err, workspace->t[0], - out_control->log, FALSE ); + workspace->b_t, control->cm_solver_q_err, workspace->t[0], FALSE ); break; case GMRES_H_S: iters = GMRES_HouseHolder( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, workspace->s[0], - out_control->log, (control->cm_solver_pre_comp_refactor > 0 && (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); iters += GMRES_HouseHolder( workspace, control, data, workspace->H, - workspace->b_t, control->cm_solver_q_err, workspace->t[0], - out_control->log, 0 ); + workspace->b_t, control->cm_solver_q_err, workspace->t[0], 0 ); break; case CG_S: iters = CG( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, - workspace->s[0], out_control->log ) + 1; + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; iters += CG( workspace, control, workspace->H, workspace->b_t, control->cm_solver_q_err, - workspace->t[0], out_control->log ) + 1; -// iters = CG( workspace, workspace->H, workspace->b_s, control->cm_solver_q_err, -// workspace->L, workspace->U, workspace->s[0], control->cm_solver_pre_app_type, -// control->cm_solver_pre_app_jacobi_iters, out_control->log ) + 1; -// iters += CG( workspace, workspace->H, workspace->b_t, control->cm_solver_q_err, -// workspace->L, workspace->U, workspace->t[0], control->cm_solver_pre_app_type, -// control->cm_solver_pre_app_jacobi_iters, out_control->log ) + 1; + workspace->t[0], FALSE ) + 1; break; case SDM_S: iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, - workspace->s[0], out_control->log ) + 1; + workspace->s[0] ) + 1; iters += SDM( workspace,control, workspace->H, workspace->b_t, control->cm_solver_q_err, - workspace->t[0], out_control->log ) + 1; -// iters = SDM( workspace, workspace->H, workspace->b_s, control->cm_solver_q_err, -// workspace->L, workspace->U, workspace->s[0], control->cm_solver_pre_app_type, -// control->cm_solver_pre_app_jacobi_iters, out_control->log ) + 1; -// iters += SDM( workspace, workspace->H, workspace->b_t, control->cm_solver_q_err, -// workspace->L, workspace->U, workspace->t[0], control->cm_solver_pre_app_type, -// control->cm_solver_pre_app_jacobi_iters, out_control->log ) + 1; + workspace->t[0] ) + 1; break; default: @@ -2624,25 +2609,26 @@ static void EE( reax_system * const system, control_params * const control, case GMRES_S: iters = GMRES( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, workspace->s[0], - out_control->log, - ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); + (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); break; case GMRES_H_S: iters = GMRES_HouseHolder( workspace, control, data,workspace->H, workspace->b_s, control->cm_solver_q_err, workspace->s[0], - out_control->log, - (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ); + control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ); break; case CG_S: iters = CG( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, - workspace->s[0], out_control->log ) + 1; + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; break; case SDM_S: iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, - workspace->s[0], out_control->log ) + 1; + workspace->s[0] ) + 1; break; default: @@ -2696,25 +2682,26 @@ static void ACKS2( reax_system * const system, control_params * const control, case GMRES_S: iters = GMRES( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, workspace->s[0], - out_control->log, - ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); + (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ); break; case GMRES_H_S: iters = GMRES_HouseHolder( workspace, control, data,workspace->H, workspace->b_s, control->cm_solver_q_err, workspace->s[0], - out_control->log, - (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ); + (control->cm_solver_pre_comp_refactor > 0 && + data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ); break; case CG_S: iters = CG( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, - workspace->s[0], out_control->log ) + 1; + workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && + (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; break; case SDM_S: iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, - workspace->s[0], out_control->log ) + 1; + workspace->s[0] ) + 1; break; default: @@ -2742,12 +2729,17 @@ void Compute_Charges( reax_system * const system, control_params * const control char fname[200]; FILE * fp; - if ( data->step == 0 || data->step == 10 || data->step == 50 || data->step == 100 ) + if ( data->step >= 100 ) { sprintf( fname, "s_%d_%s.out", data->step, control->sim_name ); fp = fopen( fname, "w" ); Vector_Print( fp, NULL, workspace->s[0], system->N_cm ); fclose( fp ); + + sprintf( fname, "t_%d_%s.out", data->step, control->sim_name ); + fp = fopen( fname, "w" ); + Vector_Print( fp, NULL, workspace->t[0], system->N_cm ); + fclose( fp ); } #endif @@ -2772,19 +2764,21 @@ void Compute_Charges( reax_system * const system, control_params * const control } #if defined(DEBUG_FOCUS) - if ( data->step == 0 || data->step == 10 || data->step == 50 || data->step == 100 ) + if ( data->step >= 100 ) { - if ( data->step == 0 ) - { - sprintf( fname, "H_%d_%s.out", data->step, control->sim_name ); - Print_Sparse_Matrix2( workspace->H, fname, NULL ); - Print_Sparse_Matrix_Binary( workspace->H, fname ); - } + sprintf( fname, "H_%d_%s.out", data->step, control->sim_name ); + Print_Sparse_Matrix2( workspace->H, fname, NULL ); +// Print_Sparse_Matrix_Binary( workspace->H, fname ); - sprintf( fname, "b_%d_%s.out", data->step, control->sim_name ); + sprintf( fname, "b_s_%d_%s.out", data->step, control->sim_name ); fp = fopen( fname, "w" ); Vector_Print( fp, NULL, workspace->b_s, system->N_cm ); fclose( fp ); + + sprintf( fname, "b_t_%d_%s.out", data->step, control->sim_name ); + fp = fopen( fname, "w" ); + Vector_Print( fp, NULL, workspace->b_t, system->N_cm ); + fclose( fp ); } #endif } diff --git a/sPuReMD/src/control.c b/sPuReMD/src/control.c index e906fc85..16083739 100644 --- a/sPuReMD/src/control.c +++ b/sPuReMD/src/control.c @@ -66,7 +66,7 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control, control->max_far_nbrs = 1000; control->bo_cut = 0.01; control->thb_cut = 0.001; - control->hb_cut = 7.50; + control->hb_cut = 0.0; control->tabulate = 0; @@ -105,9 +105,9 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control, control->remove_CoM_vel = 25; out_control->debug_level = 0; - out_control->energy_update_freq = 10; + out_control->energy_update_freq = 0; - out_control->write_steps = 100; + out_control->write_steps = 0; out_control->traj_compress = 0; out_control->write = fprintf; out_control->traj_format = 0; diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c index 118654b7..631b543a 100644 --- a/sPuReMD/src/lin_alg.c +++ b/sPuReMD/src/lin_alg.c @@ -68,7 +68,7 @@ real *Dinv_b = NULL, *rp = NULL, *rp2 = NULL, *rp3 = NULL; * x: vector * b: vector (result) */ static void Sparse_MatVec( const sparse_matrix * const A, - const real * const x, real * const b ) + const real * const x, real * const b ) { int i, j, k, n, si, ei; real H; @@ -1182,7 +1182,7 @@ static void apply_preconditioner( const static_storage * const workspace, const /* generalized minimual residual iterative solver for sparse linear systems */ int GMRES( const static_storage * const workspace, const control_params * const control, simulation_data * const data, const sparse_matrix * const H, const real * const b, - const real tol, real * const x, const FILE * const fout, const int fresh_pre ) + const real tol, real * const x, const int fresh_pre ) { int i, j, k, itr, N, g_j, g_itr; real cc, tmp1, tmp2, temp, ret_temp, bnorm, time_start; @@ -1495,22 +1495,9 @@ int GMRES( const static_storage * const workspace, const control_params * const } } - // Sparse_MatVec( H, x, workspace->b_prm ); - // for( i = 0; i < N; ++i ) - // workspace->b_prm[i] *= workspace->Hdia_inv[i]; - // fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" ); - // for( i = 0; i < N; ++i ) - // fprintf( fout, "%10.5f%15.12f%15.12f\n", - // workspace->b_prc[i], workspace->b_prm[i], x[i] );*/ - - // fprintf(fout,"GMRES outer:%d, inner:%d iters - residual norm: %25.20f\n", - // itr, j, fabs( workspace->g[j] ) / bnorm ); - // data->timing.solver_iters += itr * control->cm_solver_restart + j; - if ( g_itr >= control->cm_solver_max_iters ) { fprintf( stderr, "GMRES convergence failed\n" ); - // return -1; return g_itr * (control->cm_solver_restart + 1) + g_j + 1; } @@ -1521,7 +1508,7 @@ int GMRES( const static_storage * const workspace, const control_params * const int GMRES_HouseHolder( const static_storage * const workspace, const control_params * const control, simulation_data * const data, const sparse_matrix * const H, const real * const b, real tol, - real * const x, const FILE * const fout, const int fresh_pre ) + real * const x, const int fresh_pre ) { int i, j, k, itr, N; real cc, tmp1, tmp2, temp, bnorm; @@ -1693,10 +1680,6 @@ int GMRES_HouseHolder( const static_storage * const workspace, Vector_Add( x, workspace->y[i], z[i], N ); } - // fprintf( stderr, "\nx_aft: " ); - // for( i = 0; i < N; ++i ) - // fprintf( stderr, "%6.2f ", x[i] ); - /* stopping condition */ if ( fabs( w[j] ) / bnorm <= tol ) { @@ -1704,22 +1687,9 @@ int GMRES_HouseHolder( const static_storage * const workspace, } } - // Sparse_MatVec( H, x, workspace->b_prm ); - // for( i = 0; i < N; ++i ) - // workspace->b_prm[i] *= workspace->Hdia_inv[i]; - - // fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" ); - // for( i = 0; i < N; ++i ) - // fprintf( fout, "%10.5f%15.12f%15.12f\n", - // workspace->b_prc[i], workspace->b_prm[i], x[i] ); - - //fprintf( fout,"GMRES outer:%d, inner:%d iters - residual norm: %15.10f\n", - // itr, j, fabs( workspace->g[j] ) / bnorm ); - if ( itr >= control->cm_solver_max_iters ) { fprintf( stderr, "GMRES convergence failed\n" ); - // return -1; return itr * (control->cm_solver_restart + 1) + j + 1; } @@ -1727,130 +1697,74 @@ int GMRES_HouseHolder( const static_storage * const workspace, } -/* Preconditioned Conjugate Gradient */ -int PCG( static_storage *workspace, const control_params * const control, - sparse_matrix *A, real *b, real tol, - sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout ) +/* Conjugate Gradient */ +int CG( const static_storage * const workspace, const control_params * const control, + const sparse_matrix * const H, const real * const b, const real tol, + real * const x, const int fresh_pre ) { - int i, N; + int i, itr, N; real tmp, alpha, beta, b_norm, r_norm; - real sig0, sig_old, sig_new; + real *d, *r, *p, *z; + real sig_old, sig_new; - N = A->n; - b_norm = Norm( b, N ); - //fprintf( stderr, "b_norm: %.15e\n", b_norm ); - - Sparse_MatVec( A, x, workspace->q ); - Vector_Sum( workspace->r , 1., b, -1., workspace->q, N ); - r_norm = Norm(workspace->r, N); - //Print_Soln( workspace, x, q, b, N ); - //fprintf( stderr, "res: %.15e\n", r_norm ); - - tri_solve( L, workspace->r, workspace->d, L->n, LOWER ); - tri_solve( U, workspace->d, workspace->p, U->n, UPPER ); - sig_new = Dot( workspace->r, workspace->p, N ); - sig0 = sig_new; + N = H->n; + d = workspace->d; + r = workspace->r; + p = workspace->q; + z = workspace->p; - for ( i = 0; i < 200 && r_norm / b_norm > tol; ++i ) + #pragma omp parallel default(none) private(i, tmp, alpha, beta, b_norm, r_norm, sig_old, sig_new) \ + shared(itr, N, d, r, p, z) { - //for( i = 0; i < 200 && sig_new > SQR(tol) * sig0; ++i ) { - Sparse_MatVec( A, workspace->p, workspace->q ); - tmp = Dot( workspace->q, workspace->p, N ); - alpha = sig_new / tmp; - Vector_Add( x, alpha, workspace->p, N ); - //fprintf( stderr, "iter%d: |p|=%.15e |q|=%.15e tmp=%.15e\n", - // i+1, Norm(workspace->p,N), Norm(workspace->q,N), tmp ); - - Vector_Add( workspace->r, -alpha, workspace->q, N ); - r_norm = Norm(workspace->r, N); - //fprintf( stderr, "res: %.15e\n", r_norm ); - - tri_solve( L, workspace->r, workspace->d, L->n, LOWER ); - tri_solve( U, workspace->d, workspace->d, U->n, UPPER ); - sig_old = sig_new; - sig_new = Dot( workspace->r, workspace->d, N ); - beta = sig_new / sig_old; - Vector_Sum( workspace->p, 1., workspace->d, beta, workspace->p, N ); - } + b_norm = Norm( b, N ); - //fprintf( fout, "CG took %d iterations\n", i ); - if ( i >= 200 ) - { - fprintf( stderr, "CG convergence failed!\n" ); - return i; - } + Sparse_MatVec( H, x, d ); + Vector_Sum( r, 1.0, b, -1.0, d, N ); + r_norm = Norm( r, N ); - return i; -} + apply_preconditioner( workspace, control, r, z, fresh_pre ); + Vector_Copy( p, z, N ); + sig_new = Dot( r, z, N ); -/* Conjugate Gradient */ -int CG( const static_storage * const workspace, const control_params * const control, - const sparse_matrix * const H, const real * const b, const real tol, real * const x, - const FILE * const fout ) -{ - int i, j, N; - real tmp, alpha, beta, b_norm; - real sig_old, sig_new, sig0; + for ( i = 0; i < control->cm_solver_max_iters && r_norm / b_norm > tol; ++i ) + { + Sparse_MatVec( H, p, d ); - N = H->n; - b_norm = Norm( b, N ); - //fprintf( stderr, "b_norm: %10.6f\n", b_norm ); + tmp = Dot( d, p, N ); + alpha = sig_new / tmp; + Vector_Add( x, alpha, p, N ); - Sparse_MatVec( H, x, workspace->q ); - Vector_Sum( workspace->r , 1., b, -1., workspace->q, N ); - for ( j = 0; j < N; ++j ) - { - workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; - } + Vector_Add( r, -alpha, d, N ); + r_norm = Norm( r, N ); - sig_new = Dot( workspace->r, workspace->d, N ); - sig0 = sig_new; - //Print_Soln( workspace, x, q, b, N ); - //fprintf( stderr, "sig_new: %24.15e, d_norm:%24.15e, q_norm:%24.15e\n", - // sqrt(sig_new), Norm(workspace->d,N), Norm(workspace->q,N) ); - //fprintf( stderr, "sig_new: %f\n", sig_new ); + apply_preconditioner( workspace, control, r, z, FALSE ); - for ( i = 0; i < control->cm_solver_max_iters && SQRT(sig_new) / b_norm > tol; ++i ) - { - //for( i = 0; i < 300 && sig_new > SQR(tol)*sig0; ++i ) { - Sparse_MatVec( H, workspace->d, workspace->q ); - tmp = Dot( workspace->d, workspace->q, N ); - //fprintf( stderr, "tmp: %f\n", tmp ); - alpha = sig_new / tmp; - Vector_Add( x, alpha, workspace->d, N ); - //fprintf( stderr, "d_norm:%24.15e, q_norm:%24.15e, tmp:%24.15e\n", - // Norm(workspace->d,N), Norm(workspace->q,N), tmp ); + sig_old = sig_new; + sig_new = Dot( r, z, N ); - Vector_Add( workspace->r, -alpha, workspace->q, N ); - for ( j = 0; j < N; ++j ) - { - workspace->p[j] = workspace->r[j] * workspace->Hdia_inv[j]; + beta = sig_new / sig_old; + Vector_Sum( p, 1., z, beta, p, N ); } - sig_old = sig_new; - sig_new = Dot( workspace->r, workspace->p, N ); - beta = sig_new / sig_old; - Vector_Sum( workspace->d, 1., workspace->p, beta, workspace->d, N ); - //fprintf( stderr, "sig_new: %f\n", sig_new ); + #pragma omp single + itr = i; } - fprintf( stderr, "CG took %d iterations\n", i ); - - if ( i >= 300 ) + if ( itr >= control->cm_solver_max_iters ) { - fprintf( stderr, "CG convergence failed!\n" ); - return i; + fprintf( stderr, "[WARNING] CG convergence failed (%d iters)\n", itr ); + return itr; } - return i; + return itr; } /* Steepest Descent */ int SDM( const static_storage * const workspace, const control_params * const control, - const sparse_matrix * const H, const real * const b, const real tol, real * const x, - const FILE * const fout ) + const sparse_matrix * const H, const real * const b, const real tol, + real * const x ) { int i, j, N; real tmp, alpha, beta, b_norm; @@ -1858,7 +1772,6 @@ int SDM( const static_storage * const workspace, const control_params * const co N = H->n; b_norm = Norm( b, N ); - //fprintf( stderr, "b_norm: %10.6f\n", b_norm ); Sparse_MatVec( H, x, workspace->q ); Vector_Sum( workspace->r , 1., b, -1., workspace->q, N ); @@ -1886,7 +1799,7 @@ int SDM( const static_storage * const workspace, const control_params * const co } //fprintf( stderr, "d_norm:%24.15e, q_norm:%24.15e, tmp:%24.15e\n", - // Norm(workspace->d,N), Norm(workspace->q,N), tmp ); + //Norm(workspace->d,N), Norm(workspace->q,N), tmp ); } fprintf( stderr, "SDM took %d iterations\n", i ); diff --git a/sPuReMD/src/lin_alg.h b/sPuReMD/src/lin_alg.h index 7085f5a6..f62742a2 100644 --- a/sPuReMD/src/lin_alg.h +++ b/sPuReMD/src/lin_alg.h @@ -24,7 +24,6 @@ #include "mytypes.h" - typedef enum { LOWER = 0, @@ -33,13 +32,16 @@ typedef enum void Transpose( const sparse_matrix * const, sparse_matrix const * ); + void Transpose_I( sparse_matrix * const ); void tri_solve( const sparse_matrix * const, const real * const, real * const, const int, const TRIANGULARITY ); + void tri_solve_level_sched( const sparse_matrix * const, const real * const, real * const, const int, const TRIANGULARITY, int ); + void jacobi_iter( const sparse_matrix * const, const real * const, const real * const, real * const, const TRIANGULARITY, const unsigned int ); @@ -49,21 +51,21 @@ sparse_matrix * setup_graph_coloring( sparse_matrix * const ); int GMRES( const static_storage * const, const control_params * const, simulation_data * const, const sparse_matrix * const, const real * const, const real, real * const, - const FILE * const, const int ); + const int ); int GMRES_HouseHolder( const static_storage * const, const control_params * const, simulation_data * const, const sparse_matrix * const, const real * const, const real, real * const, - const FILE * const, const int ); + const int ); int CG( const static_storage * const, const control_params * const, - const sparse_matrix * const, const real * const, const real, real * const, - const FILE * const ); + const sparse_matrix * const, const real * const, const real, + real * const, const int ); int SDM( const static_storage * const, const control_params * const, - const sparse_matrix * const, const real * const, const real, real * const, - const FILE * const ); + const sparse_matrix * const, const real * const, const real, real * const ); real condest( const sparse_matrix * const, const sparse_matrix * const ); + #endif diff --git a/sPuReMD/src/vector.c b/sPuReMD/src/vector.c index 5cabd03b..24e05de8 100644 --- a/sPuReMD/src/vector.c +++ b/sPuReMD/src/vector.c @@ -166,7 +166,7 @@ inline real Norm( const real * const v1, const unsigned int k ) #pragma omp for reduction(+: ret2) schedule(static) for ( i = 0; i < k; ++i ) { - ret2 += SQR( v1[i] ); + ret2 += SQR( v1[i] ); } return SQRT( ret2 ); -- GitLab