diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c index 7561df494dd1246dd0e51cd8a812d2bd9e90be4a..7b9cbdab2470ae42a1ea5556a8c8a53455aa9400 100644 --- a/sPuReMD/src/charges.c +++ b/sPuReMD/src/charges.c @@ -871,7 +871,6 @@ static void Setup_Preconditioner_EE( const reax_system * const system, const control_params * const control, simulation_data * const data, static_storage * const workspace ) { - int fillin; real time; sparse_matrix *Hptr; @@ -968,7 +967,6 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, const control_params * const control, simulation_data * const data, static_storage * const workspace ) { - int fillin; real time; sparse_matrix *Hptr; diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c index 4461a9cd251894040eab34d58aec153e7a809f27..5faf0b8c0f302275fda257649537811ca643e17c 100644 --- a/sPuReMD/src/init_md.c +++ b/sPuReMD/src/init_md.c @@ -618,6 +618,20 @@ static void Init_Workspace( reax_system *system, control_params *control, workspace->rp2 = NULL; } + /* ILUTP preconditioner related */ + if ( control->cm_solver_pre_comp_type == ILUTP_PC ) + { + workspace->perm_ilutp = smalloc( sizeof( int ) * system->N_cm, + "Init_Workspace::workspace->perm_ilutp" ); + workspace->r_p = smalloc( sizeof( real ) * system->N_cm, + "Init_Workspace::workspace->r_p" ); + } + else + { + workspace->perm_ilutp = NULL; + workspace->r_p = NULL; + } + /* integrator storage */ workspace->a = smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->a" ); @@ -1349,6 +1363,13 @@ static void Finalize_Workspace( reax_system *system, control_params *control, sfree( workspace->rp2, "Finalize_Workspace::rp2" ); } + /* ILUTP preconditioner related */ + if ( control->cm_solver_pre_comp_type == ILUTP_PC ) + { + sfree( workspace->perm_ilutp, "Finalize_Workspace::workspace->perm_ilutp" ); + sfree( workspace->r_p, "Finalize_Workspace::workspace->r_p" ); + } + /* integrator storage */ sfree( workspace->a, "Finalize_Workspace::workspace->a" ); sfree( workspace->f_old, "Finalize_Workspace::workspace->f_old" ); diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c index 770c1141f18ab5eadafc4c7b6dc39e0cea9b86e1..f65b9f5f96f9b90d0f348956e5b06105a1838ddc 100644 --- a/sPuReMD/src/lin_alg.c +++ b/sPuReMD/src/lin_alg.c @@ -2209,6 +2209,28 @@ void sort_rows_by_colors( const static_storage * const workspace, } +/* Apply permutation P*x = x_p + * + * x: vector to permute + * perm: vector containing mapping if computing P*x + * x_p (output): permuted vector + * n: number of entries in x + */ +static void permute_vector( const real * const x, const int * const perm, + real * const x_p, const unsigned int n ) +{ + unsigned int i; + +#ifdef _OPENMP + #pragma omp for schedule(static) +#endif + for ( i = 0; i < n; ++i ) + { + x_p[i] = x[perm[i]]; + } +} + + /* Apply permutation Q^T*x or Q*x based on graph coloring * * workspace: storage container for workspace structures @@ -2218,7 +2240,7 @@ void sort_rows_by_colors( const static_storage * const workspace, * invert_map: if TRUE, use Q^T, otherwise use Q * tri: coloring to triangular factor to use (lower/upper) */ -static void permute_vector( const static_storage * const workspace, +static void permute_vector_gc( const static_storage * const workspace, real * const x, const unsigned int n, const int invert_map, const TRIANGULARITY tri ) { @@ -2514,7 +2536,7 @@ void jacobi_iter( const static_storage * const workspace, * control: data struct containing parameters * y: vector to which to apply preconditioning, * specific to internals of iterative solver being used - * x: preconditioned vector (output) + * x (output): preconditioned vector * fresh_pre: parameter indicating if this is a newly computed (fresh) preconditioner * side: used in determining how to apply preconditioner if the preconditioner is * factorized as M = M_{1}M_{2} (e.g., incomplete LU, A \approx LU) @@ -2522,9 +2544,9 @@ void jacobi_iter( const static_storage * const workspace, * Assumptions: * Matrices have non-zero diagonals * Each row of a matrix has at least one non-zero (i.e., no rows with all zeros) */ -static void apply_preconditioner( const static_storage * const workspace, const control_params * const control, - const real * const y, real * const x, const int fresh_pre, - const int side ) +static void apply_preconditioner( const static_storage * const workspace, + const control_params * const control, const real * const y, real * const x, + const int fresh_pre, const int side ) { int i, si; @@ -2551,15 +2573,18 @@ static void apply_preconditioner( const static_storage * const workspace, const break; case ICHOLT_PC: case ILUT_PC: - case ILUTP_PC: case FG_ILUT_PC: tri_solve( workspace->L, y, x, workspace->L->n, LOWER ); break; + case ILUTP_PC: + permute_vector( y, workspace->perm_ilutp, workspace->r_p, workspace->H->n ); + tri_solve( workspace->L, workspace->r_p, x, workspace->L->n, LOWER ); + break; case SAI_PC: Sparse_MatVec_full( workspace->H_app_inv, y, x ); break; default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); break; } @@ -2572,16 +2597,20 @@ static void apply_preconditioner( const static_storage * const workspace, const break; case ICHOLT_PC: case ILUT_PC: - case ILUTP_PC: case FG_ILUT_PC: tri_solve_level_sched( (static_storage *) workspace, workspace->L, y, x, workspace->L->n, LOWER, fresh_pre ); break; + case ILUTP_PC: + permute_vector( y, workspace->perm_ilutp, workspace->r_p, workspace->H->n ); + tri_solve_level_sched( (static_storage *) workspace, + workspace->L, workspace->r_p, x, workspace->L->n, LOWER, fresh_pre ); + break; case SAI_PC: Sparse_MatVec_full( workspace->H_app_inv, y, x ); break; default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); break; } @@ -2591,12 +2620,11 @@ static void apply_preconditioner( const static_storage * const workspace, const { case JACOBI_PC: case SAI_PC: - fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unsupported preconditioner computation/application method combination. Terminating...\n" ); exit( INVALID_INPUT ); break; case ICHOLT_PC: case ILUT_PC: - case ILUTP_PC: case FG_ILUT_PC: #ifdef _OPENMP #pragma omp for schedule(static) @@ -2606,12 +2634,27 @@ static void apply_preconditioner( const static_storage * const workspace, const workspace->y_p[i] = y[i]; } - permute_vector( workspace, workspace->y_p, workspace->H->n, FALSE, LOWER ); + permute_vector_gc( workspace, workspace->y_p, workspace->H->n, FALSE, LOWER ); + tri_solve_level_sched( (static_storage *) workspace, + workspace->L, workspace->y_p, x, workspace->L->n, LOWER, fresh_pre ); + break; + case ILUTP_PC: + permute_vector( y, workspace->perm_ilutp, workspace->r_p, workspace->H->n ); + +#ifdef _OPENMP + #pragma omp for schedule(static) +#endif + for ( i = 0; i < workspace->H->n; ++i ) + { + workspace->y_p[i] = workspace->r_p[i]; + } + + permute_vector_gc( workspace, workspace->y_p, workspace->H->n, FALSE, LOWER ); tri_solve_level_sched( (static_storage *) workspace, workspace->L, workspace->y_p, x, workspace->L->n, LOWER, fresh_pre ); break; default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); break; } @@ -2621,12 +2664,11 @@ static void apply_preconditioner( const static_storage * const workspace, const { case JACOBI_PC: case SAI_PC: - fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unsupported preconditioner computation/application method combination. Terminating...\n" ); exit( INVALID_INPUT ); break; case ICHOLT_PC: case ILUT_PC: - case ILUTP_PC: case FG_ILUT_PC: /* construct D^{-1}_L */ if ( fresh_pre == TRUE ) @@ -2644,14 +2686,33 @@ static void apply_preconditioner( const static_storage * const workspace, const jacobi_iter( workspace, workspace->L, workspace->Dinv_L, y, x, LOWER, control->cm_solver_pre_app_jacobi_iters ); break; + case ILUTP_PC: + permute_vector( y, workspace->perm_ilutp, workspace->r_p, workspace->H->n ); + + /* construct D^{-1}_L */ + if ( fresh_pre == TRUE ) + { +#ifdef _OPENMP + #pragma omp for schedule(static) +#endif + for ( i = 0; i < workspace->L->n; ++i ) + { + si = workspace->L->start[i + 1] - 1; + workspace->Dinv_L[i] = 1.0 / workspace->L->val[si]; + } + } + + jacobi_iter( workspace, workspace->L, workspace->Dinv_L, + workspace->r_p, x, LOWER, control->cm_solver_pre_app_jacobi_iters ); + break; default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); break; } break; default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); break; @@ -2678,7 +2739,7 @@ static void apply_preconditioner( const static_storage * const workspace, const tri_solve( workspace->U, y, x, workspace->U->n, UPPER ); break; default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); break; } @@ -2701,7 +2762,7 @@ static void apply_preconditioner( const static_storage * const workspace, const workspace->U, y, x, workspace->U->n, UPPER, fresh_pre ); break; default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); break; } @@ -2711,7 +2772,7 @@ static void apply_preconditioner( const static_storage * const workspace, const { case JACOBI_PC: case SAI_PC: - fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unsupported preconditioner computation/application method combination. Terminating...\n" ); exit( INVALID_INPUT ); break; case ICHOLT_PC: @@ -2720,10 +2781,10 @@ static void apply_preconditioner( const static_storage * const workspace, const case FG_ILUT_PC: tri_solve_level_sched( (static_storage *) workspace, workspace->U, y, x, workspace->U->n, UPPER, fresh_pre ); - permute_vector( workspace, x, workspace->H->n, TRUE, UPPER ); + permute_vector_gc( workspace, x, workspace->H->n, TRUE, UPPER ); break; default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); break; } @@ -2733,7 +2794,7 @@ static void apply_preconditioner( const static_storage * const workspace, const { case JACOBI_PC: case SAI_PC: - fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unsupported preconditioner computation/application method combination. Terminating...\n" ); exit( INVALID_INPUT ); break; case ICHOLT_PC: @@ -2757,13 +2818,13 @@ static void apply_preconditioner( const static_storage * const workspace, const y, x, UPPER, control->cm_solver_pre_app_jacobi_iters ); break; default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); break; } break; default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); break; @@ -2781,7 +2842,7 @@ int GMRES( const static_storage * const workspace, const control_params * const const real tol, real * const x, const int fresh_pre ) { int i, j, k, itr, N, g_j, g_itr; - real cc, tmp1, tmp2, temp, bnorm; + real cc, tmp1, tmp2, temp, bnorm, g_bnorm; real t_start, t_ortho, t_pa, t_spmv, t_ts, t_vops; N = H->n; @@ -2796,7 +2857,7 @@ int GMRES( const static_storage * const workspace, const control_params * const #ifdef _OPENMP #pragma omp parallel default(none) \ private(i, j, k, itr, bnorm, temp, t_start) \ - shared(N, cc, tmp1, tmp2, g_itr, g_j, stderr) \ + shared(N, cc, tmp1, tmp2, g_itr, g_j, g_bnorm, stderr) \ reduction(+: t_ortho, t_pa, t_spmv, t_ts, t_vops) #endif { @@ -2962,6 +3023,7 @@ int GMRES( const static_storage * const workspace, const control_params * const { g_j = j; g_itr = itr; + g_bnorm = bnorm; } } @@ -2974,7 +3036,7 @@ int GMRES( const static_storage * const workspace, const control_params * const if ( g_itr >= control->cm_solver_max_iters ) { fprintf( stderr, "[WARNING] GMRES convergence failed (%d outer iters)\n", g_itr ); - fprintf( stderr, " [INFO] Rel. residual error: %f\n", FABS(workspace->g[g_j]) / bnorm ); + fprintf( stderr, " [INFO] Rel. residual error: %f\n", FABS(workspace->g[g_j]) / g_bnorm ); return g_itr * (control->cm_solver_restart + 1) + g_j + 1; } @@ -2988,7 +3050,7 @@ int GMRES_HouseHolder( const static_storage * const workspace, real * const x, const int fresh_pre ) { int i, j, k, itr, N, g_j, g_itr; - real cc, tmp1, tmp2, temp, bnorm; + real cc, tmp1, tmp2, temp, bnorm, g_bnorm; real v[10000], z[control->cm_solver_restart + 2][10000], w[control->cm_solver_restart + 2]; real u[control->cm_solver_restart + 2][10000]; real t_start, t_ortho, t_pa, t_spmv, t_ts, t_vops; @@ -3004,7 +3066,7 @@ int GMRES_HouseHolder( const static_storage * const workspace, #ifdef _OPENMP #pragma omp parallel default(none) \ private(i, j, k, itr, bnorm, temp, t_start) \ - shared(v, z, w, u, tol, N, cc, tmp1, tmp2, g_itr, g_j, stderr) \ + shared(v, z, w, u, tol, N, cc, tmp1, tmp2, g_itr, g_j, g_bnorm, stderr) \ reduction(+: t_ortho, t_pa, t_spmv, t_ts, t_vops) #endif { @@ -3204,6 +3266,7 @@ int GMRES_HouseHolder( const static_storage * const workspace, { g_j = j; g_itr = itr; + g_bnorm = bnorm; } } @@ -3216,7 +3279,7 @@ int GMRES_HouseHolder( const static_storage * const workspace, if ( g_itr >= control->cm_solver_max_iters ) { fprintf( stderr, "[WARNING] GMRES convergence failed (%d outer iters)\n", g_itr ); - fprintf( stderr, " [INFO] Rel. residual error: %f\n", FABS(w[g_j]) / bnorm ); + fprintf( stderr, " [INFO] Rel. residual error: %f\n", FABS(w[g_j]) / g_bnorm ); return g_itr * (control->cm_solver_restart + 1) + j + 1; } @@ -3230,7 +3293,7 @@ int CG( const static_storage * const workspace, const control_params * const con const real tol, real * const x, const int fresh_pre ) { int i, g_itr, N; - real tmp, alpha, beta, b_norm, r_norm; + real tmp, alpha, beta, bnorm, g_bnorm, rnorm, g_rnorm; real *d, *r, *p, *z; real sig_old, sig_new; real t_start, t_pa, t_spmv, t_vops; @@ -3246,9 +3309,9 @@ int CG( const static_storage * const workspace, const control_params * const con #ifdef _OPENMP #pragma omp parallel default(none) \ - private(i, tmp, alpha, beta, b_norm, r_norm, sig_old, sig_new, t_start) \ + private(i, tmp, alpha, beta, bnorm, rnorm, sig_old, sig_new, t_start) \ reduction(+: t_pa, t_spmv, t_vops) \ - shared(g_itr, N, d, r, p, z) + shared(g_itr, g_bnorm, g_rnorm, N, d, r, p, z) #endif { t_pa = 0.0; @@ -3256,7 +3319,7 @@ int CG( const static_storage * const workspace, const control_params * const con t_vops = 0.0; t_start = Get_Time( ); - b_norm = Norm( b, N ); + bnorm = Norm( b, N ); t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3265,7 +3328,7 @@ int CG( const static_storage * const workspace, const control_params * const con t_start = Get_Time( ); Vector_Sum( r, 1.0, b, -1.0, d, N ); - r_norm = Norm( r, N ); + rnorm = Norm( r, N ); t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3278,7 +3341,7 @@ int CG( const static_storage * const workspace, const control_params * const con sig_new = Dot( r, p, N ); t_vops += Get_Timing_Info( t_start ); - for ( i = 0; i < control->cm_solver_max_iters && r_norm / b_norm > tol; ++i ) + for ( i = 0; i < control->cm_solver_max_iters && rnorm / bnorm > tol; ++i ) { t_start = Get_Time( ); Sparse_MatVec( workspace, H, p, d ); @@ -3289,7 +3352,7 @@ int CG( const static_storage * const workspace, const control_params * const con alpha = sig_new / tmp; Vector_Add( x, alpha, p, N ); Vector_Add( r, -1.0 * alpha, d, N ); - r_norm = Norm( r, N ); + rnorm = Norm( r, N ); t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3308,7 +3371,11 @@ int CG( const static_storage * const workspace, const control_params * const con #ifdef _OPENMP #pragma omp single #endif - g_itr = i; + { + g_itr = i; + g_bnorm = bnorm; + g_rnorm = rnorm; + } } data->timing.cm_solver_pre_app += t_pa / control->num_threads; @@ -3318,7 +3385,7 @@ int CG( const static_storage * const workspace, const control_params * const con if ( g_itr >= control->cm_solver_max_iters ) { fprintf( stderr, "[WARNING] CG convergence failed (%d iters)\n", g_itr ); - fprintf( stderr, " [INFO] Rel. residual error: %f\n", r_norm / b_norm ); + fprintf( stderr, " [INFO] Rel. residual error: %f\n", g_rnorm / g_bnorm ); return g_itr; } @@ -3343,7 +3410,7 @@ int BiCGStab( const static_storage * const workspace, const control_params * con const real tol, real * const x, const int fresh_pre ) { int i, g_itr, N; - real tmp, alpha, beta, omega, rho, rho_old, sigma, r_norm, b_norm; + real tmp, alpha, beta, omega, rho, rho_old, sigma, rnorm, g_rnorm, bnorm, g_bnorm; real t_start, t_pa, t_spmv, t_vops; N = H->n; @@ -3353,9 +3420,9 @@ int BiCGStab( const static_storage * const workspace, const control_params * con #ifdef _OPENMP #pragma omp parallel default(none) \ - private(i, tmp, alpha, beta, omega, rho, rho_old, sigma, r_norm, b_norm, t_start) \ + private(i, tmp, alpha, beta, omega, rho, rho_old, sigma, rnorm, bnorm, t_start) \ reduction(+: t_pa, t_spmv, t_vops) \ - shared(g_itr, N) + shared(g_itr, g_rnorm, g_bnorm, N) #endif { t_pa = 0.0; @@ -3367,18 +3434,18 @@ int BiCGStab( const static_storage * const workspace, const control_params * con t_spmv += Get_Timing_Info( t_start ); t_start = Get_Time( ); - b_norm = Norm( b, N ); + bnorm = Norm( b, N ); Vector_Sum( workspace->r, 1.0, b, -1.0, workspace->d, N ); t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); Vector_Copy( workspace->r_hat, workspace->r, N ); - r_norm = Norm( workspace->r, N ); + rnorm = Norm( workspace->r, N ); Vector_Copy( workspace->p, workspace->r, N ); rho_old = Dot( workspace->r, workspace->r_hat, N ); t_vops += Get_Timing_Info( t_start ); - for ( i = 0; i < control->cm_solver_max_iters && r_norm / b_norm > tol; ++i ) + for ( i = 0; i < control->cm_solver_max_iters && rnorm / bnorm > tol; ++i ) { t_start = Get_Time( ); Sparse_MatVec( workspace, H, workspace->p, workspace->d ); @@ -3401,7 +3468,7 @@ int BiCGStab( const static_storage * const workspace, const control_params * con Vector_Sum( workspace->z, alpha, workspace->p, omega, workspace->q, N ); Vector_Add( x, 1.0, workspace->z, N ); Vector_Sum( workspace->r, 1.0, workspace->q, -1.0 * omega, workspace->y, N ); - r_norm = Norm( workspace->r, N ); + rnorm = Norm( workspace->r, N ); t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3416,7 +3483,11 @@ int BiCGStab( const static_storage * const workspace, const control_params * con #ifdef _OPENMP #pragma omp single #endif - g_itr = i; + { + g_itr = i; + g_rnorm = rnorm; + g_bnorm = bnorm; + } } data->timing.cm_solver_pre_app += t_pa / control->num_threads; @@ -3426,7 +3497,7 @@ int BiCGStab( const static_storage * const workspace, const control_params * con if ( g_itr >= control->cm_solver_max_iters ) { fprintf( stderr, "[WARNING] BiCGStab convergence failed (%d iters)\n", g_itr ); - fprintf( stderr, " [INFO] Rel. residual error: %f\n", r_norm / b_norm ); + fprintf( stderr, " [INFO] Rel. residual error: %f\n", g_rnorm / g_bnorm ); return g_itr; } @@ -3440,8 +3511,8 @@ int SDM( const static_storage * const workspace, const control_params * const co const real tol, real * const x, const int fresh_pre ) { int i, g_itr, N; - real tmp, alpha, b_norm; - real sig; + real tmp, alpha, bnorm, g_bnorm; + real sig, g_sig; real t_start, t_pa, t_spmv, t_vops; N = H->n; @@ -3451,9 +3522,9 @@ int SDM( const static_storage * const workspace, const control_params * const co #ifdef _OPENMP #pragma omp parallel default(none) \ - private(i, tmp, alpha, b_norm, sig, t_start) \ + private(i, tmp, alpha, bnorm, sig, t_start) \ reduction(+: t_pa, t_spmv, t_vops) \ - shared(g_itr, N) + shared(g_itr, g_sig, g_bnorm, N) #endif { t_pa = 0.0; @@ -3461,7 +3532,7 @@ int SDM( const static_storage * const workspace, const control_params * const co t_vops = 0.0; t_start = Get_Time( ); - b_norm = Norm( b, N ); + bnorm = Norm( b, N ); t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3481,7 +3552,7 @@ int SDM( const static_storage * const workspace, const control_params * const co sig = Dot( workspace->r, workspace->d, N ); t_vops += Get_Timing_Info( t_start ); - for ( i = 0; i < control->cm_solver_max_iters && SQRT(sig) / b_norm > tol; ++i ) + for ( i = 0; i < control->cm_solver_max_iters && SQRT(sig) / bnorm > tol; ++i ) { t_start = Get_Time( ); Sparse_MatVec( workspace, H, workspace->d, workspace->q ); @@ -3514,7 +3585,11 @@ int SDM( const static_storage * const workspace, const control_params * const co #ifdef _OPENMP #pragma omp single #endif - g_itr = i; + { + g_itr = i; + g_sig = sig; + g_bnorm = bnorm; + } } data->timing.cm_solver_pre_app += t_pa / control->num_threads; @@ -3524,7 +3599,7 @@ int SDM( const static_storage * const workspace, const control_params * const co if ( g_itr >= control->cm_solver_max_iters ) { fprintf( stderr, "[WARNING] SDM convergence failed (%d iters)\n", g_itr ); - fprintf( stderr, " [INFO] Rel. residual error: %f\n", SQRT(sig) / b_norm ); + fprintf( stderr, " [INFO] Rel. residual error: %f\n", SQRT(g_sig) / g_bnorm ); return g_itr; } diff --git a/sPuReMD/src/print_utils.c b/sPuReMD/src/print_utils.c index aeabf83ca9e54fea713269cf7757748a8ad4f231..602cc4ada1586eb47cf1822e7c71ef0f63185855 100644 --- a/sPuReMD/src/print_utils.c +++ b/sPuReMD/src/print_utils.c @@ -884,7 +884,7 @@ void Read_Sparse_Matrix2( sparse_matrix *A, char *fname ) A->start[cur_row] = top; - while ( fscanf( f, "%6d %6d %24.15lf", &row, &col, &val ) == 3 ) + while ( fscanf( f, "%6d %6d %24lf", &row, &col, &val ) == 3 ) { /* assumption: every row has at least one nonzero (diagonal) */ if ( cur_row != row - 1 ) @@ -904,6 +904,28 @@ void Read_Sparse_Matrix2( sparse_matrix *A, char *fname ) } +/* Read permuation matrix in COO format (1-based indexing) + * and store in a dense vector (preallocated) + * + * Note: the file must be sorted in increasing order of + * columns than rows */ +void Read_Permutation_Matrix( int *v, char *fname ) +{ + int row, col; + real val; + FILE *f; + + f = sfopen( fname, "r" ); + + while ( fscanf( f, "%6d %6d %24lf", &row, &col, &val ) == 3 ) + { + v[row - 1] = col - 1; + } + + sfclose( f, "Read_Permuation_Matrix::f" ); +} + + /* Note: watch out for portability issues with endianness * due to serialization of numeric types (integer, IEEE 754) */ void Print_Sparse_Matrix_Binary( sparse_matrix *A, char *fname ) diff --git a/sPuReMD/src/print_utils.h b/sPuReMD/src/print_utils.h index 8496a405d716a356d753ac24ee49ae400f3164ac..1852ca31e5d4a4d5ad27063633029f303edef5b7 100644 --- a/sPuReMD/src/print_utils.h +++ b/sPuReMD/src/print_utils.h @@ -56,6 +56,8 @@ void Print_Sparse_Matrix2( sparse_matrix*, char*, char* ); void Read_Sparse_Matrix2( sparse_matrix *, char * ); +void Read_Permutation_Matrix( int *, char * ); + void Print_Sparse_Matrix_Binary( sparse_matrix*, char* ); void Print_Bonds( reax_system*, reax_list*, char* ); diff --git a/sPuReMD/src/reax_types.h b/sPuReMD/src/reax_types.h index e26363a48b042d661d554de7919ccfc59dc0c9c5..b0fa09a758288dff14e49750e8e44cce9caa2b0d 100644 --- a/sPuReMD/src/reax_types.h +++ b/sPuReMD/src/reax_types.h @@ -1116,7 +1116,7 @@ struct static_storage real *b_local; #endif - /* Level scheduling related storage for applying, e.g. ICHOLT and ILU(T), + /* Level scheduling related storage for applying ICHOLT/ILUT(P)/FG-ILUT * preconditioners */ int levels_L; int levels_U; @@ -1128,7 +1128,7 @@ struct static_storage unsigned int *level_rows_cnt_U; unsigned int *top; - /* Graph coloring related storage for applying, e.g. ICHOLT and ILU(T), + /* Graph coloring related storage for applying ICHOLT/ILUT(P)/FG-ILUT * preconditioners */ unsigned int *color; unsigned int *to_color; @@ -1142,7 +1142,7 @@ struct static_storage real *y_p; real *x_p; - /* Jacobi iteration related storage for applying, e.g. ICHOLT and ILU(T), + /* Jacobi iteration related storage for applying ICHOLT/ILUT(P)/FG-ILUT * preconditioners */ real *Dinv_L; real *Dinv_U; @@ -1150,6 +1150,11 @@ struct static_storage real *rp; real *rp2; + /* permutation for ILUTP */ + int *perm_ilutp; + /* permuted residual for preconditioning with ILUTP */ + real *r_p; + int num_H; int *hbond_index; // for hydrogen bonds