diff --git a/sPuReMD/src/GMRES.c b/sPuReMD/src/GMRES.c index 6f526aeb12fd56b9d266ffbf670f5f5533b3a78b..9037d314162cc56ffb09778d630835181097d17f 100644 --- a/sPuReMD/src/GMRES.c +++ b/sPuReMD/src/GMRES.c @@ -22,6 +22,7 @@ #include "GMRES.h" #include "allocate.h" #include "list.h" +#include "tool_box.h" #include "vector.h" @@ -38,7 +39,7 @@ typedef enum * x: vector * b: vector (result) */ static void Sparse_MatVec( const sparse_matrix * const A, - const real * const x, real * const b ) + const real * const x, real * const b ) { int i, j, k, n, si, ei; real H; @@ -124,8 +125,8 @@ static void Sparse_MatVec( const sparse_matrix * const A, * b: vector (result) * D^{-1}b (Dinv_b): precomputed vector-vector product */ static void Sparse_MatVec_Vector_Add( const sparse_matrix * const R, - const TRIANGULARITY tri, const real * const Dinv, - const real * const x, real * const b, const real * const Dinv_b) + const TRIANGULARITY tri, const real * const Dinv, + const real * const x, real * const b, const real * const Dinv_b) { int i, k, si = 0, ei = 0; #ifdef _OPENMP @@ -203,109 +204,157 @@ static void Sparse_MatVec_Vector_Add( const sparse_matrix * const R, } -/* solve sparse lower triangular linear system using forward substitution */ -static void Forward_Subs( const sparse_matrix * const L, const real * const b, real * const y ) +static void diag_pre_app( const real * const Hdia_inv, const real * const y, + real * const x, const int N ) { - int i, pj, j, si, ei; - real val; + unsigned int i; - for ( i = 0; i < L->n; ++i ) + #pragma omp parallel for schedule(guided) \ + default(none) private(i) shared(stderr) + for ( i = 0; i < N; ++i ) { - y[i] = b[i]; - si = L->start[i]; - ei = L->start[i + 1]; - for ( pj = si; pj < ei - 1; ++pj ) - { - j = L->j[pj]; - val = L->val[pj]; - y[i] -= val * y[j]; - } - y[i] /= L->val[pj]; + x[i] = y[i] * Hdia_inv[i]; } } -/* solve sparse upper triangular linear system using backward substitution */ -static void Backward_Subs( const sparse_matrix * const U, const real * const y, real * const x ) +/* Solve triangular system LU*x = y using level scheduling + * + * LU: lower/upper triangular, stored in CSR + * y: constants in linear system (RHS) + * x: solution + * tri: triangularity of LU (lower/upper) + * + * Assumptions: + * LU has non-zero diagonals + * Each row of LU has at least one non-zero (i.e., no rows with all zeros) */ +static void tri_solve( const sparse_matrix * const LU, const real * const y, + real * const x, const TRIANGULARITY tri ) { int i, pj, j, si, ei; real val; - for ( i = U->n - 1; i >= 0; --i ) + if ( tri == LOWER ) { - x[i] = y[i]; - si = U->start[i]; - ei = U->start[i + 1]; - for ( pj = si + 1; pj < ei; ++pj ) + for ( i = 0; i < LU->n; ++i ) { - j = U->j[pj]; - val = U->val[pj]; - x[i] -= val * x[j]; + x[i] = y[i]; + si = LU->start[i]; + ei = LU->start[i + 1]; + for ( pj = si; pj < ei - 1; ++pj ) + { + j = LU->j[pj]; + val = LU->val[pj]; + x[i] -= val * x[j]; + } + x[i] /= LU->val[pj]; + } + } + else + { + for ( i = LU->n - 1; i >= 0; --i ) + { + x[i] = y[i]; + si = LU->start[i]; + ei = LU->start[i + 1]; + for ( pj = si + 1; pj < ei; ++pj ) + { + j = LU->j[pj]; + val = LU->val[pj]; + x[i] -= val * x[j]; + } + x[i] /= LU->val[si]; } - x[i] /= U->val[si]; } } -/* Triangular solve LU*x = y using level scheduling +/* Solve triangular system LU*x = y using level scheduling * * LU: lower/upper triangular, stored in CSR * y: constants in linear system (RHS) * x: solution - * tri: triangularity of A (lower/upper) + * tri: triangularity of LU (lower/upper) + * find_levels: perform level search if positive, otherwise reuse existing levels * * Assumptions: * LU has non-zero diagonals * Each row of LU has at least one non-zero (i.e., no rows with all zeros) */ -static void tri_solve_level_sched( sparse_matrix * const LU, const real * const y, - real * const x, const TRIANGULARITY tri ) +static void tri_solve_level_sched( const sparse_matrix * const LU, const real * const y, + real * const x, const TRIANGULARITY tri, int find_levels ) { - int i, j, pj, local_row, local_level, levels = 1; + int i, j, pj, local_row, local_level, levels; + static int levels_L = 1, levels_U = 1; + static unsigned int *row_levels_L = NULL, *level_rows_L = NULL, *level_rows_cnt_L = NULL; + static unsigned int *row_levels_U = NULL, *level_rows_U = NULL, *level_rows_cnt_U = NULL; unsigned int *row_levels, *level_rows, *level_rows_cnt; - - if ( (row_levels = (unsigned int*) calloc(LU->n, sizeof(unsigned int))) == NULL - || (level_rows = (unsigned int*) malloc(LU->n * LU->n * sizeof(unsigned int))) == NULL - || (level_rows_cnt = (unsigned int*) calloc(LU->n, sizeof(unsigned int))) == NULL ) + + if ( tri == LOWER ) { - fprintf( stderr, "Not enough space for triangular solve via level scheduling. Terminating...\n" ); - exit( INSUFFICIENT_MEMORY ); + row_levels = row_levels_L; + level_rows = level_rows_L; + level_rows_cnt = level_rows_cnt_L; + levels = levels_L; + } + else + { + row_levels = row_levels_U; + level_rows = level_rows_U; + level_rows_cnt = level_rows_cnt_U; + levels = levels_U; + } + + if ( row_levels == NULL || level_rows == NULL || level_rows_cnt == NULL ) + { + if ( (row_levels = (unsigned int*) malloc(LU->n * sizeof(unsigned int))) == NULL + || (level_rows = (unsigned int*) malloc(LU->n * LU->n * sizeof(unsigned int))) == NULL + || (level_rows_cnt = (unsigned int*) malloc(LU->n * sizeof(unsigned int))) == NULL ) + { + fprintf( stderr, "Not enough space for triangular solve via level scheduling. Terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } } /* find levels (row dependencies in substitutions) */ - if ( tri == LOWER ) + if ( find_levels ) { - for ( i = 0; i < LU->n; ++i ) + memset( row_levels, 0, LU->n * sizeof( unsigned int) ); + memset( level_rows_cnt, 0, LU->n * sizeof( unsigned int) ); + + if ( tri == LOWER ) { - local_level = 0; - for ( pj = LU->start[i]; pj < LU->start[i + 1] - 1; ++pj ) + for ( i = 0; i < LU->n; ++i ) { - local_level = MAX( local_level, row_levels[LU->j[pj]] + 1 ); + local_level = 0; + for ( pj = LU->start[i]; pj < LU->start[i + 1] - 1; ++pj ) + { + local_level = MAX( local_level, row_levels[LU->j[pj]] + 1 ); + } + + levels = MAX( levels, local_level + 1 ); + row_levels[i] = local_level; + level_rows[local_level * LU->n + level_rows_cnt[local_level]] = i; + ++level_rows_cnt[local_level]; } - - levels = MAX( levels, local_level + 1 ); - row_levels[i] = local_level; - level_rows[local_level * LU->n + level_rows_cnt[local_level]] = i; - ++level_rows_cnt[local_level]; } - } - else - { - for ( i = LU->n - 1; i >= 0; --i ) + else { - local_level = 0; - for ( pj = LU->start[i] + 1; pj < LU->start[i + 1]; ++pj ) + for ( i = LU->n - 1; i >= 0; --i ) { - local_level = MAX( local_level, row_levels[LU->j[pj]] + 1 ); + local_level = 0; + for ( pj = LU->start[i] + 1; pj < LU->start[i + 1]; ++pj ) + { + local_level = MAX( local_level, row_levels[LU->j[pj]] + 1 ); + } + + levels = MAX( levels, local_level + 1 ); + row_levels[i] = local_level; + level_rows[local_level * LU->n + level_rows_cnt[local_level]] = i; + ++level_rows_cnt[local_level]; } - - levels = MAX( levels, local_level + 1 ); - row_levels[i] = local_level; - level_rows[local_level * LU->n + level_rows_cnt[local_level]] = i; - ++level_rows_cnt[local_level]; } } - /* perform substitutions by level */ if ( tri == LOWER ) { @@ -346,9 +395,21 @@ static void tri_solve_level_sched( sparse_matrix * const LU, const real * const } } - free( level_rows_cnt ); - free( level_rows ); - free( row_levels ); + /* save level info for re-use if performing repeated triangular solves via preconditioning */ + if ( tri == LOWER ) + { + row_levels_L = row_levels; + level_rows_L = level_rows; + level_rows_cnt_L = level_rows_cnt; + levels_L = levels; + } + else + { + row_levels_U = row_levels; + level_rows_U = level_rows; + level_rows_cnt_U = level_rows_cnt; + levels_U = levels; + } } @@ -363,9 +424,9 @@ static void tri_solve_level_sched( sparse_matrix * const LU, const real * const * * Note: Newmann series arises from series expansion of the inverse of * the coefficient matrix in the triangular system */ -static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri, - const real * const Dinv, const unsigned int n, - const real * const b, real * const x, const unsigned int maxiter ) +static void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, + const real * const b, real * const x, const TRIANGULARITY tri, + const unsigned int maxiter ) { unsigned int i, k, si = 0, ei = 0, iter = 0; #ifdef _OPENMP @@ -387,7 +448,7 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri, * overhead per Sparse_MatVec call*/ if ( Dinv_b == NULL ) { - if ( (Dinv_b = (real*) malloc(sizeof(real) * n)) == NULL ) + if ( (Dinv_b = (real*) malloc(sizeof(real) * R->n)) == NULL ) { fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); @@ -395,7 +456,7 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri, } if ( rp == NULL ) { - if ( (rp = (real*) malloc(sizeof(real) * n)) == NULL ) + if ( (rp = (real*) malloc(sizeof(real) * R->n)) == NULL ) { fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); @@ -403,7 +464,7 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri, } if ( rp2 == NULL ) { - if ( (rp2 = (real*) malloc(sizeof(real) * n)) == NULL ) + if ( (rp2 = (real*) malloc(sizeof(real) * R->n)) == NULL ) { fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); @@ -420,14 +481,14 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri, } #endif - Vector_MakeZero( rp, n ); + Vector_MakeZero( rp, R->n ); } #pragma omp barrier /* precompute and cache, as invariant in loop below */ #pragma omp for schedule(guided) - for ( i = 0; i < n; ++i ) + for ( i = 0; i < R->n; ++i ) { Dinv_b[i] = Dinv[i] * b[i]; } @@ -456,7 +517,7 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri, si = R->start[i]; ei = R->start[i + 1] - 1; } - else if (tri == UPPER) + else { si = R->start[i] + 1; @@ -506,42 +567,195 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri, while ( iter < maxiter ); } - Vector_Copy( x, rp, n ); + Vector_Copy( x, rp, R->n ); } -/* generalized minimual residual iterative solver for sparse linear systems, - * diagonal preconditioner */ -int GMRES( static_storage *workspace, sparse_matrix *H, - real *b, real tol, real *x, FILE *fout, real *time, real *spmv_time ) +/* Solve triangular system LU*x = y using level scheduling + * + * workspace: lower/upper triangular, stored in CSR + * y: constants in linear system (RHS) + * x: solution + * tri: triangularity of preconditoning factor LU (lower/upper), if using ILU-based + * pc_type: preconditioner computation method used + * pa_type: preconditioner application method to use + * extra: parameter for some application methods, if applicable + * + * Assumptions: + * LU has non-zero diagonals + * Each row of LU has at least one non-zero (i.e., no rows with all zeros) */ +void apply_preconditioner( const static_storage * const workspace, real * const y, + real * const x, const TRIANGULARITY tri, int pc_type, int pa_type, int extra ) { - int i, j, k, itr, N; - real cc, tmp1, tmp2, temp, bnorm; - struct timeval start, stop; + int i, si; + sparse_matrix *LU; + static real *Dinv_L = NULL, *Dinv_U = NULL; + + if ( tri == LOWER ) + { + LU = workspace->L; + } + else + { + LU = workspace->U; + } + + if ( ( pc_type != DIAG_PC || tri != LOWER ) && LU == NULL ) + { + return; + } + + switch ( pa_type ) + { + case NONE_PA: + break; + case TRI_SOLVE_PA: + switch ( pc_type ) + { + case DIAG_PC: + diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + tri_solve( LU, y, x, tri ); + break; + default: + break; + } + break; + case TRI_SOLVE_LEVEL_SCHED_PA: + switch ( pc_type ) + { + case DIAG_PC: + diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + tri_solve_level_sched( LU, y, x, tri, extra ); + break; + default: + break; + } + break; + case JACOBI_ITER_PA: + switch ( pc_type ) + { + case DIAG_PC: + fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + if ( tri == LOWER ) + { + if ( Dinv_L == NULL ) + { + if ( (Dinv_L = (real*) malloc(sizeof(real) * LU->n)) == NULL ) + { + fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + + /* construct D^{-1} */ + for ( i = 0; i < LU->n; ++i ) + { + si = LU->start[i + 1] - 1; + Dinv_L[i] = 1. / LU->val[si]; + } + + jacobi_iter( LU, Dinv_L, y, x, tri, extra ); + } + else + { + if ( Dinv_U == NULL ) + { + if ( (Dinv_U = (real*) malloc(sizeof(real) * LU->n)) == NULL ) + { + fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + + /* construct D^{-1}_U */ + for ( i = 0; i < LU->n; ++i ) + { + si = LU->start[i]; + Dinv_U[i] = 1. / LU->val[si]; + } + + jacobi_iter( LU, Dinv_U, y, x, tri, extra ); + } + break; + default: + break; + } + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + + } + + return; +} + + +/* generalized minimual residual iterative solver for sparse linear systems */ +int GMRES( const static_storage * const workspace, const control_params * const control, + const sparse_matrix * const H, const real * const b, real tol, real * const x, + const FILE * const fout, real * const time, real * const spmv_time ) +{ + int i, j, k, itr, N, si; + real cc, tmp1, tmp2, temp, bnorm, time_start; N = H->n; bnorm = Norm( b, N ); - /* apply the diagonal pre-conditioner to rhs */ - gettimeofday( &start, NULL ); - for ( i = 0; i < N; ++i ) - workspace->b_prc[i] = b[i] * workspace->Hdia_inv[i]; - gettimeofday( &stop, NULL ); - *time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); + + if ( control->pre_comp_type == DIAG_PC ) + { + /* apply precondioner to RHS */ + apply_preconditioner( workspace, workspace->b, workspace->b_prc, LOWER, + control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters ); + } /* GMRES outer-loop */ for ( itr = 0; itr < MAX_ITR; ++itr ) { /* calculate r0 */ - gettimeofday( &start, NULL ); + time_start = Get_Time( ); Sparse_MatVec( H, x, workspace->b_prm ); - gettimeofday( &stop, NULL ); - *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - for ( i = 0; i < N; ++i ) - workspace->b_prm[i] *= workspace->Hdia_inv[i]; /* pre-conditioner */ + *spmv_time += Get_Timing_Info( time_start ); + + if ( control->pre_comp_type == DIAG_PC ) + { + time_start = Get_Time( ); + apply_preconditioner( workspace, workspace->b_prm, workspace->b_prm, LOWER, + control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters ); + *time += Get_Timing_Info( time_start ); + } + + + if ( control->pre_comp_type == DIAG_PC ) + { + Vector_Sum( workspace->v[0], 1., workspace->b_prc, -1., workspace->b_prm, N ); + } + else + { + Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N ); + } + + if ( control->pre_comp_type != DIAG_PC ) + { + time_start = Get_Time( ); + apply_preconditioner( workspace, workspace->v[0], workspace->v[0], LOWER, + control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters ); + apply_preconditioner( workspace, workspace->v[0], workspace->v[0], UPPER, + control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters ); + *time += Get_Timing_Info( time_start ); + } - Vector_Sum(workspace->v[0], 1., workspace->b_prc, -1., workspace->b_prm, N); workspace->g[0] = Norm( workspace->v[0], N ); Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N ); //fprintf( stderr, "res: %.15e\n", workspace->g[0] ); @@ -550,26 +764,28 @@ int GMRES( static_storage *workspace, sparse_matrix *H, for ( j = 0; j < RESTART && fabs(workspace->g[j]) / bnorm > tol; j++ ) { /* matvec */ - gettimeofday( &start, NULL ); + time_start = Get_Time( ); Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] ); - gettimeofday( &stop, NULL ); - *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - /*pre-conditioner*/ - gettimeofday( &start, NULL ); - for ( k = 0; k < N; ++k ) - workspace->v[j + 1][k] *= workspace->Hdia_inv[k]; - gettimeofday( &stop, NULL ); - *time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - //fprintf( stderr, "%d-%d: matvec done.\n", itr, j ); + *spmv_time += Get_Timing_Info( time_start ); + + time_start = Get_Time( ); + apply_preconditioner( workspace, workspace->v[j + 1], workspace->v[j + 1], LOWER, + control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters ); + apply_preconditioner( workspace, workspace->v[j + 1], workspace->v[j + 1], UPPER, + control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters ); + *time += Get_Timing_Info( time_start ); /* apply modified Gram-Schmidt to orthogonalize the new residual */ - for ( i = 0; i <= j; i++ ) + for ( i = 0; i < j - 1; i++ ) + { + workspace->h[i][j] = 0; + } + + //for( i = 0; i <= j; i++ ) { + for ( i = MAX(j - 1, 0); i <= j; i++ ) { workspace->h[i][j] = Dot( workspace->v[i], workspace->v[j + 1], N ); - Vector_Add( workspace->v[j + 1], - -workspace->h[i][j], workspace->v[i], N ); + Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N ); } workspace->h[j + 1][j] = Norm( workspace->v[j + 1], N ); @@ -577,9 +793,8 @@ int GMRES( static_storage *workspace, sparse_matrix *H, 1. / workspace->h[j + 1][j], workspace->v[j + 1], N ); // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j ); - /* Givens rotations on the upper-Hessenberg matrix to make it U */ - for ( i = 0; i <= j; i++ ) + for ( i = MAX(j - 1, 0); i <= j; i++ ) { if ( i == j ) { @@ -603,32 +818,41 @@ int GMRES( static_storage *workspace, sparse_matrix *H, workspace->g[j] = tmp1; workspace->g[j + 1] = tmp2; - // fprintf( stderr, "h: " ); - // for( i = 0; i <= j+1; ++i ) - // fprintf( stderr, "%.6f ", workspace->h[i][j] ); - // fprintf( stderr, "\n" ); + //fprintf( stderr, "h: " ); + //for( i = 0; i <= j+1; ++i ) + //fprintf( stderr, "%.6f ", workspace->h[i][j] ); + //fprintf( stderr, "\n" ); //fprintf( stderr, "res: %.15e\n", workspace->g[j+1] ); } - /* solve Hy = g. - H is now upper-triangular, do back-substitution */ + /* TODO: solve using Jacobi iteration? */ + /* solve Hy = g: H is now upper-triangular, do back-substitution */ for ( i = j - 1; i >= 0; i-- ) { temp = workspace->g[i]; for ( k = j - 1; k > i; k-- ) + { temp -= workspace->h[i][k] * workspace->y[k]; + } workspace->y[i] = temp / workspace->h[i][i]; } /* update x = x_0 + Vy */ + Vector_MakeZero( workspace->p, N ); for ( i = 0; i < j; i++ ) - Vector_Add( x, workspace->y[i], workspace->v[i], N ); + { + Vector_Add( workspace->p, workspace->y[i], workspace->v[i], N ); + } + + Vector_Add( x, 1., workspace->p, N ); /* stopping condition */ if ( fabs(workspace->g[j]) / bnorm <= tol ) + { break; + } } // Sparse_MatVec( H, x, workspace->b_prm ); @@ -654,9 +878,9 @@ int GMRES( static_storage *workspace, sparse_matrix *H, } - -int GMRES_HouseHolder( static_storage *workspace, sparse_matrix *H, - real *b, real tol, real *x, FILE *fout) +int GMRES_HouseHolder( const static_storage * const workspace, const control_params * const control, + const sparse_matrix * const H, const real * const b, real tol, real * const x, + const FILE * const fout, real * const time, real * const spmv_time ) { int i, j, k, itr, N; real cc, tmp1, tmp2, temp, bnorm; @@ -844,330 +1068,7 @@ int GMRES_HouseHolder( static_storage *workspace, sparse_matrix *H, } -/* generalized minimual residual iterative solver for sparse linear systems, - * with preconditioner using factors LU \approx H - * and forward / backward substitution */ -int PGMRES( static_storage *workspace, sparse_matrix *H, real *b, real tol, - sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout, real *time, real *spmv_time ) -{ - int i, j, k, itr, N; - real cc, tmp1, tmp2, temp, bnorm; - struct timeval start, stop; - - N = H->n; - bnorm = Norm( b, N ); - - /* GMRES outer-loop */ - for ( itr = 0; itr < MAX_ITR; ++itr ) - { - /* calculate r0 */ - gettimeofday( &start, NULL ); - Sparse_MatVec( H, x, workspace->b_prm ); - gettimeofday( &stop, NULL ); - *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N ); - gettimeofday( &start, NULL ); - Forward_Subs( L, workspace->v[0], workspace->v[0] ); - Backward_Subs( U, workspace->v[0], workspace->v[0] ); - gettimeofday( &stop, NULL ); - *time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - workspace->g[0] = Norm( workspace->v[0], N ); - Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N ); - //fprintf( stderr, "res: %.15e\n", workspace->g[0] ); - - /* GMRES inner-loop */ - for ( j = 0; j < RESTART && fabs(workspace->g[j]) / bnorm > tol; j++ ) - { - /* matvec */ - gettimeofday( &start, NULL ); - Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] ); - gettimeofday( &stop, NULL ); - *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - gettimeofday( &start, NULL ); - Forward_Subs( L, workspace->v[j + 1], workspace->v[j + 1] ); - Backward_Subs( U, workspace->v[j + 1], workspace->v[j + 1] ); - gettimeofday( &stop, NULL ); - *time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - - /* apply modified Gram-Schmidt to orthogonalize the new residual */ - for ( i = 0; i < j - 1; i++ ) workspace->h[i][j] = 0; - - //for( i = 0; i <= j; i++ ) { - for ( i = MAX(j - 1, 0); i <= j; i++ ) - { - workspace->h[i][j] = Dot( workspace->v[i], workspace->v[j + 1], N ); - Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N ); - } - - workspace->h[j + 1][j] = Norm( workspace->v[j + 1], N ); - Vector_Scale( workspace->v[j + 1], - 1. / workspace->h[j + 1][j], workspace->v[j + 1], N ); - // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j ); - - /* Givens rotations on the upper-Hessenberg matrix to make it U */ - for ( i = MAX(j - 1, 0); i <= j; i++ ) - { - if ( i == j ) - { - cc = SQRT( SQR(workspace->h[j][j]) + SQR(workspace->h[j + 1][j]) ); - workspace->hc[j] = workspace->h[j][j] / cc; - workspace->hs[j] = workspace->h[j + 1][j] / cc; - } - - tmp1 = workspace->hc[i] * workspace->h[i][j] + - workspace->hs[i] * workspace->h[i + 1][j]; - tmp2 = -workspace->hs[i] * workspace->h[i][j] + - workspace->hc[i] * workspace->h[i + 1][j]; - - workspace->h[i][j] = tmp1; - workspace->h[i + 1][j] = tmp2; - } - - /* apply Givens rotations to the rhs as well */ - tmp1 = workspace->hc[j] * workspace->g[j]; - tmp2 = -workspace->hs[j] * workspace->g[j]; - workspace->g[j] = tmp1; - workspace->g[j + 1] = tmp2; - - //fprintf( stderr, "h: " ); - //for( i = 0; i <= j+1; ++i ) - //fprintf( stderr, "%.6f ", workspace->h[i][j] ); - //fprintf( stderr, "\n" ); - //fprintf( stderr, "res: %.15e\n", workspace->g[j+1] ); - } - - - /* solve Hy = g: H is now upper-triangular, do back-substitution */ - for ( i = j - 1; i >= 0; i-- ) - { - temp = workspace->g[i]; - for ( k = j - 1; k > i; k-- ) - temp -= workspace->h[i][k] * workspace->y[k]; - - workspace->y[i] = temp / workspace->h[i][i]; - } - - /* update x = x_0 + Vy */ - Vector_MakeZero( workspace->p, N ); - for ( i = 0; i < j; i++ ) - Vector_Add( workspace->p, workspace->y[i], workspace->v[i], N ); - //Backward_Subs( U, workspace->p, workspace->p ); - //Forward_Subs( L, workspace->p, workspace->p ); - Vector_Add( x, 1., workspace->p, N ); - - /* stopping condition */ - if ( fabs(workspace->g[j]) / bnorm <= tol ) - break; - } - - // Sparse_MatVec( H, x, workspace->b_prm ); - // for( i = 0; i < N; ++i ) - // workspace->b_prm[i] *= workspace->Hdia_inv[i]; - // fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" ); - // for( i = 0; i < N; ++i ) - // fprintf( fout, "%10.5f%15.12f%15.12f\n", - // workspace->b_prc[i], workspace->b_prm[i], x[i] );*/ - - // fprintf(fout,"GMRES outer:%d, inner:%d iters - residual norm: %25.20f\n", - // itr, j, fabs( workspace->g[j] ) / bnorm ); - // data->timing.matvec += itr * RESTART + j; - - if ( itr >= MAX_ITR ) - { - fprintf( stderr, "GMRES convergence failed\n" ); - // return -1; - return itr * (RESTART + 1) + j + 1; - } - - return itr * (RESTART + 1) + j + 1; -} - - -/* generalized minimual residual iterative solver for sparse linear systems, - * with preconditioner using factors LU \approx H - * and Jacobi iteration for approximate factor application */ -int PGMRES_Jacobi( static_storage *workspace, sparse_matrix *H, real *b, real tol, - sparse_matrix *L, sparse_matrix *U, real *x, unsigned int iters, - FILE *fout, real *time, real *spmv_time ) -{ - int i, j, k, itr, N, si; - real cc, tmp1, tmp2, temp, bnorm; - real *Dinv_L, *Dinv_U; - struct timeval start, stop; - - N = H->n; - bnorm = Norm( b, N ); - - /* Compute Jacobi iteration matrices from - * truncated Newmann series: x_{k+1} = Gx_k + D^{-1}b - * where: - * G = I - D^{-1}R - * R = triangular matrix - * D = diagonal matrix, diagonals from R */ - if ( (Dinv_L = (real*) malloc(sizeof(real) * N)) == NULL - || (Dinv_U = (real*) malloc(sizeof(real) * N)) == NULL ) - { - fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } - - /* construct D^{-1}_L and D^{-1}_U */ - for ( i = 0; i < N; ++i ) - { - si = L->start[i + 1] - 1; - Dinv_L[i] = 1. / L->val[si]; - - si = U->start[i]; - Dinv_U[i] = 1. / U->val[si]; - } - - /* GMRES outer-loop */ - for ( itr = 0; itr < MAX_ITR; ++itr ) - { - /* calculate r0 */ - gettimeofday( &start, NULL ); - Sparse_MatVec( H, x, workspace->b_prm ); - gettimeofday( &stop, NULL ); - *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N ); - gettimeofday( &start, NULL ); - Jacobi_Iter( L, LOWER, Dinv_L, N, workspace->v[0], workspace->v[0], iters ); - Jacobi_Iter( U, UPPER, Dinv_U, N, workspace->v[0], workspace->v[0], iters ); - gettimeofday( &stop, NULL ); - *time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - workspace->g[0] = Norm( workspace->v[0], N ); - Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N ); - //fprintf( stderr, "res: %.15e\n", workspace->g[0] ); - - /* GMRES inner-loop */ - for ( j = 0; j < RESTART && fabs(workspace->g[j]) / bnorm > tol; j++ ) - { - /* matvec */ - gettimeofday( &start, NULL ); - Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] ); - gettimeofday( &stop, NULL ); - *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - gettimeofday( &start, NULL ); - Jacobi_Iter( L, LOWER, Dinv_L, N, workspace->v[j + 1], workspace->v[j + 1], iters ); - Jacobi_Iter( U, UPPER, Dinv_U, N, workspace->v[j + 1], workspace->v[j + 1], iters ); - gettimeofday( &stop, NULL ); - *time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - - /* apply modified Gram-Schmidt to orthogonalize the new residual */ - for ( i = 0; i < j - 1; i++ ) - { - workspace->h[i][j] = 0; - } - - //for( i = 0; i <= j; i++ ) { - for ( i = MAX(j - 1, 0); i <= j; i++ ) - { - workspace->h[i][j] = Dot( workspace->v[i], workspace->v[j + 1], N ); - Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N ); - } - - workspace->h[j + 1][j] = Norm( workspace->v[j + 1], N ); - Vector_Scale( workspace->v[j + 1], - 1. / workspace->h[j + 1][j], workspace->v[j + 1], N ); - // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j ); - - /* Givens rotations on the upper-Hessenberg matrix to make it U */ - for ( i = MAX(j - 1, 0); i <= j; i++ ) - { - if ( i == j ) - { - cc = SQRT( SQR(workspace->h[j][j]) + SQR(workspace->h[j + 1][j]) ); - workspace->hc[j] = workspace->h[j][j] / cc; - workspace->hs[j] = workspace->h[j + 1][j] / cc; - } - - tmp1 = workspace->hc[i] * workspace->h[i][j] + - workspace->hs[i] * workspace->h[i + 1][j]; - tmp2 = -workspace->hs[i] * workspace->h[i][j] + - workspace->hc[i] * workspace->h[i + 1][j]; - - workspace->h[i][j] = tmp1; - workspace->h[i + 1][j] = tmp2; - } - - /* apply Givens rotations to the rhs as well */ - tmp1 = workspace->hc[j] * workspace->g[j]; - tmp2 = -workspace->hs[j] * workspace->g[j]; - workspace->g[j] = tmp1; - workspace->g[j + 1] = tmp2; - - //fprintf( stderr, "h: " ); - //for( i = 0; i <= j+1; ++i ) - //fprintf( stderr, "%.6f ", workspace->h[i][j] ); - //fprintf( stderr, "\n" ); - //fprintf( stderr, "res: %.15e\n", workspace->g[j+1] ); - } - - - /* TODO: solve using Jacobi iteration? */ - /* solve Hy = g: H is now upper-triangular, do back-substitution */ - for ( i = j - 1; i >= 0; i-- ) - { - temp = workspace->g[i]; - for ( k = j - 1; k > i; k-- ) - { - temp -= workspace->h[i][k] * workspace->y[k]; - } - - workspace->y[i] = temp / workspace->h[i][i]; - } - - /* update x = x_0 + Vy */ - Vector_MakeZero( workspace->p, N ); - for ( i = 0; i < j; i++ ) - { - Vector_Add( workspace->p, workspace->y[i], workspace->v[i], N ); - } - //Backward_Subs( U, workspace->p, workspace->p ); - //Forward_Subs( L, workspace->p, workspace->p ); - Vector_Add( x, 1., workspace->p, N ); - - /* stopping condition */ - if ( fabs(workspace->g[j]) / bnorm <= tol ) - { - break; - } - } - - // Sparse_MatVec( H, x, workspace->b_prm ); - // for( i = 0; i < N; ++i ) - // workspace->b_prm[i] *= workspace->Hdia_inv[i]; - // fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" ); - // for( i = 0; i < N; ++i ) - // fprintf( fout, "%10.5f%15.12f%15.12f\n", - // workspace->b_prc[i], workspace->b_prm[i], x[i] );*/ - - // fprintf(fout,"GMRES outer:%d, inner:%d iters - residual norm: %25.20f\n", - // itr, j, fabs( workspace->g[j] ) / bnorm ); - // data->timing.matvec += itr * RESTART + j; - - free( Dinv_U ); - free( Dinv_L ); - - if ( itr >= MAX_ITR ) - { - fprintf( stderr, "GMRES convergence failed\n" ); - // return -1; - return itr * (RESTART + 1) + j + 1; - } - - return itr * (RESTART + 1) + j + 1; -} - - +/* Preconditioned Conjugate Gradient */ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout ) { @@ -1185,8 +1086,8 @@ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, //Print_Soln( workspace, x, q, b, N ); //fprintf( stderr, "res: %.15e\n", r_norm ); - Forward_Subs( L, workspace->r, workspace->d ); - Backward_Subs( U, workspace->d, workspace->p ); + tri_solve( L, workspace->r, workspace->d, LOWER ); + tri_solve( U, workspace->d, workspace->p, UPPER ); sig_new = Dot( workspace->r, workspace->p, N ); sig0 = sig_new; @@ -1204,8 +1105,8 @@ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, r_norm = Norm(workspace->r, N); //fprintf( stderr, "res: %.15e\n", r_norm ); - Forward_Subs( L, workspace->r, workspace->d ); - Backward_Subs( U, workspace->d, workspace->d ); + tri_solve( L, workspace->r, workspace->d, LOWER ); + tri_solve( U, workspace->d, workspace->d, UPPER ); sig_old = sig_new; sig_new = Dot( workspace->r, workspace->d, N ); beta = sig_new / sig_old; @@ -1223,6 +1124,7 @@ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, } +/* Conjugate Gradient */ int CG( static_storage *workspace, sparse_matrix *H, real *b, real tol, real *x, FILE *fout ) { @@ -1237,7 +1139,9 @@ int CG( static_storage *workspace, sparse_matrix *H, Sparse_MatVec( H, x, workspace->q ); Vector_Sum( workspace->r , 1., b, -1., workspace->q, N ); for ( j = 0; j < N; ++j ) + { workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } sig_new = Dot( workspace->r, workspace->d, N ); sig0 = sig_new; @@ -1280,7 +1184,6 @@ int CG( static_storage *workspace, sparse_matrix *H, } - /* Steepest Descent */ int SDM( static_storage *workspace, sparse_matrix *H, real *b, real tol, real *x, FILE *fout ) @@ -1296,7 +1199,9 @@ int SDM( static_storage *workspace, sparse_matrix *H, Sparse_MatVec( H, x, workspace->q ); Vector_Sum( workspace->r , 1., b, -1., workspace->q, N ); for ( j = 0; j < N; ++j ) + { workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } sig = Dot( workspace->r, workspace->d, N ); sig0 = sig; @@ -1312,7 +1217,9 @@ int SDM( static_storage *workspace, sparse_matrix *H, Vector_Add( x, alpha, workspace->d, N ); Vector_Add( workspace->r, -alpha, workspace->q, N ); for ( j = 0; j < N; ++j ) + { workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } //fprintf( stderr, "d_norm:%24.15e, q_norm:%24.15e, tmp:%24.15e\n", // Norm(workspace->d,N), Norm(workspace->q,N), tmp ); @@ -1330,6 +1237,15 @@ int SDM( static_storage *workspace, sparse_matrix *H, } +/* Estimate the stability of a 2-side preconditioning scheme + * using the factorization A \approx LU. Specifically, estimate the 1-norm of A^{-1} + * using the 1-norm of (LU)^{-1}e, with e = [1 1 ... 1]^T through 2 triangular solves: + * 1) Ly = e + * 2) Ux = y where y = Ux + * That is, we seek to solve e = LUx for unknown x + * + * Reference: Incomplete LU Preconditioning with the Multilevel Fast Multipole Algorithm + * for Electromagnetic Scattering, SIAM J. Sci. Computing, 2007 */ real condest( const sparse_matrix * const L, const sparse_matrix * const U ) { unsigned int i, N; @@ -1348,9 +1264,10 @@ real condest( const sparse_matrix * const L, const sparse_matrix * const U ) e[i] = 1.; } - Forward_Subs( L, e, e ); - Backward_Subs( U, e, e ); + tri_solve( L, e, e, LOWER ); + tri_solve( U, e, e, UPPER ); + /* compute 1-norm of vector e */ c = fabs(e[0]); for ( i = 1; i < N; ++i) { diff --git a/sPuReMD/src/GMRES.h b/sPuReMD/src/GMRES.h index 2acf61f2c2df46a175b4bccf31d229c147611755..d6d1c4844a9ea2870edffbdf1d65a5da358b362f 100644 --- a/sPuReMD/src/GMRES.h +++ b/sPuReMD/src/GMRES.h @@ -24,20 +24,13 @@ #include "mytypes.h" -int GMRES( static_storage*, sparse_matrix*, - real*, real, real*, FILE*, real*, real* ); +int GMRES( const static_storage * const, const control_params * const, + const sparse_matrix * const, const real * const, real, real * const, + const FILE * const, real * const, real * const ); -int GMRES_HouseHolder( static_storage*, sparse_matrix*, - real*, real, real*, FILE* ); - -int PGMRES( static_storage*, sparse_matrix*, real*, real, - sparse_matrix*, sparse_matrix*, real*, FILE*, real*, real* ); - -int PGMRES_Jacobi( static_storage*, sparse_matrix*, real*, real, - sparse_matrix*, sparse_matrix*, real*, unsigned int, FILE*, real*, real* ); - -int PCG( static_storage*, sparse_matrix*, real*, real, - sparse_matrix*, sparse_matrix*, real*, FILE* ); +int GMRES_HouseHolder( const static_storage * const, const control_params * const, + const sparse_matrix * const, const real * const, real, real * const, + const FILE * const, real * const, real * const ); int CG( static_storage*, sparse_matrix*, real*, real, real*, FILE* ); diff --git a/sPuReMD/src/QEq.c b/sPuReMD/src/QEq.c index 0355e68b0422af5a417b84b4958f8ff724f030d5..eca3d5516afba1184a61091c98798cadba6b4a57 100644 --- a/sPuReMD/src/QEq.c +++ b/sPuReMD/src/QEq.c @@ -506,8 +506,8 @@ static real SuperLU_Factorize( const sparse_matrix * const A, #endif -/* Diagonal (Jacobi) preconditioner */ -static real diagonal_pre( const reax_system * const system, real * const Hdia_inv ) +/* Diagonal (Jacobi) preconditioner computation */ +static real diag_pre_comp( const reax_system * const system, real * const Hdia_inv ) { unsigned int i; struct timeval start, stop; @@ -1093,9 +1093,13 @@ static void Init_MatVec( const reax_system * const system, const control_params case DIAG_PC: if ( workspace->Hdia_inv == NULL ) { - workspace->Hdia_inv = (real *) calloc( system->N, sizeof( real ) ); + if ( ( workspace->Hdia_inv = (real *) calloc( system->N, sizeof( real ) ) ) == NULL ) + { + fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } } - data->timing.pre_comp += diagonal_pre( system, workspace->Hdia_inv ); + data->timing.pre_comp += diag_pre_comp( system, workspace->Hdia_inv ); break; case ICHOLT_PC: Calculate_Droptol( workspace->H, workspace->droptol, control->pre_comp_droptol ); @@ -1121,7 +1125,7 @@ static void Init_MatVec( const reax_system * const system, const control_params data->timing.pre_comp += ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U ); // data->timing.pre_comp += ICHOLT( workspace->H_sp, workspace->droptol, workspace->L, workspace->U ); break; - case ICHOL_PAR_PC: + case ILU_PAR_PC: if ( workspace->L == NULL ) { /* factors have sparsity pattern as H */ @@ -1290,52 +1294,40 @@ void QEq( reax_system * const system, control_params * const control, simulation switch ( control->qeq_solver_type ) { case GMRES_S: - matvecs = GMRES( workspace, workspace->H, - workspace->b_s, control->qeq_solver_q_err, workspace->s[0], out_control->log, - &(data->timing.pre_app), &(data->timing.spmv) ); - matvecs += GMRES( workspace, workspace->H, - workspace->b_t, control->qeq_solver_q_err, workspace->t[0], out_control->log, - &(data->timing.pre_app), &(data->timing.spmv) ); + matvecs = GMRES( workspace, control, workspace->H, workspace->b_s, control->qeq_solver_q_err, + workspace->s[0], out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ); + matvecs += GMRES( workspace, control, workspace->H, workspace->b_t, control->qeq_solver_q_err, + workspace->t[0], out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ); break; case GMRES_H_S: - matvecs = GMRES_HouseHolder( workspace, workspace->H, - workspace->b_s, control->qeq_solver_q_err, workspace->s[0], out_control->log ); - matvecs += GMRES_HouseHolder( workspace, workspace->H, - workspace->b_t, control->qeq_solver_q_err, workspace->t[0], out_control->log ); - break; - case PGMRES_S: - matvecs = PGMRES( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, - workspace->L, workspace->U, workspace->s[0], out_control->log, - &(data->timing.pre_app), &(data->timing.spmv) ); - matvecs += PGMRES( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, - workspace->L, workspace->U, workspace->t[0], out_control->log, - &(data->timing.pre_app), &(data->timing.spmv) ); - break; - case PGMRES_J_S: - matvecs = PGMRES_Jacobi( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, - workspace->L, workspace->U, workspace->s[0], control->pre_app_jacobi_iters, - out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ); - matvecs += PGMRES_Jacobi( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, - workspace->L, workspace->U, workspace->t[0], control->pre_app_jacobi_iters, - out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ); + matvecs = GMRES_HouseHolder( workspace, control, workspace->H, workspace->b_s, control->qeq_solver_q_err, + workspace->s[0], out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ); + matvecs += GMRES_HouseHolder( workspace, control, workspace->H, workspace->b_t, control->qeq_solver_q_err, + workspace->t[0], out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ); break; case CG_S: - matvecs = CG( workspace, workspace->H, - workspace->b_s, control->qeq_solver_q_err, workspace->s[0], out_control->log ) + 1; - matvecs += CG( workspace, workspace->H, - workspace->b_t, control->qeq_solver_q_err, workspace->t[0], out_control->log ) + 1; - break; - case PCG_S: - matvecs = PCG( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, - workspace->L, workspace->U, workspace->s[0], out_control->log ) + 1; - matvecs += PCG( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, - workspace->L, workspace->U, workspace->t[0], out_control->log ) + 1; + matvecs = CG( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, + workspace->s[0], out_control->log ) + 1; + matvecs += CG( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, + workspace->t[0], out_control->log ) + 1; +// matvecs = CG( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, +// workspace->L, workspace->U, workspace->s[0], control->pre_app_type, +// control->pre_app_jacobi_iters, out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ) + 1; +// matvecs += CG( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, +// workspace->L, workspace->U, workspace->t[0], control->pre_app_type, +// control->pre_app_jacobi_iters, out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ) + 1; break; case SDM_S: - matvecs = SDM( workspace, workspace->H, - workspace->b_s, control->qeq_solver_q_err, workspace->s[0], out_control->log ) + 1; - matvecs += SDM( workspace, workspace->H, - workspace->b_t, control->qeq_solver_q_err, workspace->t[0], out_control->log ) + 1; + matvecs = SDM( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, + workspace->s[0], out_control->log ) + 1; + matvecs += SDM( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, + workspace->t[0], out_control->log ) + 1; +// matvecs = SDM( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, +// workspace->L, workspace->U, workspace->s[0], control->pre_app_type, +// control->pre_app_jacobi_iters, out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ) + 1; +// matvecs += SDM( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, +// workspace->L, workspace->U, workspace->t[0], control->pre_app_type, +// control->pre_app_jacobi_iters, out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ) + 1; break; default: fprintf( stderr, "Unrecognized QEq solver selection. Terminating...\n" ); diff --git a/sPuReMD/src/control.c b/sPuReMD/src/control.c index c6bb58c356a1a6d2b846540b612a986ba2cced4d..d70f830979c6be19a65ade21da2d263150072ecd 100644 --- a/sPuReMD/src/control.c +++ b/sPuReMD/src/control.c @@ -69,12 +69,13 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control, control->tabulate = 0; - control->qeq_solver_type = PGMRES_S; + control->qeq_solver_type = GMRES_S; control->qeq_solver_q_err = 0.000001; control->pre_comp_type = ICHOLT_PC; control->pre_comp_sweeps = ICHOLT_PC; control->pre_comp_refactor = 100; control->pre_comp_droptol = 0.01; + control->pre_app_type = TRI_SOLVE_PA; control->pre_app_jacobi_iters = 10; control->T_init = 0.; @@ -266,6 +267,11 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control, ival = atoi( tmp[1] ); control->pre_comp_sweeps = ival; } + else if ( strcmp(tmp[0], "pre_app_type") == 0 ) + { + ival = atoi( tmp[1] ); + control->pre_app_type = ival; + } else if ( strcmp(tmp[0], "pre_app_jacobi_iters") == 0 ) { val = atof( tmp[1] ); diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h index 7b1407721b50cb2166c44a3cc7b45ab042b750e4..7dd51a7b949005993c94288f548f265d2eef2162 100644 --- a/sPuReMD/src/mytypes.h +++ b/sPuReMD/src/mytypes.h @@ -190,13 +190,17 @@ enum geo_formats enum solver { - GMRES_S = 0, GMRES_H_S = 1, PGMRES_S = 2, - PGMRES_J_S = 3, CG_S = 4, PCG_S = 5, SDM_S = 6, + GMRES_S = 0, GMRES_H_S = 1, CG_S = 2, SDM_S = 3, }; enum pre_comp { - DIAG_PC = 0, ICHOLT_PC = 1, ICHOL_PAR_PC = 2, ILU_SUPERLU_MT_PC = 3, + DIAG_PC = 0, ICHOLT_PC = 1, ILU_PAR_PC = 2, ILU_SUPERLU_MT_PC = 3, +}; + +enum pre_app +{ + NONE_PA = 0, TRI_SOLVE_PA = 1, TRI_SOLVE_LEVEL_SCHED_PA = 2, JACOBI_ITER_PA = 3, }; @@ -510,6 +514,7 @@ typedef struct unsigned int pre_comp_refactor; real pre_comp_droptol; unsigned int pre_comp_sweeps; + unsigned int pre_app_type; unsigned int pre_app_jacobi_iters; int molec_anal; diff --git a/tools/run_sim.py b/tools/run_sim.py index 39342fdb03a3f71591453199b561fbc9ca9ba7c8..bcea5dbadefcf0484b189109b438c6cef4df679b 100644 --- a/tools/run_sim.py +++ b/tools/run_sim.py @@ -1,5 +1,6 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 +import argparse from fileinput import input from itertools import product from re import sub @@ -12,7 +13,7 @@ from time import time class TestCase(): def __init__(self, geo_file, ffield_file, control_file, params={}, result_header_fmt='', - result_header='', result_body_fmt='', result_file='results.txt'): + result_header='', result_body_fmt='', result_file='results.txt', geo_format='1'): self.__geo_file = geo_file self.__ffield_file = ffield_file self.__control_file = control_file @@ -47,33 +48,35 @@ class TestCase(): r'(?P<key>geo_format\s+)\S+(?P<comment>.*)', r'\g<key>%s\g<comment>' % x, l), \ } + self.__params['geo_format'] = geo_format + def _setup(self, param, temp_file): fp = open(self.__control_file, 'r') - lines = fp.read() - fp.close() + lines = fp.read() + fp.close() for k in self.__control_res.keys(): try: lines = self.__control_res[k](lines, param[k]) except KeyError: pass - fp_temp = open(temp_file, 'w', 0) - fp_temp.write(lines) - fp_temp.close() + fp_temp = open(temp_file, 'w') + fp_temp.write(lines) + fp_temp.close() def run(self, bin_file='sPuReMD/bin/spuremd'): base_dir = getcwd() - bin_path = path.join(base_dir, bin_file) + bin_path = path.join(base_dir, bin_file) env = dict(environ) write_header = True if path.exists(self.__result_file): write_header = False - fout = open(self.__result_file, 'a', 0) + fout = open(self.__result_file, 'a') if write_header: fout.write(self.__result_header_fmt.format(*self.__result_header)) temp_dir = mkdtemp() - temp_file = path.join(temp_dir, path.basename(self.__control_file)) + temp_file = path.join(temp_dir, path.basename(self.__control_file)) for p in product(*[self.__params[k] for k in self.__param_names]): param_dict = dict((k, v) for (k, v) in zip(self.__param_names, p)) @@ -95,7 +98,7 @@ class TestCase(): fout.close() remove(temp_file) - rmdir(temp_dir) + rmdir(temp_dir) def _process_result(self, fout, time, param): qeq = 0. @@ -129,69 +132,122 @@ class TestCase(): if __name__ == '__main__': + DATA = [ \ + 'bilayer_56800', 'bilayer_340800', \ + 'dna_19733', \ + 'petn_48256', \ + 'silica_6000', 'silica_72000', 'silica_300000', \ + 'water_6540', 'water_78480', 'water_327000', \ + ] + + parser = argparse.ArgumentParser(description='Run molecular dynamics simulations on specified data sets.') + parser.add_argument('data', nargs='+', + choices=DATA, help='Data sets for which to run simulations.') + + # parse args and take action + args = parser.parse_args() + base_dir = getcwd() control_dir = path.join(base_dir, 'environ') data_dir = path.join(base_dir, 'data/benchmarks') header_fmt_str = '{:20}|{:5}|{:5}|{:5}|{:10}|{:10}|{:10}|{:10}|{:10}|{:10}|{:10}\n' header_str = ['Data Set', 'Steps', 'Q Tol', 'Pre T', 'Pre Comp', - 'Pre App', 'Iters', 'SpMV', 'QEq', 'Threads', 'Time (s)'] + 'Pre App', 'Iters', 'SpMV', 'QEq', 'Threads', 'Time (s)'] body_fmt_str = '{:20} {:5} {:5} {:5} {:10.6f} {:10.6f} {:10.6f} {:10.6f} {:10.6f} {:10} {:10.6f}\n' params = { 'ensemble_type': ['0'], - 'nsteps': ['20', '100', '500', '1000'], + 'nsteps': ['20'], +# 'nsteps': ['20', '100', '500', '1000'], 'qeq_solver_type': ['0'], - 'qeq_solver_q_err': ['1e-6', '1e-10'], + 'qeq_solver_q_err': ['1e-6'], +# 'qeq_solver_q_err': ['1e-6', '1e-10'], # 'qeq_solver_q_err': ['1e-6', '1e-8', '1e-10', '1e-14'], - 'pre_comp_type': ['0'], + 'pre_comp_type': ['2'], # 'pre_comp_type': ['0', '1', '2'], - 'pre_comp_refactor': ['1'], -# 'pre_comp_sweeps': ['3'], -# 'pre_app_type': ['0'], -# 'pre_app_jacobi_iters': ['50'], - 'threads': ['12'], + 'pre_comp_refactor': ['100'], + 'pre_comp_sweeps': ['3'], + 'pre_app_type': ['2'], + 'pre_app_jacobi_iters': ['50'], + 'threads': ['2'], # 'threads': ['1', '2', '4', '12', '24'], - 'geo_format': ['1'], + 'geo_format': [], } - test_cases = [ + test_cases = [] + if 'water_6540' in args.data: + test_cases.append( TestCase(path.join(data_dir, 'water/water_6540.pdb'), path.join(data_dir, 'water/ffield.water'), path.join(control_dir, 'param.gpu.water'), - params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), -# TestCase(path.join(data_dir, 'water/water_78480.pdb'), -# path.join(data_dir, 'water/ffield.water'), -# path.join(control_dir, 'param.gpu.water'), -# params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), -# TestCase(path.join(data_dir, 'water/water_327000.geo'), -# path.join(data_dir, 'water/ffield.water'), -# path.join(control_dir, 'param.gpu.water'), -# params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['1'])) + if 'water_78480' in args.data: + test_cases.append( + TestCase(path.join(data_dir, 'water/water_78480.pdb'), + path.join(data_dir, 'water/ffield.water'), + path.join(control_dir, 'param.gpu.water'), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['0'])) + if 'water_327000' in args.data: + test_cases.append( + TestCase(path.join(data_dir, 'water/water_327000.geo'), + path.join(data_dir, 'water/ffield.water'), + path.join(control_dir, 'param.gpu.water'), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['0'])) + if 'bilayer_56800' in args.data: + test_cases.append( TestCase(path.join(data_dir, 'bilayer/bilayer_56800.pdb'), path.join(data_dir, 'bilayer/ffield-bio'), path.join(control_dir, 'param.gpu.water'), - params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['1'])) + if 'dna_19733' in args.data: + test_cases.append( TestCase(path.join(data_dir, 'dna/dna_19733.pdb'), path.join(data_dir, 'dna/ffield-dna'), path.join(control_dir, 'param.gpu.water'), - params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['1'])) + if 'silica_6000' in args.data: + test_cases.append( TestCase(path.join(data_dir, 'silica/silica_6000.pdb'), path.join(data_dir, 'silica/ffield-bio'), path.join(control_dir, 'param.gpu.water'), - params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), -# TestCase(path.join(data_dir, 'silica/silica_72000.geo'), -# path.join(data_dir, 'silica/ffield-bio'), -# path.join(control_dir, 'param.gpu.water'), -# params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), -# TestCase(path.join(data_dir, 'silica/silica_300000.geo'), -# path.join(data_dir, 'silica/ffield-bio'), -# path.join(control_dir, 'param.gpu.water'), -# params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['1'])) + if 'silica_72000' in args.data: + test_cases.append( + TestCase(path.join(data_dir, 'silica/silica_72000.geo'), + path.join(data_dir, 'silica/ffield-bio'), + path.join(control_dir, 'param.gpu.water'), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['0'])) + if 'silica_300000' in args.data: + test_cases.append( + TestCase(path.join(data_dir, 'silica/silica_300000.geo'), + path.join(data_dir, 'silica/ffield-bio'), + path.join(control_dir, 'param.gpu.water'), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['0'])) + if 'petn_48256' in args.data: + test_cases.append( TestCase(path.join(data_dir, 'petn/petn_48256.pdb'), path.join(data_dir, 'petn/ffield.petn'), path.join(control_dir, 'param.gpu.water'), - params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), - ] + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['1'])) + for test in test_cases: test.run(bin_file='sPuReMD/bin/spuremd')