diff --git a/PuReMD/src/allocate.c b/PuReMD/src/allocate.c index 98fb650206fbc64bbf8399d0e0606dacdd6222be..65474f1ea197475f32b86bacf2f58f9ebd96dc33 100644 --- a/PuReMD/src/allocate.c +++ b/PuReMD/src/allocate.c @@ -178,13 +178,15 @@ void DeAllocate_Workspace( control_params *control, storage *workspace ) if ( control->cm_solver_type == GMRES_S || control->cm_solver_type == GMRES_H_S - || control->cm_solver_type == PIPECG_S ) + || control->cm_solver_type == PIPECG_S + || control->cm_solver_type == PIPECR_S ) { sfree( workspace->z, "z" ); } if ( control->cm_solver_type == CG_S - || control->cm_solver_type == PIPECG_S ) + || control->cm_solver_type == PIPECG_S + || control->cm_solver_type == PIPECR_S ) { sfree( workspace->d, "d" ); sfree( workspace->p, "p" ); @@ -192,7 +194,8 @@ void DeAllocate_Workspace( control_params *control, storage *workspace ) sfree( workspace->r, "r" ); } - if ( control->cm_solver_type == PIPECG_S ) + if ( control->cm_solver_type == PIPECG_S + || control->cm_solver_type == PIPECR_S ) { sfree( workspace->m, "m" ); sfree( workspace->n, "n" ); @@ -327,24 +330,25 @@ int Allocate_Workspace( reax_system *system, control_params *control, for ( i = 0; i < RESTART + 1; ++i ) { - workspace->h[i] = (real*) scalloc( RESTART + 1, sizeof(real), "h[i]", comm ); - workspace->v[i] = (real*) scalloc( total_cap, sizeof(real), "v[i]", comm ); + workspace->h[i] = scalloc( RESTART + 1, sizeof(real), "h[i]", comm ); + workspace->v[i] = scalloc( total_cap, sizeof(real), "v[i]", comm ); } } if ( control->cm_solver_type == GMRES_S - || control->cm_solver_type == GMRES_H_S - || control->cm_solver_type == PIPECG_S ) + || control->cm_solver_type == GMRES_H_S ) { workspace->z = scalloc( RESTART + 1, sizeof(real), "z", comm ); } - else if ( control->cm_solver_type == PIPECG_S ) + else if ( control->cm_solver_type == PIPECG_S + || control->cm_solver_type == PIPECR_S ) { workspace->z = scalloc( total_cap, sizeof(real), "z", comm ); } if ( control->cm_solver_type == CG_S - || control->cm_solver_type == PIPECG_S ) + || control->cm_solver_type == PIPECG_S + || control->cm_solver_type == PIPECR_S ) { workspace->d = scalloc( total_cap, sizeof(real), "d", comm ); workspace->p = scalloc( total_cap, sizeof(real), "p", comm ); @@ -352,7 +356,8 @@ int Allocate_Workspace( reax_system *system, control_params *control, workspace->r = scalloc( total_cap, sizeof(real), "r", comm ); } - if ( control->cm_solver_type == PIPECG_S ) + if ( control->cm_solver_type == PIPECG_S + || control->cm_solver_type == PIPECR_S ) { workspace->m = scalloc( total_cap, sizeof(real), "m", comm ); workspace->n = scalloc( total_cap, sizeof(real), "n", comm ); diff --git a/PuReMD/src/linear_solvers.c b/PuReMD/src/linear_solvers.c index f29eab07b907d38528f75fde169990b99bed61f4..276daed272277b413e4bcb715da1bb2fd2c52cd6 100644 --- a/PuReMD/src/linear_solvers.c +++ b/PuReMD/src/linear_solvers.c @@ -1963,8 +1963,6 @@ int CG( reax_system *system, control_params *control, simulation_data *data, MPI_Reduce( timings, NULL, 5, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); } - MPI_Barrier(mpi_data->world); - if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE ) { fprintf( stderr, "[WARNING] CG convergence failed!\n" ); @@ -1989,11 +1987,9 @@ int PIPECG( reax_system *system, control_params *control, simulation_data *data, real tol, real *x, mpi_datatypes* mpi_data ) { int i, j; - real alpha, beta, b_norm, norm; - real delta; - real gamma_old, gamma_new; + real alpha, beta, delta, gamma_old, gamma_new, norm; real t_start, t_pa, t_spmv, t_vops, t_comm, t_allreduce; - real timings[5], redux[4]; + real timings[5], redux[3]; MPI_Request req; t_pa = 0.0; @@ -2008,33 +2004,46 @@ int PIPECG( reax_system *system, control_params *control, simulation_data *data, t_start = MPI_Wtime( ); #if defined(NEUTRAL_TERRITORY) - Sparse_MatVec( H, x, workspace->q, H->NT ); + Sparse_MatVec( H, x, workspace->u, H->NT ); #else - Sparse_MatVec( H, x, workspace->q, system->N ); + Sparse_MatVec( H, x, workspace->u, system->N ); #endif t_spmv += MPI_Wtime( ) - t_start; if ( H->format == SYM_HALF_MATRIX ) { t_start = MPI_Wtime( ); - Coll( system, mpi_data, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE ); + Coll( system, mpi_data, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE ); t_comm += MPI_Wtime( ) - t_start; } #if defined(NEUTRAL_TERRITORY) else { t_start = MPI_Wtime( ); - Coll( system, mpi_data, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE ); + Coll( system, mpi_data, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE ); t_comm += MPI_Wtime( ) - t_start; } #endif t_start = MPI_Wtime( ); - Vector_Sum( workspace->r , 1.0, b, -1.0, workspace->q, system->n ); + Vector_Sum( workspace->r , 1.0, b, -1.0, workspace->u, system->n ); t_vops += MPI_Wtime( ) - t_start; /* pre-conditioning */ - if ( control->cm_solver_pre_comp_type == SAI_PC ) + if ( control->cm_solver_pre_comp_type == NONE_PC ) + { + Vector_Copy( workspace->u, workspace->r, system->n ); + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) + { + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->u[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } + t_pa += MPI_Wtime( ) - t_start; + } + else if ( control->cm_solver_pre_comp_type == SAI_PC ) { t_start = MPI_Wtime( ); Dist( system, mpi_data, workspace->r, REAL_PTR_TYPE, MPI_DOUBLE ); @@ -2048,15 +2057,6 @@ int PIPECG( reax_system *system, control_params *control, simulation_data *data, #endif t_pa += MPI_Wtime( ) - t_start; } - else if ( control->cm_solver_pre_comp_type == JACOBI_PC) - { - t_start = MPI_Wtime( ); - for ( j = 0; j < system->n; ++j ) - { - workspace->u[j] = workspace->r[j] * workspace->Hdia_inv[j]; - } - t_pa += MPI_Wtime( ) - t_start; - } t_start = MPI_Wtime( ); Dist( system, mpi_data, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE ); @@ -2089,14 +2089,25 @@ int PIPECG( reax_system *system, control_params *control, simulation_data *data, redux[0] = Dot_local( workspace->w, workspace->u, system->n ); redux[1] = Dot_local( workspace->r, workspace->u, system->n ); redux[2] = Dot_local( workspace->u, workspace->u, system->n ); - redux[3] = Dot_local( b, b, system->n ); t_vops += MPI_Wtime( ) - t_start; - t_start = MPI_Wtime( ); - MPI_Iallreduce( MPI_IN_PLACE, redux, 4, MPI_DOUBLE, MPI_SUM, mpi_data->world, &req ); + MPI_Iallreduce( MPI_IN_PLACE, redux, 3, MPI_DOUBLE, MPI_SUM, mpi_data->world, &req ); /* pre-conditioning */ - if ( control->cm_solver_pre_comp_type == SAI_PC ) + if ( control->cm_solver_pre_comp_type == NONE_PC ) + { + Vector_Copy( workspace->m, workspace->w, system->n ); + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) + { + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->m[j] = workspace->w[j] * workspace->Hdia_inv[j]; + } + t_pa += MPI_Wtime( ) - t_start; + } + else if ( control->cm_solver_pre_comp_type == SAI_PC ) { t_start = MPI_Wtime( ); Dist( system, mpi_data, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE ); @@ -2110,15 +2121,6 @@ int PIPECG( reax_system *system, control_params *control, simulation_data *data, #endif t_pa += MPI_Wtime( ) - t_start; } - else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) - { - t_start = MPI_Wtime( ); - for ( j = 0; j < system->n; ++j ) - { - workspace->m[j] = workspace->w[j] * workspace->Hdia_inv[j]; - } - t_pa += MPI_Wtime( ) - t_start; - } t_start = MPI_Wtime( ); Dist( system, mpi_data, workspace->m, REAL_PTR_TYPE, MPI_DOUBLE ); @@ -2147,14 +2149,14 @@ int PIPECG( reax_system *system, control_params *control, simulation_data *data, } #endif + t_start = MPI_Wtime( ); MPI_Wait( &req, MPI_STATUS_IGNORE ); t_allreduce += MPI_Wtime( ) - t_start; delta = redux[0]; gamma_new = redux[1]; norm = sqrt( redux[2] ); - b_norm = sqrt( redux[3] ); - for ( i = 0; i < control->cm_solver_max_iters && norm / b_norm > tol; ++i ) + for ( i = 0; i < control->cm_solver_max_iters && norm > tol; ++i ) { if ( i > 0 ) { @@ -2168,24 +2170,36 @@ int PIPECG( reax_system *system, control_params *control, simulation_data *data, } t_start = MPI_Wtime( ); - Vector_Sum( workspace->u , 1.0, workspace->u, -alpha, workspace->q, system->n ); - Vector_Sum( workspace->w , 1.0, workspace->w, -alpha, workspace->z, system->n ); - Vector_Sum( workspace->r , 1.0, workspace->r, -alpha, workspace->d, system->n ); - Vector_Sum( x, 1.0, x, alpha, workspace->p, system->n ); - Vector_Sum( workspace->z , 1.0, workspace->n, beta, workspace->z, system->n ); - Vector_Sum( workspace->q , 1.0, workspace->m, beta, workspace->q, system->n ); - Vector_Sum( workspace->p , 1.0, workspace->u, beta, workspace->p, system->n ); - Vector_Sum( workspace->d , 1.0, workspace->w, beta, workspace->d, system->n ); + Vector_Sum( workspace->z, 1.0, workspace->n, beta, workspace->z, system->n ); + Vector_Sum( workspace->q, 1.0, workspace->m, beta, workspace->q, system->n ); + Vector_Sum( workspace->p, 1.0, workspace->u, beta, workspace->p, system->n ); + Vector_Sum( workspace->d, 1.0, workspace->w, beta, workspace->d, system->n ); + Vector_Sum( x, 1.0, x, alpha, workspace->p, system->n ); + Vector_Sum( workspace->u, 1.0, workspace->u, -alpha, workspace->q, system->n ); + Vector_Sum( workspace->w, 1.0, workspace->w, -alpha, workspace->z, system->n ); + Vector_Sum( workspace->r, 1.0, workspace->r, -alpha, workspace->d, system->n ); redux[0] = Dot_local( workspace->w, workspace->u, system->n ); redux[1] = Dot_local( workspace->r, workspace->u, system->n ); redux[2] = Dot_local( workspace->u, workspace->u, system->n ); t_vops += MPI_Wtime( ) - t_start; - t_start = MPI_Wtime( ); MPI_Iallreduce( MPI_IN_PLACE, redux, 3, MPI_DOUBLE, MPI_SUM, mpi_data->world, &req ); /* pre-conditioning */ - if ( control->cm_solver_pre_comp_type == SAI_PC ) + if ( control->cm_solver_pre_comp_type == NONE_PC ) + { + Vector_Copy( workspace->m, workspace->w, system->n ); + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) + { + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->m[j] = workspace->w[j] * workspace->Hdia_inv[j]; + } + t_pa += MPI_Wtime( ) - t_start; + } + else if ( control->cm_solver_pre_comp_type == SAI_PC ) { t_start = MPI_Wtime( ); Dist( system, mpi_data, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE ); @@ -2199,15 +2213,6 @@ int PIPECG( reax_system *system, control_params *control, simulation_data *data, #endif t_pa += MPI_Wtime( ) - t_start; } - else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) - { - t_start = MPI_Wtime( ); - for ( j = 0; j < system->n; ++j ) - { - workspace->m[j] = workspace->w[j] * workspace->Hdia_inv[j]; - } - t_pa += MPI_Wtime( ) - t_start; - } t_start = MPI_Wtime( ); Dist( system, mpi_data, workspace->m, REAL_PTR_TYPE, MPI_DOUBLE ); @@ -2238,6 +2243,7 @@ int PIPECG( reax_system *system, control_params *control, simulation_data *data, gamma_old = gamma_new; + t_start = MPI_Wtime( ); MPI_Wait( &req, MPI_STATUS_IGNORE ); t_allreduce += MPI_Wtime( ) - t_start; delta = redux[0]; @@ -2286,11 +2292,11 @@ int PIPECR( reax_system *system, control_params *control, simulation_data *data, storage *workspace, sparse_matrix *H, real *b, real tol, real *x, mpi_datatypes* mpi_data ) { - int i, j; - real tmp, alpha, beta, b_norm; - real sig_old, sig_new; + int i, j; + real alpha, beta, delta, gamma_old, gamma_new, norm; real t_start, t_pa, t_spmv, t_vops, t_comm, t_allreduce; - real timings[5]; + real timings[5], redux[3]; + MPI_Request req; t_pa = 0.0; t_spmv = 0.0; @@ -2304,33 +2310,46 @@ int PIPECR( reax_system *system, control_params *control, simulation_data *data, t_start = MPI_Wtime( ); #if defined(NEUTRAL_TERRITORY) - Sparse_MatVec( H, x, workspace->q, H->NT ); + Sparse_MatVec( H, x, workspace->u, H->NT ); #else - Sparse_MatVec( H, x, workspace->q, system->N ); + Sparse_MatVec( H, x, workspace->u, system->N ); #endif t_spmv += MPI_Wtime( ) - t_start; if ( H->format == SYM_HALF_MATRIX ) { t_start = MPI_Wtime( ); - Coll( system, mpi_data, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE ); + Coll( system, mpi_data, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE ); t_comm += MPI_Wtime( ) - t_start; } #if defined(NEUTRAL_TERRITORY) else { t_start = MPI_Wtime( ); - Coll( system, mpi_data, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE ); + Coll( system, mpi_data, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE ); t_comm += MPI_Wtime( ) - t_start; } #endif t_start = MPI_Wtime( ); - Vector_Sum( workspace->r , 1., b, -1., workspace->q, system->n ); + Vector_Sum( workspace->r , 1.0, b, -1.0, workspace->u, system->n ); t_vops += MPI_Wtime( ) - t_start; /* pre-conditioning */ - if ( control->cm_solver_pre_comp_type == SAI_PC ) + if ( control->cm_solver_pre_comp_type == NONE_PC ) + { + Vector_Copy( workspace->u, workspace->r, system->n ); + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) + { + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->u[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } + t_pa += MPI_Wtime( ) - t_start; + } + else if ( control->cm_solver_pre_comp_type == SAI_PC ) { t_start = MPI_Wtime( ); Dist( system, mpi_data, workspace->r, REAL_PTR_TYPE, MPI_DOUBLE ); @@ -2338,102 +2357,139 @@ int PIPECR( reax_system *system, control_params *control, simulation_data *data, t_start = MPI_Wtime( ); #if defined(NEUTRAL_TERRITORY) - Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->d, H->NT ); + Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->u, H->NT ); #else - Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->d, system->n ); + Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->u, system->n ); #endif t_pa += MPI_Wtime( ) - t_start; } - else if ( control->cm_solver_pre_comp_type == JACOBI_PC) + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( H, workspace->u, workspace->w, H->NT ); +#else + Sparse_MatVec( H, workspace->u, workspace->w, system->N ); +#endif + t_spmv += MPI_Wtime( ) - t_start; + + if ( H->format == SYM_HALF_MATRIX ) { t_start = MPI_Wtime( ); - for ( j = 0; j < system->n; ++j ) - { - workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; - } - t_pa += MPI_Wtime( ) - t_start; + Coll( system, mpi_data, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + } +#if defined(NEUTRAL_TERRITORY) + else + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; } +#endif - t_start = MPI_Wtime( ); - b_norm = Parallel_Norm( b, system->n, mpi_data->world ); - sig_new = Parallel_Dot( workspace->r, workspace->d, system->n, mpi_data->world ); - t_allreduce += MPI_Wtime( ) - t_start; + //TODO: better loop unrolling and termination condition check + norm = tol + 1.0; - for ( i = 0; i < control->cm_solver_max_iters && sqrt(sig_new) / b_norm > tol; ++i ) + for ( i = 0; i < control->cm_solver_max_iters && norm > tol; ++i ) { + /* pre-conditioning */ + if ( control->cm_solver_pre_comp_type == NONE_PC ) + { + Vector_Copy( workspace->m, workspace->w, system->n ); + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) + { + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->m[j] = workspace->w[j] * workspace->Hdia_inv[j]; + } + t_pa += MPI_Wtime( ) - t_start; + } + else if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( workspace->H_app_inv, workspace->w, workspace->m, H->NT ); +#else + Sparse_MatVec( workspace->H_app_inv, workspace->w, workspace->m, system->n ); +#endif + t_pa += MPI_Wtime( ) - t_start; + } + t_start = MPI_Wtime( ); - Dist( system, mpi_data, workspace->d, REAL_PTR_TYPE, MPI_DOUBLE ); + redux[0] = Dot_local( workspace->w, workspace->u, system->n ); + redux[1] = Dot_local( workspace->m, workspace->w, system->n ); + redux[2] = Dot_local( workspace->u, workspace->u, system->n ); + t_vops += MPI_Wtime( ) - t_start; + + MPI_Iallreduce( MPI_IN_PLACE, redux, 3, MPI_DOUBLE, MPI_SUM, mpi_data->world, &req ); + + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->m, REAL_PTR_TYPE, MPI_DOUBLE ); t_comm += MPI_Wtime( ) - t_start; t_start = MPI_Wtime( ); #if defined(NEUTRAL_TERRITORY) - Sparse_MatVec( H, workspace->d, workspace->q, H->NT ); + Sparse_MatVec( H, workspace->m, workspace->n, H->NT ); #else - Sparse_MatVec( H, workspace->d, workspace->q, system->N ); + Sparse_MatVec( H, workspace->m, workspace->n, system->N ); #endif t_spmv += MPI_Wtime( ) - t_start; if ( H->format == SYM_HALF_MATRIX ) { t_start = MPI_Wtime( ); - Coll( system, mpi_data, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE ); + Coll( system, mpi_data, workspace->n, REAL_PTR_TYPE, MPI_DOUBLE ); t_comm += MPI_Wtime( ) - t_start; } #if defined(NEUTRAL_TERRITORY) else { t_start = MPI_Wtime( ); - Coll( system, mpi_data, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE ); + Coll( system, mpi_data, workspace->n, REAL_PTR_TYPE, MPI_DOUBLE ); t_comm += MPI_Wtime( ) - t_start; } #endif - t_start = MPI_Wtime( ); - tmp = Parallel_Dot( workspace->d, workspace->q, system->n, mpi_data->world ); - t_allreduce += MPI_Wtime( ) - t_start; - t_start = MPI_Wtime( ); - alpha = sig_new / tmp; - Vector_Add( x, alpha, workspace->d, system->n ); - Vector_Add( workspace->r, -alpha, workspace->q, system->n ); - t_vops += MPI_Wtime( ) - t_start; + MPI_Wait( &req, MPI_STATUS_IGNORE ); + t_allreduce += MPI_Wtime( ) - t_start; + gamma_new = redux[0]; + delta = redux[1]; + norm = sqrt( redux[2] ); - /* pre-conditioning */ - if ( control->cm_solver_pre_comp_type == SAI_PC ) + if ( i > 0 ) { - t_start = MPI_Wtime( ); - Dist( system, mpi_data, workspace->r, REAL_PTR_TYPE, MPI_DOUBLE ); - t_comm += MPI_Wtime( ) - t_start; - - t_start = MPI_Wtime( ); -#if defined(NEUTRAL_TERRITORY) - Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->p, H->NT ); -#else - Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->p, system->n ); -#endif - t_pa += MPI_Wtime( ) - t_start; + beta = gamma_new / gamma_old; + alpha = gamma_new / (delta - beta / alpha * gamma_new); } - - else if ( control->cm_solver_pre_comp_type == JACOBI_PC) + else { - t_start = MPI_Wtime( ); - for ( j = 0; j < system->n; ++j ) - { - workspace->p[j] = workspace->r[j] * workspace->Hdia_inv[j]; - } - t_pa += MPI_Wtime( ) - t_start; + beta = 0.0; + alpha = gamma_new / delta; } t_start = MPI_Wtime( ); - sig_old = sig_new; - sig_new = Parallel_Dot( workspace->r, workspace->p, system->n, mpi_data->world ); - t_allreduce += MPI_Wtime() - t_start; - - t_start = MPI_Wtime( ); - beta = sig_new / sig_old; - Vector_Sum( workspace->d, 1., workspace->p, beta, workspace->d, system->n ); + Vector_Sum( workspace->z, 1.0, workspace->n, beta, workspace->z, system->n ); + Vector_Sum( workspace->q, 1.0, workspace->m, beta, workspace->q, system->n ); + Vector_Sum( workspace->p, 1.0, workspace->u, beta, workspace->p, system->n ); + Vector_Sum( workspace->d, 1.0, workspace->w, beta, workspace->d, system->n ); + Vector_Sum( x, 1.0, x, alpha, workspace->p, system->n ); + Vector_Sum( workspace->u, 1.0, workspace->u, -alpha, workspace->q, system->n ); + Vector_Sum( workspace->w, 1.0, workspace->w, -alpha, workspace->z, system->n ); + Vector_Sum( workspace->r, 1.0, workspace->r, -alpha, workspace->d, system->n ); t_vops += MPI_Wtime( ) - t_start; + + gamma_old = gamma_new; } timings[0] = t_pa; @@ -2457,11 +2513,9 @@ int PIPECR( reax_system *system, control_params *control, simulation_data *data, MPI_Reduce( timings, NULL, 5, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); } - MPI_Barrier(mpi_data->world); - if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE ) { - fprintf( stderr, "[WARNING] CG convergence failed!\n" ); + fprintf( stderr, "[WARNING] PIPECR convergence failed!\n" ); return i; } diff --git a/PuReMD/src/qEq.c b/PuReMD/src/qEq.c index b936c46ea3f8acd6c57cfe21a1a7980e9cf56752..4599b9868482c56dbd79e719f4c946e4dba4604d 100644 --- a/PuReMD/src/qEq.c +++ b/PuReMD/src/qEq.c @@ -502,6 +502,34 @@ void QEq( reax_system *system, control_params *control, simulation_data *data, } break; + case PIPECR_S: + for ( j = 0; j < system->n; ++j ) + { + workspace->s[j] = workspace->x[j][0]; + } + + iters = PIPECR( system, control, data, workspace, workspace->H, workspace->b_s, + control->cm_solver_q_err, workspace->s, mpi_data ); + + for ( j = 0; j < system->n; ++j ) + { + workspace->x[j][0] = workspace->s[j]; + } + + for ( j = 0; j < system->n; ++j ) + { + workspace->t[j] = workspace->x[j][1]; + } + + iters += PIPECR( system, control, data, workspace, workspace->H, workspace->b_t, + control->cm_solver_q_err, workspace->t, mpi_data ); + + for ( j = 0; j < system->n; ++j ) + { + workspace->x[j][1] = workspace->t[j]; + } + break; + case GMRES_S: case GMRES_H_S: case SDM_S: diff --git a/PuReMD/src/reax_types.h b/PuReMD/src/reax_types.h index 244fdc0f9a6d3fc21d8fb8c8a29f2bc1c0675d46..8a94c0c995c8c77ff15cc907cbed90f67b57816f 100644 --- a/PuReMD/src/reax_types.h +++ b/PuReMD/src/reax_types.h @@ -88,6 +88,7 @@ enum solver SDM_S = 3, BiCGStab_S = 4, PIPECG_S = 5, + PIPECR_S = 6, }; /* preconditioner computation type for charge method linear solver */ @@ -199,8 +200,8 @@ typedef struct typedef struct { - MPI_Comm world; - MPI_Comm comm_mesh3D; + MPI_Comm world; + MPI_Comm comm_mesh3D; MPI_Datatype sys_info; MPI_Datatype mpi_atom_type; diff --git a/PuReMD/src/vector.c b/PuReMD/src/vector.c index 78ed514f4e00971dfaea8c6f4e61dc557fe576b2..ee4cf8ee96595150683526f34c26f1b1ed01251f 100644 --- a/PuReMD/src/vector.c +++ b/PuReMD/src/vector.c @@ -27,57 +27,70 @@ #include "reax_vector.h" #endif + int Vector_isZero( real* v, int k ) { - for ( --k; k >= 0; --k ) - if ( fabs( v[k] ) > ALMOST_ZERO ) - return 0; + int i, ret; - return 1; + ret = 1; + + for ( i = 0; i < k; ++i ) + { + if ( fabs( v[i] ) > ALMOST_ZERO ) + { + ret = 0; + break; + } + } + + return ret; } void Vector_MakeZero( real *v, int k ) { - for ( --k; k >= 0; --k ) - v[k] = 0; + int i; + + for ( i = 0; i < k; ++i ) + { + v[i] = 0; + } } void Vector_Copy( real* dest, real* v, int k ) { - for ( --k; k >= 0; --k ) - dest[k] = v[k]; -} - + int i; -void Vector_Scale( real* dest, real c, real* v, int k ) -{ - for ( --k; k >= 0; --k ) - dest[k] = c * v[k]; + for ( i = 0; i < k; ++i ) + { + dest[i] = v[i]; + } } -/*void Vector_Sum( real* dest, real c, real* v, real d, real* y, int k ) +void Vector_Scale( real* dest, real c, real* v, int k ) { - for ( --k; k >= 0; --k ) - dest[k] = c * v[k] + d * y[k]; -}*/ - + int i; -/*void Vector_Add( real* dest, real c, real* v, int k ) -{ - for ( --k; k >= 0; --k ) - dest[k] += c * v[k]; -}*/ + for ( i = 0; i < k; ++i ) + { + dest[i] = c * v[i]; + } +} real Dot( real* v1, real* v2, int k ) { - real ret = 0; + int i; + real ret; - for ( --k; k >= 0; --k ) - ret += v1[k] * v2[k]; + ret = 0.0; + + for ( i = 0; i < k; ++i ) + { + ret += v1[i] * v2[i]; + } return ret; } @@ -101,10 +114,15 @@ real Dot_local( real *v1, real *v2, int k ) real Norm( real* v1, int k ) { - real ret = 0; + int i; + real ret; - for ( --k; k >= 0; --k ) - ret += SQR( v1[k] ); + ret = 0.0; + + for ( i = 0; i < k; ++i ) + { + ret += SQR( v1[i] ); + } return sqrt( ret ); } diff --git a/PuReMD/src/vector.h b/PuReMD/src/vector.h index e133d8aef762291a5bf9861c3696bf47fd85c1d4..1b12342307e180eac1ecdfb96d894ce9f6be41bb 100644 --- a/PuReMD/src/vector.h +++ b/PuReMD/src/vector.h @@ -56,6 +56,7 @@ static inline void Vector_Add( real* dest, real c, real* v, int k ) } } + real Dot( real*, real*, int ); real Dot_local( real*, real*, int );