Newer
Older
Kurt A. O'Hearn
committed
/* no comm part2 because d is only local portion */
}
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->d, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec_local( H, workspace->d, workspace->z, H->NT );
#else
Sparse_MatVec_local( H, workspace->d, workspace->z, system->N );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data,
Kurt A. O'Hearn
committed
H->format, workspace->z, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
redux[0] = Dot_local( workspace->r_hat, workspace->z, system->n );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
Kurt A. O'Hearn
committed
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 1, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
Kurt A. O'Hearn
committed
tmp = redux[0];
alpha = rho / tmp;
Vector_Sum( workspace->q, 1.0, workspace->r, -1.0 * alpha, workspace->z, system->n );
redux[0] = Dot_local( workspace->q, workspace->q, system->n );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
Kurt A. O'Hearn
committed
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 1, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
Kurt A. O'Hearn
committed
tmp = redux[0];
/* early convergence check */
if ( tmp < tol )
{
Vector_Add( x, alpha, workspace->d, system->n );
break;
}
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
Kurt A. O'Hearn
committed
/* pre-conditioning */
Kurt A. O'Hearn
committed
if ( control->cm_solver_pre_comp_type == NONE_PC )
{
Vector_Copy( workspace->q_hat, workspace->q, system->n );
}
else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
{
for ( j = 0; j < system->n; ++j )
{
workspace->q_hat[j] = workspace->q[j] * workspace->Hdia_inv[j];
}
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
}
else if ( control->cm_solver_pre_comp_type == SAI_PC )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->q, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec_local( &workspace->H_app_inv, workspace->q, workspace->q_hat, H->NT );
#else
Sparse_MatVec_local( &workspace->H_app_inv, workspace->q, workspace->q_hat, system->n );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
Kurt A. O'Hearn
committed
/* no comm part2 because q_hat is only local portion */
}
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->q_hat, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec_local( H, workspace->q_hat, workspace->y, H->NT );
#else
Sparse_MatVec_local( H, workspace->q_hat, workspace->y, system->N );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data,
Kurt A. O'Hearn
committed
H->format, workspace->y, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
redux[0] = Dot_local( workspace->y, workspace->q, system->n );
redux[1] = Dot_local( workspace->y, workspace->y, system->n );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
Kurt A. O'Hearn
committed
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
Kurt A. O'Hearn
committed
sigma = redux[0];
tmp = redux[1];
omega = sigma / tmp;
Vector_Sum( workspace->g, alpha, workspace->d, omega, workspace->q_hat, system->n );
Vector_Add( x, 1.0, workspace->g, system->n );
Vector_Sum( workspace->r, 1.0, workspace->q, -1.0 * omega, workspace->y, system->n );
redux[0] = Dot_local( workspace->r, workspace->r, system->n );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
Kurt A. O'Hearn
committed
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 1, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
r_norm = SQRT( redux[0] );
Kurt A. O'Hearn
committed
if ( omega == 0.0 )
{
break;
}
rho_old = rho;
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
Kurt A. O'Hearn
committed
if ( omega == 0.0 && system->my_rank == MASTER_NODE )
{
fprintf( stderr, "[WARNING] BiCGStab numeric breakdown (%d iters)\n", i );
Kurt A. O'Hearn
committed
fprintf( stderr, " [INFO] omega = %e\n", omega );
Kurt A. O'Hearn
committed
}
else if ( rho == 0.0 && system->my_rank == MASTER_NODE )
{
fprintf( stderr, "[WARNING] BiCGStab numeric breakdown (%d iters)\n", i );
Kurt A. O'Hearn
committed
fprintf( stderr, " [INFO] rho = %e\n", rho );
Kurt A. O'Hearn
committed
}
else if ( i >= control->cm_solver_max_iters
&& system->my_rank == MASTER_NODE )
{
fprintf( stderr, "[WARNING] BiCGStab convergence failed (%d iters)\n", i );
Kurt A. O'Hearn
committed
fprintf( stderr, " [INFO] Rel. residual error: %e\n", r_norm / b_norm );
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
return i;
}
Kurt A. O'Hearn
committed
/* Dual iteration for the Pipelined Preconditioned Conjugate Gradient Method
* for QEq (2 simaltaneous solves)
*
* References:
* 1) Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm,
* P. Ghysels and W. Vanroose, Parallel Computing, 2014.
* 2) Scalable Non-blocking Preconditioned Conjugate Gradient Methods,
* Paul R. Eller and William Gropp, SC '16 Proceedings of the International Conference
* for High Performance Computing, Networking, Storage and Analysis, 2016.
* */
int dual_PIPECG( reax_system const * const system, control_params const * const control,
simulation_data * const data,
storage * const workspace, sparse_matrix * const H, rvec2 * const b,
real tol, rvec2 * const x, mpi_datatypes * const mpi_data )
{
Kurt A. O'Hearn
committed
int i, j, ret;
Kurt A. O'Hearn
committed
rvec2 alpha, beta, delta, gamma_old, gamma_new, norm, b_norm;
Kurt A. O'Hearn
committed
real redux[8];
Kurt A. O'Hearn
committed
MPI_Request req;
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
real time;
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
time = Get_Time( );
#endif
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
x, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
dual_Sparse_MatVec_local( H, x, workspace->u2, H->NT );
#else
dual_Sparse_MatVec_local( H, x, workspace->u2, system->N );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data,
Kurt A. O'Hearn
committed
H->format, workspace->u2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
//Vector_Sum( workspace->r , 1.0, b, -1.0, workspace->u, system->n );
for ( j = 0; j < system->n; ++j )
Kurt A. O'Hearn
committed
workspace->r2[j][0] = b[j][0] - workspace->u2[j][0];
workspace->r2[j][1] = b[j][1] - workspace->u2[j][1];
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
Kurt A. O'Hearn
committed
/* pre-conditioning */
if ( control->cm_solver_pre_comp_type == NONE_PC )
Kurt A. O'Hearn
committed
//Vector_Copy( workspace->u, workspace->r, system->n );
for ( j = 0; j < system->n ; ++j )
{
workspace->u2[j][0] = workspace->r2[j][0];
workspace->u2[j][1] = workspace->r2[j][1];
}
Kurt A. O'Hearn
committed
else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
Kurt A. O'Hearn
committed
for ( j = 0; j < system->n; ++j )
{
workspace->u2[j][0] = workspace->r2[j][0] * workspace->Hdia_inv[j];
workspace->u2[j][1] = workspace->r2[j][1] * workspace->Hdia_inv[j];
}
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
Kurt A. O'Hearn
committed
else if ( control->cm_solver_pre_comp_type == SAI_PC )
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->r2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->r2, workspace->u2, H->NT );
#else
dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->r2, workspace->u2, system->n );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
Kurt A. O'Hearn
committed
/* no comm part2 because u2 is only local portion */
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->u2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
dual_Sparse_MatVec_local( H, workspace->u2, workspace->w2, H->NT );
#else
dual_Sparse_MatVec_local( H, workspace->u2, workspace->w2, system->N );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data,
Kurt A. O'Hearn
committed
H->format, workspace->w2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
//redux[0] = Dot_local( workspace->w, workspace->u, system->n );
//redux[1] = Dot_local( workspace->r, workspace->u, system->n );
//redux[2] = Dot_local( workspace->u, workspace->u, system->n );
//redux[3] = Dot_local( b, b, system->n );
for ( j = 0; j < 8; ++j )
Kurt A. O'Hearn
committed
redux[j] = 0.0;
Kurt A. O'Hearn
committed
for( j = 0; j < system->n; ++j )
Kurt A. O'Hearn
committed
redux[0] += workspace->w2[j][0] * workspace->u2[j][0];
redux[1] += workspace->w2[j][1] * workspace->u2[j][1];
redux[2] += workspace->r2[j][0] * workspace->u2[j][0];
redux[3] += workspace->r2[j][1] * workspace->u2[j][1];
redux[4] += workspace->u2[j][0] * workspace->u2[j][0];
redux[5] += workspace->u2[j][1] * workspace->u2[j][1];
redux[6] += b[j][0] * b[j][0];
redux[7] += b[j][1] * b[j][1];
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
Kurt A. O'Hearn
committed
ret = MPI_Iallreduce( MPI_IN_PLACE, redux, 8, MPI_DOUBLE, MPI_SUM,
MPI_COMM_WORLD, &req );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
/* pre-conditioning */
if ( control->cm_solver_pre_comp_type == NONE_PC )
Kurt A. O'Hearn
committed
//Vector_Copy( workspace->m, workspace->w, system->n );
for ( j = 0; j < system->n; ++j )
{
workspace->m2[j][0] = workspace->w2[j][0];
workspace->m2[j][1] = workspace->w2[j][1];
}
Kurt A. O'Hearn
committed
else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
{
for ( j = 0; j < system->n; ++j )
{
workspace->m2[j][0] = workspace->w2[j][0] * workspace->Hdia_inv[j];
workspace->m2[j][1] = workspace->w2[j][1] * workspace->Hdia_inv[j];
}
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
Kurt A. O'Hearn
committed
}
else if ( control->cm_solver_pre_comp_type == SAI_PC )
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->w2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->w2, workspace->m2, H->NT );
#else
dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->w2, workspace->m2, system->n );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
Kurt A. O'Hearn
committed
/* no comm part2 because m2 is only local portion */
}
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->m2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
dual_Sparse_MatVec_local( H, workspace->m2, workspace->n2, H->NT );
#else
dual_Sparse_MatVec_local( H, workspace->m2, workspace->n2, system->N );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data,
H->format, workspace->n2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
delta[0] = redux[0];
delta[1] = redux[1];
gamma_new[0] = redux[2];
gamma_new[1] = redux[3];
norm[0] = SQRT( redux[4] );
norm[1] = SQRT( redux[5] );
b_norm[0] = SQRT( redux[6] );
b_norm[1] = SQRT( redux[7] );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
Kurt A. O'Hearn
committed
for ( i = 0; i < control->cm_solver_max_iters; ++i )
{
if ( norm[0] / b_norm[0] <= tol || norm[1] / b_norm[1] <= tol )
{
break;
}
if ( i > 0 )
Kurt A. O'Hearn
committed
beta[0] = gamma_new[0] / gamma_old[0];
beta[1] = gamma_new[1] / gamma_old[1];
alpha[0] = gamma_new[0] / (delta[0] - beta[0] / alpha[0] * gamma_new[0]);
alpha[1] = gamma_new[1] / (delta[1] - beta[1] / alpha[1] * gamma_new[1]);
}
else
{
beta[0] = 0.0;
beta[1] = 0.0;
alpha[0] = gamma_new[0] / delta[0];
alpha[1] = gamma_new[1] / delta[1];
Kurt A. O'Hearn
committed
//Vector_Sum( workspace->z, 1.0, workspace->n, beta, workspace->z, system->n );
//Vector_Sum( workspace->q, 1.0, workspace->m, beta, workspace->q, system->n );
//Vector_Sum( workspace->p, 1.0, workspace->u, beta, workspace->p, system->n );
//Vector_Sum( workspace->d, 1.0, workspace->w, beta, workspace->d, system->n );
//Vector_Sum( x, 1.0, x, alpha, workspace->p, system->n );
//Vector_Sum( workspace->u, 1.0, workspace->u, -alpha, workspace->q, system->n );
//Vector_Sum( workspace->w, 1.0, workspace->w, -alpha, workspace->z, system->n );
//Vector_Sum( workspace->r, 1.0, workspace->r, -alpha, workspace->d, system->n );
//redux[0] = Dot_local( workspace->w, workspace->u, system->n );
//redux[1] = Dot_local( workspace->r, workspace->u, system->n );
//redux[2] = Dot_local( workspace->u, workspace->u, system->n );
for ( j = 0; j < 6; ++j )
Kurt A. O'Hearn
committed
redux[j] = 0.0;
}
for ( j = 0; j < system->n; ++j )
{
workspace->z2[j][0] = workspace->n2[j][0] + beta[0] * workspace->z2[j][0];
workspace->z2[j][1] = workspace->n2[j][1] + beta[1] * workspace->z2[j][1];
Kurt A. O'Hearn
committed
workspace->q2[j][0] = workspace->m2[j][0] + beta[0] * workspace->q2[j][0];
workspace->q2[j][1] = workspace->m2[j][1] + beta[1] * workspace->q2[j][1];
Kurt A. O'Hearn
committed
workspace->p2[j][0] = workspace->u2[j][0] + beta[0] * workspace->p2[j][0];
workspace->p2[j][1] = workspace->u2[j][1] + beta[1] * workspace->p2[j][1];
Kurt A. O'Hearn
committed
workspace->d2[j][0] = workspace->w2[j][0] + beta[0] * workspace->d2[j][0];
workspace->d2[j][1] = workspace->w2[j][1] + beta[1] * workspace->d2[j][1];
Kurt A. O'Hearn
committed
x[j][0] += alpha[0] * workspace->p2[j][0];
x[j][1] += alpha[1] * workspace->p2[j][1];
Kurt A. O'Hearn
committed
workspace->u2[j][0] -= alpha[0] * workspace->q2[j][0];
workspace->u2[j][1] -= alpha[1] * workspace->q2[j][1];
Kurt A. O'Hearn
committed
workspace->w2[j][0] -= alpha[0] * workspace->z2[j][0];
workspace->w2[j][1] -= alpha[1] * workspace->z2[j][1];
Kurt A. O'Hearn
committed
workspace->r2[j][0] -= alpha[0] * workspace->d2[j][0];
workspace->r2[j][1] -= alpha[1] * workspace->d2[j][1];
redux[0] += workspace->w2[j][0] * workspace->u2[j][0];
redux[1] += workspace->w2[j][1] * workspace->u2[j][1];
redux[2] += workspace->r2[j][0] * workspace->u2[j][0];
redux[3] += workspace->r2[j][1] * workspace->u2[j][1];
redux[4] += workspace->u2[j][0] * workspace->u2[j][0];
redux[5] += workspace->u2[j][1] * workspace->u2[j][1];
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
Kurt A. O'Hearn
committed
ret = MPI_Iallreduce( MPI_IN_PLACE, redux, 6, MPI_DOUBLE, MPI_SUM,
MPI_COMM_WORLD, &req );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
/* pre-conditioning */
if ( control->cm_solver_pre_comp_type == NONE_PC )
Kurt A. O'Hearn
committed
//Vector_Copy( workspace->m, workspace->w, system->n );
for ( j = 0; j < system->n; ++j )
{
workspace->m2[j][0] = workspace->w2[j][0];
workspace->m2[j][1] = workspace->w2[j][1];
}
Kurt A. O'Hearn
committed
else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
{
for ( j = 0; j < system->n; ++j )
{
workspace->m2[j][0] = workspace->w2[j][0] * workspace->Hdia_inv[j];
workspace->m2[j][1] = workspace->w2[j][1] * workspace->Hdia_inv[j];
}
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
Kurt A. O'Hearn
committed
}
else if ( control->cm_solver_pre_comp_type == SAI_PC )
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->w2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->w2, workspace->m2, H->NT );
#else
dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->w2, workspace->m2, system->n );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
Kurt A. O'Hearn
committed
/* no comm part2 because m2 is only local portion */
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->m2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
dual_Sparse_MatVec_local( H, workspace->m2, workspace->n2, H->NT );
#else
dual_Sparse_MatVec_local( H, workspace->m2, workspace->n2, system->N );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data,
Kurt A. O'Hearn
committed
H->format, workspace->n2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
gamma_old[0] = gamma_new[0];
gamma_old[1] = gamma_new[1];
Kurt A. O'Hearn
committed
ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
delta[0] = redux[0];
delta[1] = redux[1];
gamma_new[0] = redux[2];
gamma_new[1] = redux[3];
norm[0] = SQRT( redux[4] );
norm[1] = SQRT( redux[5] );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
Kurt A. O'Hearn
committed
/* continue to solve the system that has not converged yet */
if ( norm[0] / b_norm[0] > tol )
Kurt A. O'Hearn
committed
for ( j = 0; j < system->n; ++j )
Kurt A. O'Hearn
committed
workspace->s[j] = workspace->x[j][0];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
i += PIPECG( system, control, data, workspace,
H, workspace->b_s, tol, workspace->s, mpi_data );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
for ( j = 0; j < system->n; ++j )
Kurt A. O'Hearn
committed
workspace->x[j][0] = workspace->s[j];
Kurt A. O'Hearn
committed
else if ( norm[1] / b_norm[1] > tol )
Kurt A. O'Hearn
committed
for ( j = 0; j < system->n; ++j )
Kurt A. O'Hearn
committed
workspace->t[j] = workspace->x[j][1];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
i += PIPECG( system, control, data, workspace,
H, workspace->b_t, tol, workspace->t, mpi_data );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
for ( j = 0; j < system->n; ++j )
Kurt A. O'Hearn
committed
workspace->x[j][1] = workspace->t[j];
Kurt A. O'Hearn
committed
if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE )
Kurt A. O'Hearn
committed
fprintf( stderr, "[WARNING] PIPECG convergence failed!\n" );
return i;
Kurt A. O'Hearn
committed
return i;
Kurt A. O'Hearn
committed
/* Pipelined Preconditioned Conjugate Gradient Method
*
* References:
* 1) Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm,
* P. Ghysels and W. Vanroose, Parallel Computing, 2014.
* 2) Scalable Non-blocking Preconditioned Conjugate Gradient Methods,
* Paul R. Eller and William Gropp, SC '16 Proceedings of the International Conference
* for High Performance Computing, Networking, Storage and Analysis, 2016.
* */
int PIPECG( reax_system const * const system, control_params const * const control,
simulation_data * const data,
storage * const workspace, sparse_matrix * const H, real * const b,
real tol, real * const x, mpi_datatypes * const mpi_data )
Kurt A. O'Hearn
committed
int i, j, ret;
Kurt A. O'Hearn
committed
real alpha, beta, delta, gamma_old, gamma_new, norm, b_norm;
Kurt A. O'Hearn
committed
real redux[4];
Kurt A. O'Hearn
committed
MPI_Request req;
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
real time;
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
time = Get_Time( );
#endif
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
x, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec_local( H, x, workspace->u, H->NT );
#else
Sparse_MatVec_local( H, x, workspace->u, system->N );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data,
Kurt A. O'Hearn
committed
H->format, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
Vector_Sum( workspace->r , 1.0, b, -1.0, workspace->u, system->n );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
Kurt A. O'Hearn
committed
/* pre-conditioning */
if ( control->cm_solver_pre_comp_type == NONE_PC )
{
Vector_Copy( workspace->u, workspace->r, system->n );
}
else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
{
for ( j = 0; j < system->n; ++j )
{
workspace->u[j] = workspace->r[j] * workspace->Hdia_inv[j];
}
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
Kurt A. O'Hearn
committed
}
else if ( control->cm_solver_pre_comp_type == SAI_PC )
{
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->r, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->u, H->NT );
#else
Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->u, system->n );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
Kurt A. O'Hearn
committed
/* no comm part2 because u is only local portion */
}
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->u, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec_local( H, workspace->u, workspace->w, H->NT );
#else
Sparse_MatVec_local( H, workspace->u, workspace->w, system->N );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data,
Kurt A. O'Hearn
committed
H->format, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
redux[0] = Dot_local( workspace->w, workspace->u, system->n );
redux[1] = Dot_local( workspace->r, workspace->u, system->n );
redux[2] = Dot_local( workspace->u, workspace->u, system->n );
redux[3] = Dot_local( b, b, system->n );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
ret = MPI_Iallreduce( MPI_IN_PLACE, redux, 4, MPI_DOUBLE, MPI_SUM,
MPI_COMM_WORLD, &req );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
/* pre-conditioning */
if ( control->cm_solver_pre_comp_type == NONE_PC )
{
Vector_Copy( workspace->m, workspace->w, system->n );
}
else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
Kurt A. O'Hearn
committed
for ( j = 0; j < system->n; ++j )
{
workspace->m[j] = workspace->w[j] * workspace->Hdia_inv[j];
}
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
Kurt A. O'Hearn
committed
else if ( control->cm_solver_pre_comp_type == SAI_PC )
{
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->w, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec_local( &workspace->H_app_inv, workspace->w, workspace->m, H->NT );
#else
Sparse_MatVec_local( &workspace->H_app_inv, workspace->w, workspace->m, system->n );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
Kurt A. O'Hearn
committed
/* no comm part2 because m is only local portion */
}
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->m, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec_local( H, workspace->m, workspace->n, H->NT );
#else
Sparse_MatVec_local( H, workspace->m, workspace->n, system->N );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data,
Kurt A. O'Hearn
committed
H->format, workspace->n, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
delta = redux[0];
gamma_new = redux[1];
norm = SQRT( redux[2] );
b_norm = SQRT( redux[3] );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
Kurt A. O'Hearn
committed
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
for ( i = 0; i < control->cm_solver_max_iters && norm / b_norm > tol; ++i )
{
if ( i > 0 )
{
beta = gamma_new / gamma_old;
alpha = gamma_new / (delta - beta / alpha * gamma_new);
}
else
{
beta = 0.0;
alpha = gamma_new / delta;
}
Vector_Sum( workspace->z, 1.0, workspace->n, beta, workspace->z, system->n );
Vector_Sum( workspace->q, 1.0, workspace->m, beta, workspace->q, system->n );
Vector_Sum( workspace->p, 1.0, workspace->u, beta, workspace->p, system->n );
Vector_Sum( workspace->d, 1.0, workspace->w, beta, workspace->d, system->n );
Vector_Sum( x, 1.0, x, alpha, workspace->p, system->n );
Vector_Sum( workspace->u, 1.0, workspace->u, -alpha, workspace->q, system->n );
Vector_Sum( workspace->w, 1.0, workspace->w, -alpha, workspace->z, system->n );
Vector_Sum( workspace->r, 1.0, workspace->r, -alpha, workspace->d, system->n );
redux[0] = Dot_local( workspace->w, workspace->u, system->n );
redux[1] = Dot_local( workspace->r, workspace->u, system->n );
redux[2] = Dot_local( workspace->u, workspace->u, system->n );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
ret = MPI_Iallreduce( MPI_IN_PLACE, redux, 3, MPI_DOUBLE, MPI_SUM,
MPI_COMM_WORLD, &req );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
/* pre-conditioning */
if ( control->cm_solver_pre_comp_type == NONE_PC )
{
Vector_Copy( workspace->m, workspace->w, system->n );
}
else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
{
for ( j = 0; j < system->n; ++j )
{
workspace->m[j] = workspace->w[j] * workspace->Hdia_inv[j];
}
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
Kurt A. O'Hearn
committed
}
else if ( control->cm_solver_pre_comp_type == SAI_PC )
{
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->w, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec_local( &workspace->H_app_inv, workspace->w, workspace->m, H->NT );
#else
Sparse_MatVec_local( &workspace->H_app_inv, workspace->w, workspace->m, system->n );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
#endif
Kurt A. O'Hearn
committed
/* no comm part2 because m is only local portion */
}
Kurt A. O'Hearn
committed
Sparse_MatVec_Comm_Part1( system, control, mpi_data,
Kurt A. O'Hearn
committed
workspace->m, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec_local( H, workspace->m, workspace->n, H->NT );
#else
Sparse_MatVec_local( H, workspace->m, workspace->n, system->N );
#endif
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data,
Kurt A. O'Hearn
committed
H->format, workspace->n, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Kurt A. O'Hearn
committed
gamma_old = gamma_new;
Kurt A. O'Hearn
committed
ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
delta = redux[0];
gamma_new = redux[1];
norm = SQRT( redux[2] );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
Kurt A. O'Hearn
committed
if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE )
Kurt A. O'Hearn
committed
fprintf( stderr, "[WARNING] PIPECG convergence failed!\n" );
return i;
}
return i;
}
Kurt A. O'Hearn
committed
/* Pipelined Preconditioned Conjugate Residual Method
*
* References:
* 1) Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm,
* P. Ghysels and W. Vanroose, Parallel Computing, 2014.
* */
int PIPECR( reax_system const * const system, control_params const * const control,
simulation_data * const data,
storage * const workspace, sparse_matrix * const H, real * const b,
real tol, real * const x, mpi_datatypes * const mpi_data )
{
Kurt A. O'Hearn
committed
int i, j, ret;
Kurt A. O'Hearn
committed
real alpha, beta, delta, gamma_old, gamma_new, norm, b_norm;
Kurt A. O'Hearn
committed
real redux[3];
Kurt A. O'Hearn
committed
MPI_Request req;
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
real time;
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
time = Get_Time( );
#endif
Kurt A. O'Hearn
committed
redux[0] = Dot_local( b, b, system->n );
Kurt A. O'Hearn
committed
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
ret = MPI_Iallreduce( MPI_IN_PLACE, redux, 1, MPI_DOUBLE, MPI_SUM,
MPI_COMM_WORLD, &req );