#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
for ( i = 0; i < control->cm_solver_max_iters && r_norm / b_norm > tol; ++i )
{
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec( system, control, data, mpi_data, H, workspace->d,
H->NT, workspace->q );
#else
Sparse_MatVec( system, control, data, mpi_data, H, workspace->d,
system->N, workspace->q );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
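/* step length denominator: tmp = (d, H * d), computed as a blocking global dot product */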
tmp = Parallel_Dot( workspace->d, workspace->q, system->n, MPI_COMM_WORLD );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
alpha = sig_new / tmp;
Vector_Add( x, alpha, workspace->d, system->n );
Vector_Add( workspace->r, -1.0 * alpha, workspace->q, system->n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r,
workspace->q, FALSE, LEFT );
apply_preconditioner( system, workspace, control, data, mpi_data, workspace->q,
workspace->p, FALSE, RIGHT );
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
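/* local parts of (r, p) and (p, p), where p is the preconditioned residual; reduced globally below */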
redux[0] = Dot_local( workspace->r, workspace->p, system->n );
redux[1] = Dot_local( workspace->p, workspace->p, system->n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
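/* beta = sig_new / sig_old and new search direction d = p + beta * d */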
sig_old = sig_new;
sig_new = redux[0];
r_norm = SQRT( redux[1] );
beta = sig_new / sig_old;
Vector_Sum( workspace->d, 1.0, workspace->p, beta, workspace->d, system->n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
}
if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE )
{
fprintf( stderr, "[WARNING] CG convergence failed (%d iters)\n", i );
fprintf( stderr, " [INFO] Rel. residual error: %e\n", r_norm / b_norm );
return i;
}
return i;
}
/* Bi-conjugate gradient stabilized method with left preconditioning for
* solving nonsymmetric linear systems.
* This function performs dual iteration for QEq (2 simultaneous solves)
*
* system:
* workspace: struct containing storage for workspace for the linear solver
* control: struct containing parameters governing the simulation and numeric methods
* data: struct containing simulation data (e.g., atom info)
* H: sparse, symmetric matrix, lower half stored in CSR format
* b: right-hand side of the linear system
tol: tolerance compared against the relative residual for determining convergence
x: initial guess
* mpi_data:
*
* Reference: Netlib (in MATLAB)
* http://www.netlib.org/templates/matlab/bicgstab.m
* */
int dual_BiCGStab( reax_system const * const system, control_params const * const control,
simulation_data * const data,
storage * const workspace, sparse_matrix * const H, rvec2 * const b,
real tol, rvec2 * const x, mpi_datatypes * const mpi_data, int fresh_pre )
{
int i, j, ret;
rvec2 tmp, alpha, beta, omega, sigma, rho, rho_old, r_norm, b_norm;
real time, redux[4];
#if defined(NEUTRAL_TERRITORY)
Dual_Sparse_MatVec( system, control, data, mpi_data, H, x,
H->NT, workspace->d2 );
#else
Dual_Sparse_MatVec( system, control, data, mpi_data, H, x,
system->N, workspace->d2 );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
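/* initial residuals for both solves: r = b - H * x, plus local norms of b and r */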
Vector_Sum_rvec2( workspace->r2, 1.0, 1.0, b, -1.0, -1.0, workspace->d2, system->n );
Dot_local_rvec2( b, b, system->n, &redux[0], &redux[1] );
Dot_local_rvec2( workspace->r2, workspace->r2, system->n, &redux[2], &redux[3] );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 4, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
b_norm[0] = SQRT( redux[0] );
b_norm[1] = SQRT( redux[1] );
r_norm[0] = SQRT( redux[2] );
r_norm[1] = SQRT( redux[3] );
if ( b_norm[0] == 0.0 )
{
b_norm[0] = 1.0;
}
if ( b_norm[1] == 0.0 )
{
b_norm[1] = 1.0;
}
Vector_Copy_rvec2( workspace->r_hat2, workspace->r2, system->n );
omega[0] = 1.0;
omega[1] = 1.0;
rho[0] = 1.0;
rho[1] = 1.0;
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
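/* BiCGStab iterations, advanced simultaneously for both right-hand sides */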
for ( i = 0; i < control->cm_solver_max_iters; ++i )
{
if ( r_norm[0] / b_norm[0] <= tol || r_norm[1] / b_norm[1] <= tol )
{
break;
}
Dot_local_rvec2( workspace->r_hat2, workspace->r2, system->n,
&redux[0], &redux[1] );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
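/* rho = (r_hat, r); a zero value signals numeric breakdown */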
rho[0] = redux[0];
rho[1] = redux[1];
if ( rho[0] == 0.0 || rho[1] == 0.0 )
{
break;
}
if ( i > 0 )
{
beta[0] = (rho[0] / rho_old[0]) * (alpha[0] / omega[0]);
beta[1] = (rho[1] / rho_old[1]) * (alpha[1] / omega[1]);
Vector_Sum_rvec2( workspace->q2, 1.0, 1.0, workspace->p2,
-1.0 * omega[0], -1.0 * omega[1], workspace->z2, system->n );
Vector_Sum_rvec2( workspace->p2, 1.0, 1.0, workspace->r2,
beta[0], beta[1], workspace->q2, system->n );
}
else
{
Vector_Copy_rvec2( workspace->p2, workspace->r2, system->n );
}
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->p2,
workspace->y2, i == 0 ? fresh_pre : FALSE, LEFT );
dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->y2,
workspace->d2, i == 0 ? fresh_pre : FALSE, RIGHT );
#if defined(NEUTRAL_TERRITORY)
Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->d2,
H->NT, workspace->z2 );
#else
Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->d2,
system->N, workspace->z2 );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
Dot_local_rvec2( workspace->r_hat2, workspace->z2, system->n,
&redux[0], &redux[1] );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
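/* alpha = rho / (r_hat, H * d) for each solve */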
tmp[0] = redux[0];
tmp[1] = redux[1];
alpha[0] = rho[0] / tmp[0];
alpha[1] = rho[1] / tmp[1];
Vector_Sum_rvec2( workspace->q2, 1.0, 1.0, workspace->r2,
-1.0 * alpha[0], -1.0 * alpha[1], workspace->z2, system->n );
Dot_local_rvec2( workspace->q2, workspace->q2, system->n,
&redux[0], &redux[1] );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
tmp[0] = redux[0];
tmp[1] = redux[1];
/* early convergence check */
if ( tmp[0] < tol || tmp[1] < tol )
{
Vector_Add_rvec2( x, alpha[0], alpha[1], workspace->d2, system->n );
break;
}
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->q2,
workspace->y2, FALSE, LEFT );
dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->y2,
workspace->q_hat2, FALSE, RIGHT );
#if defined(NEUTRAL_TERRITORY)
Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->q_hat2,
H->NT, workspace->y2 );
#else
Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->q_hat2,
system->N, workspace->y2 );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
Dot_local_rvec2( workspace->y2, workspace->q2, system->n, &redux[0], &redux[1] );
Dot_local_rvec2( workspace->y2, workspace->y2, system->n, &redux[2], &redux[3] );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 4, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
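/* omega = (y, q) / (y, y), where y = H * q_hat */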
sigma[0] = redux[0];
sigma[1] = redux[1];
tmp[0] = redux[2];
tmp[1] = redux[3];
omega[0] = sigma[0] / tmp[0];
omega[1] = sigma[1] / tmp[1];
Vector_Sum_rvec2( workspace->g2, alpha[0], alpha[1], workspace->d2,
omega[0], omega[1], workspace->q_hat2, system->n );
Vector_Add_rvec2( x, 1.0, 1.0, workspace->g2, system->n );
Vector_Sum_rvec2( workspace->r2, 1.0, 1.0, workspace->q2,
-1.0 * omega[0], -1.0 * omega[1], workspace->y2, system->n );
Dot_local_rvec2( workspace->r2, workspace->r2, system->n, &redux[0], &redux[1] );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
r_norm[0] = SQRT( redux[0] );
r_norm[1] = SQRT( redux[1] );
if ( omega[0] == 0.0 || omega[1] == 0.0 )
{
break;
}
rho_old[0] = rho[0];
rho_old[1] = rho[1];
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
}
if ( (omega[0] == 0.0 || omega[1] == 0.0) && system->my_rank == MASTER_NODE )
{
fprintf( stderr, "[WARNING] BiCGStab numeric breakdown (%d iters)\n", i );
fprintf( stderr, " [INFO] omega = %e\n", omega );
}
else if ( (rho[0] == 0.0 || rho[1] == 0.0) && system->my_rank == MASTER_NODE )
{
fprintf( stderr, "[WARNING] BiCGStab numeric breakdown (%d iters)\n", i );
fprintf( stderr, " [INFO] rho = %e\n", rho );
}
/* continue to solve the system that has not converged yet */
if ( r_norm[0] / b_norm[0] > tol )
{
Vector_Copy_From_rvec2( workspace->s, workspace->x, 0, system->n );
i += BiCGStab( system, control, data, workspace,
H, workspace->b_s, tol, workspace->s, mpi_data, FALSE );
Vector_Copy_To_rvec2( workspace->x, workspace->s, 0, system->n );
}
else if ( r_norm[1] / b_norm[1] > tol )
{
Vector_Copy_From_rvec2( workspace->t, workspace->x, 1, system->n );
i += BiCGStab( system, control, data, workspace,
H, workspace->b_t, tol, workspace->t, mpi_data, FALSE );
Vector_Copy_To_rvec2( workspace->x, workspace->t, 1, system->n );
}
if ( i >= control->cm_solver_max_iters
&& system->my_rank == MASTER_NODE )
{
fprintf( stderr, "[WARNING] BiCGStab convergence failed (%d iters)\n", i );
fprintf( stderr, " [INFO] Rel. residual error (s solve): %e\n", r_norm[0] / b_norm[0] );
fprintf( stderr, " [INFO] Rel. residual error (t solve): %e\n", r_norm[1] / b_norm[1] );
}
return i;
}
/* Bi-conjugate gradient stabilized method with left preconditioning for
* solving nonsymmetric linear systems
*
* system:
* workspace: struct containing storage for workspace for the linear solver
* control: struct containing parameters governing the simulation and numeric methods
* data: struct containing simulation data (e.g., atom info)
* H: sparse, symmetric matrix, lower half stored in CSR format
* b: right-hand side of the linear system
tol: tolerance compared against the relative residual for determining convergence
x: initial guess
* mpi_data:
*
* Reference: Netlib (in MATLAB)
* http://www.netlib.org/templates/matlab/bicgstab.m
* */
int BiCGStab( reax_system const * const system, control_params const * const control,
simulation_data * const data,
storage * const workspace, sparse_matrix * const H, real * const b,
real tol, real * const x, mpi_datatypes * const mpi_data, int fresh_pre )
{
int i, j, ret;
real tmp, alpha, beta, omega, sigma, rho, rho_old, r_norm, b_norm;
real time, redux[2];
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec( system, control, data, mpi_data, H, x,
H->NT, workspace->d );
#else
Sparse_MatVec( system, control, data, mpi_data, H, x,
system->N, workspace->d );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
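/* initial residual r = b - H * x; local dot products for ||b|| and ||r|| */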
Vector_Sum( workspace->r, 1.0, b, -1.0, workspace->d, system->n );
redux[0] = Dot_local( b, b, system->n );
redux[1] = Dot_local( workspace->r, workspace->r, system->n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
b_norm = SQRT( redux[0] );
r_norm = SQRT( redux[1] );
if ( b_norm == 0.0 )
{
b_norm = 1.0;
}
Vector_Copy( workspace->r_hat, workspace->r, system->n );
omega = 1.0;
rho = 1.0;
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
for ( i = 0; i < control->cm_solver_max_iters && r_norm / b_norm > tol; ++i )
{
redux[0] = Dot_local( workspace->r_hat, workspace->r, system->n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 1, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
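/* rho = (r_hat, r); zero indicates numeric breakdown */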
rho = redux[0];
if ( rho == 0.0 )
{
break;
}
if ( i > 0 )
{
beta = (rho / rho_old) * (alpha / omega);
Vector_Sum( workspace->q, 1.0, workspace->p, -1.0 * omega, workspace->z, system->n );
Vector_Sum( workspace->p, 1.0, workspace->r, beta, workspace->q, system->n );
}
else
{
Vector_Copy( workspace->p, workspace->r, system->n );
}
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
apply_preconditioner( system, workspace, control, data, mpi_data, workspace->p,
workspace->y, i == 0 ? fresh_pre : FALSE, LEFT );
apply_preconditioner( system, workspace, control, data, mpi_data, workspace->y,
workspace->d, i == 0 ? fresh_pre : FALSE, RIGHT );
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec( system, control, data, mpi_data, H, workspace->d,
H->NT, workspace->z );
#else
Sparse_MatVec( system, control, data, mpi_data, H, workspace->d,
system->N, workspace->z );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
redux[0] = Dot_local( workspace->r_hat, workspace->z, system->n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 1, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
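/* alpha = rho / (r_hat, H * d) */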
tmp = redux[0];
alpha = rho / tmp;
Vector_Sum( workspace->q, 1.0, workspace->r, -1.0 * alpha, workspace->z, system->n );
redux[0] = Dot_local( workspace->q, workspace->q, system->n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 1, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
tmp = redux[0];
/* early convergence check */
if ( tmp < tol )
{
Vector_Add( x, alpha, workspace->d, system->n );
break;
}
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
apply_preconditioner( system, workspace, control, data, mpi_data, workspace->q,
workspace->y, FALSE, LEFT );
apply_preconditioner( system, workspace, control, data, mpi_data, workspace->y,
workspace->q_hat, FALSE, RIGHT );
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec( system, control, data, mpi_data, H, workspace->q_hat,
H->NT, workspace->y );
#else
Sparse_MatVec( system, control, data, mpi_data, H, workspace->q_hat,
system->N, workspace->y );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
redux[0] = Dot_local( workspace->y, workspace->q, system->n );
redux[1] = Dot_local( workspace->y, workspace->y, system->n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
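/* omega = (y, q) / (y, y), with y = H * q_hat */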
sigma = redux[0];
tmp = redux[1];
omega = sigma / tmp;
Vector_Sum( workspace->g, alpha, workspace->d, omega, workspace->q_hat, system->n );
Vector_Add( x, 1.0, workspace->g, system->n );
Vector_Sum( workspace->r, 1.0, workspace->q, -1.0 * omega, workspace->y, system->n );
redux[0] = Dot_local( workspace->r, workspace->r, system->n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Allreduce( MPI_IN_PLACE, redux, 1, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
r_norm = SQRT( redux[0] );
if ( omega == 0.0 )
{
break;
}
rho_old = rho;
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
}
if ( omega == 0.0 && system->my_rank == MASTER_NODE )
{
fprintf( stderr, "[WARNING] BiCGStab numeric breakdown (%d iters)\n", i );
fprintf( stderr, " [INFO] omega = %e\n", omega );
}
else if ( rho == 0.0 && system->my_rank == MASTER_NODE )
{
fprintf( stderr, "[WARNING] BiCGStab numeric breakdown (%d iters)\n", i );
fprintf( stderr, " [INFO] rho = %e\n", rho );
}
else if ( i >= control->cm_solver_max_iters
&& system->my_rank == MASTER_NODE )
{
fprintf( stderr, "[WARNING] BiCGStab convergence failed (%d iters)\n", i );
fprintf( stderr, " [INFO] Rel. residual error: %e\n", r_norm / b_norm );
}
return i;
}
/* Dual iteration for the Pipelined Preconditioned Conjugate Gradient Method
* for QEq (2 simultaneous solves)
*
* References:
* 1) Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm,
* P. Ghysels and W. Vanroose, Parallel Computing, 2014.
* 2) Scalable Non-blocking Preconditioned Conjugate Gradient Methods,
* Paul R. Eller and William Gropp, SC '16 Proceedings of the International Conference
* for High Performance Computing, Networking, Storage and Analysis, 2016.
* */
int dual_PIPECG( reax_system const * const system, control_params const * const control,
simulation_data * const data,
storage * const workspace, sparse_matrix * const H, rvec2 * const b,
real tol, rvec2 * const x, mpi_datatypes * const mpi_data, int fresh_pre )
{
int i, j, ret;
rvec2 alpha, beta, delta, gamma_old, gamma_new, r_norm, b_norm;
real redux[8];
MPI_Request req;
#if defined(LOG_PERFORMANCE)
real time;
#endif
#if defined(NEUTRAL_TERRITORY)
Dual_Sparse_MatVec( system, control, data, mpi_data, H, x,
H->NT, workspace->u2 );
#else
Dual_Sparse_MatVec( system, control, data, mpi_data, H, x,
system->N, workspace->u2 );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
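/* initial residual r = b - H * x for both solves */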
Vector_Sum_rvec2( workspace->r2, 1.0, 1.0, b, -1.0, -1.0, workspace->u2, system->n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r2,
workspace->m2, fresh_pre, LEFT );
dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->m2,
workspace->u2, fresh_pre, RIGHT );
#if defined(NEUTRAL_TERRITORY)
Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->u2,
H->NT, workspace->w2 );
#else
Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->u2,
system->N, workspace->w2 );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
for ( j = 0; j < 8; ++j )
redux[j] = 0.0;
Dot_local_rvec2( workspace->w2, workspace->u2, system->n, &redux[0], &redux[1] );
Dot_local_rvec2( workspace->r2, workspace->u2, system->n, &redux[2], &redux[3] );
Dot_local_rvec2( workspace->u2, workspace->u2, system->n, &redux[4], &redux[5] );
Dot_local_rvec2( b, b, system->n, &redux[6], &redux[7] );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Iallreduce( MPI_IN_PLACE, redux, 8, MPI_DOUBLE, MPI_SUM,
MPI_COMM_WORLD, &req );
Check_MPI_Error( ret, __FILE__, __LINE__ );
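/* overlap the non-blocking all-reduce with preconditioning and the next matrix-vector product */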
dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->w2,
workspace->n2, fresh_pre, LEFT );
dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->n2,
workspace->m2, fresh_pre, RIGHT );
#if defined(NEUTRAL_TERRITORY)
Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->m2,
H->NT, workspace->n2 );
#else
Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->m2,
system->N, workspace->n2 );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
Check_MPI_Error( ret, __FILE__, __LINE__ );
delta[0] = redux[0];
delta[1] = redux[1];
gamma_new[0] = redux[2];
gamma_new[1] = redux[3];
r_norm[0] = SQRT( redux[4] );
r_norm[1] = SQRT( redux[5] );
b_norm[0] = SQRT( redux[6] );
b_norm[1] = SQRT( redux[7] );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
for ( i = 0; i < control->cm_solver_max_iters; ++i )
{
if ( r_norm[0] / b_norm[0] <= tol || r_norm[1] / b_norm[1] <= tol )
{
break;
}
if ( i > 0 )
{
beta[0] = gamma_new[0] / gamma_old[0];
beta[1] = gamma_new[1] / gamma_old[1];
alpha[0] = gamma_new[0] / (delta[0] - beta[0] / alpha[0] * gamma_new[0]);
alpha[1] = gamma_new[1] / (delta[1] - beta[1] / alpha[1] * gamma_new[1]);
}
else
{
beta[0] = 0.0;
beta[1] = 0.0;
alpha[0] = gamma_new[0] / delta[0];
alpha[1] = gamma_new[1] / delta[1];
}
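/* pipelined recurrences: z = n + beta * z, q = m + beta * q, p = u + beta * p, d = w + beta * d,
then x += alpha * p, u -= alpha * q, w -= alpha * z, r -= alpha * d */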
Vector_Sum_rvec2( workspace->z2, 1.0, 1.0, workspace->n2,
beta[0], beta[1], workspace->z2, system->n );
Vector_Sum_rvec2( workspace->q2, 1.0, 1.0, workspace->m2,
beta[0], beta[1], workspace->q2, system->n );
Vector_Sum_rvec2( workspace->p2, 1.0, 1.0, workspace->u2,
beta[0], beta[1], workspace->p2, system->n );
Vector_Sum_rvec2( workspace->d2, 1.0, 1.0, workspace->w2,
beta[0], beta[1], workspace->d2, system->n );
Vector_Sum_rvec2( x, 1.0, 1.0, x,
alpha[0], alpha[1], workspace->p2, system->n );
Vector_Sum_rvec2( workspace->u2, 1.0, 1.0, workspace->u2,
-1.0 * alpha[0], -1.0 * alpha[1], workspace->q2, system->n );
Vector_Sum_rvec2( workspace->w2, 1.0, 1.0, workspace->w2,
-1.0 * alpha[0], -1.0 * alpha[1], workspace->z2, system->n );
Vector_Sum_rvec2( workspace->r2, 1.0, 1.0, workspace->r2,
-1.0 * alpha[0], -1.0 * alpha[1], workspace->d2, system->n );
for ( j = 0; j < 6; ++j )
{
redux[j] = 0.0;
}
Dot_local_rvec2( workspace->w2, workspace->u2, system->n, &redux[0], &redux[1] );
Dot_local_rvec2( workspace->r2, workspace->u2, system->n, &redux[2], &redux[3] );
Dot_local_rvec2( workspace->u2, workspace->u2, system->n, &redux[4], &redux[5] );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Iallreduce( MPI_IN_PLACE, redux, 6, MPI_DOUBLE, MPI_SUM,
MPI_COMM_WORLD, &req );
Check_MPI_Error( ret, __FILE__, __LINE__ );
dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->w2,
workspace->n2, fresh_pre, LEFT );
dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->n2,
workspace->m2, fresh_pre, RIGHT );
#if defined(NEUTRAL_TERRITORY)
Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->m2,
H->NT, workspace->n2 );
#else
Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->m2,
system->N, workspace->n2 );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
gamma_old[0] = gamma_new[0];
gamma_old[1] = gamma_new[1];
ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
Check_MPI_Error( ret, __FILE__, __LINE__ );
delta[0] = redux[0];
delta[1] = redux[1];
gamma_new[0] = redux[2];
gamma_new[1] = redux[3];
r_norm[0] = SQRT( redux[4] );
r_norm[1] = SQRT( redux[5] );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
}
/* continue to solve the system that has not converged yet */
if ( r_norm[0] / b_norm[0] > tol )
{
Vector_Copy_From_rvec2( workspace->s, workspace->x, 0, system->n );
i += PIPECG( system, control, data, workspace,
H, workspace->b_s, tol, workspace->s, mpi_data, FALSE );
Vector_Copy_To_rvec2( workspace->x, workspace->s, 0, system->n );
}
else if ( r_norm[1] / b_norm[1] > tol )
{
Vector_Copy_From_rvec2( workspace->t, workspace->x, 1, system->n );
i += PIPECG( system, control, data, workspace,
H, workspace->b_t, tol, workspace->t, mpi_data, FALSE );
Vector_Copy_To_rvec2( workspace->x, workspace->t, 1, system->n );
}
if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE )
{
fprintf( stderr, "[WARNING] PIPECG convergence failed!\n" );
return i;
}
return i;
}
/* Pipelined Preconditioned Conjugate Gradient Method
*
* References:
* 1) Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm,
* P. Ghysels and W. Vanroose, Parallel Computing, 2014.
* 2) Scalable Non-blocking Preconditioned Conjugate Gradient Methods,
* Paul R. Eller and William Gropp, SC '16 Proceedings of the International Conference
* for High Performance Computing, Networking, Storage and Analysis, 2016.
* */
int PIPECG( reax_system const * const system, control_params const * const control,
simulation_data * const data,
storage * const workspace, sparse_matrix * const H, real * const b,
real tol, real * const x, mpi_datatypes * const mpi_data, int fresh_pre )
{
int i, j, ret;
real alpha, beta, delta, gamma_old, gamma_new, r_norm, b_norm;
real redux[4];
MPI_Request req;
#if defined(LOG_PERFORMANCE)
real time;
#endif
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec( system, control, data, mpi_data, H, x,
H->NT, workspace->u );
#else
Sparse_MatVec( system, control, data, mpi_data, H, x,
system->N, workspace->u );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
Vector_Sum( workspace->r, 1.0, b, -1.0, workspace->u, system->n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r,
workspace->m, fresh_pre, LEFT );
apply_preconditioner( system, workspace, control, data, mpi_data, workspace->m,
workspace->u, fresh_pre, RIGHT );
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec( system, control, data, mpi_data, H, workspace->u,
H->NT, workspace->w );
#else
Sparse_MatVec( system, control, data, mpi_data, H, workspace->u,
system->N, workspace->w );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
redux[0] = Dot_local( workspace->w, workspace->u, system->n );
redux[1] = Dot_local( workspace->r, workspace->u, system->n );
redux[2] = Dot_local( workspace->u, workspace->u, system->n );
redux[3] = Dot_local( b, b, system->n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Iallreduce( MPI_IN_PLACE, redux, 4, MPI_DOUBLE, MPI_SUM,
MPI_COMM_WORLD, &req );
Check_MPI_Error( ret, __FILE__, __LINE__ );
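/* overlap the non-blocking all-reduce with preconditioning and the next matrix-vector product */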
apply_preconditioner( system, workspace, control, data, mpi_data, workspace->w,
workspace->n, FALSE, LEFT );
apply_preconditioner( system, workspace, control, data, mpi_data, workspace->n,
workspace->m, FALSE, RIGHT );
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec( system, control, data, mpi_data, H, workspace->m,
H->NT, workspace->n );
#else
Sparse_MatVec( system, control, data, mpi_data, H, workspace->m,
system->N, workspace->n );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );
#endif
ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
Check_MPI_Error( ret, __FILE__, __LINE__ );
delta = redux[0];
gamma_new = redux[1];
r_norm = SQRT( redux[2] );
b_norm = SQRT( redux[3] );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
#endif
for ( i = 0; i < control->cm_solver_max_iters && r_norm / b_norm > tol; ++i )
{
if ( i > 0 )
{
beta = gamma_new / gamma_old;
alpha = gamma_new / (delta - beta / alpha * gamma_new);
}
else
{
beta = 0.0;
alpha = gamma_new / delta;
}
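/* pipelined recurrences: z = n + beta * z, q = m + beta * q, p = u + beta * p, d = w + beta * d,
followed by x += alpha * p, u -= alpha * q, w -= alpha * z, r -= alpha * d */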
Vector_Sum( workspace->z, 1.0, workspace->n, beta, workspace->z, system->n );
Vector_Sum( workspace->q, 1.0, workspace->m, beta, workspace->q, system->n );
Vector_Sum( workspace->p, 1.0, workspace->u, beta, workspace->p, system->n );
Vector_Sum( workspace->d, 1.0, workspace->w, beta, workspace->d, system->n );
Vector_Sum( x, 1.0, x, alpha, workspace->p, system->n );
Vector_Sum( workspace->u, 1.0, workspace->u, -1.0 * alpha, workspace->q, system->n );
Vector_Sum( workspace->w, 1.0, workspace->w, -1.0 * alpha, workspace->z, system->n );
Vector_Sum( workspace->r, 1.0, workspace->r, -1.0 * alpha, workspace->d, system->n );
redux[0] = Dot_local( workspace->w, workspace->u, system->n );
redux[1] = Dot_local( workspace->r, workspace->u, system->n );
redux[2] = Dot_local( workspace->u, workspace->u, system->n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
#endif
ret = MPI_Iallreduce( MPI_IN_PLACE, redux, 3, MPI_DOUBLE, MPI_SUM,
MPI_COMM_WORLD, &req );
Check_MPI_Error( ret, __FILE__, __LINE__ );
apply_preconditioner( system, workspace, control, data, mpi_data, workspace->w,
workspace->n, FALSE, LEFT );
apply_preconditioner( system, workspace, control, data, mpi_data, workspace->n,
workspace->m, FALSE, RIGHT );
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec( system, control, data, mpi_data, H, workspace->m,
H->NT, workspace->n );
#else
Sparse_MatVec( system, control, data, mpi_data, H, workspace->m,
system->N, workspace->n );
#endif
#if defined(LOG_PERFORMANCE)
time = Get_Time( );