Newer
Older
/*----------------------------------------------------------------------
PuReMD - Purdue ReaxFF Molecular Dynamics Program
Copyright (2010) Purdue University
Hasan Metin Aktulga, haktulga@cs.purdue.edu
Joseph Fogarty, jcfogart@mail.usf.edu
Sagar Pandit, pandit@usf.edu
Ananth Y Grama, ayg@cs.purdue.edu
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
See the GNU General Public License for more details:
<http://www.gnu.org/licenses/>.
----------------------------------------------------------------------*/
Kurt A. O'Hearn
committed
#include "reax_types.h"
Kurt A. O'Hearn
committed
#include "basic_comm.h"
#include "io_tools.h"
#include "tool_box.h"
#include "vector.h"
Kurt A. O'Hearn
committed
#if defined(HAVE_CUDA) && defined(DEBUG)
Kurt A. O'Hearn
committed
#include "cuda/cuda_validation.h"
Kurt A. O'Hearn
committed
#endif
#if defined(CG_PERFORMANCE)
real t_start, t_elapsed, matvec_time, dot_time;
#endif
void dual_Sparse_MatVec( sparse_matrix *A, rvec2 *x, rvec2 *b, int N )
{
int i, j, k, si;
real H;
for ( i = 0; i < N; ++i )
/* perform multiplication */
for ( i = 0; i < A->n; ++i )
{
si = A->start[i];
b[i][0] += A->entries[si].val * x[i][0];
b[i][1] += A->entries[si].val * x[i][1];
for ( k = si + 1; k < A->end[i]; ++k )
{
j = A->entries[k].j;
H = A->entries[k].val;
b[i][0] += H * x[j][0];
b[i][1] += H * x[j][1];
// comment out for tryQEq
//if( j < A->n ) {
b[j][0] += H * x[i][0];
b[j][1] += H * x[i][1];
//}
}
int dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, rvec2
*b, real tol, rvec2 *x, mpi_datatypes* mpi_data, FILE *fout,
simulation_data *data )
Kurt A. O'Hearn
committed
int i, j, n, N, matvecs;
rvec2 tmp, alpha, beta;
rvec2 my_sum, norm_sqr, b_norm, my_dot;
rvec2 sig_old, sig_new;
MPI_Comm comm;
n = system->n;
N = system->N;
comm = mpi_data->world;
matvecs = 0;
if ( system->my_rank == MASTER_NODE )
{
matvecs = 0;
t_start = matvec_time = dot_time = 0;
t_start = Get_Time( );
}
Kurt A. O'Hearn
committed
#if defined(HAVE_CUDA) && defined(DEBUG)
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
Dist( system, mpi_data, x, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2, rvec2_packer );
Kurt A. O'Hearn
committed
#if defined(HAVE_CUDA) && defined(DEBUG)
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
Coll( system, mpi_data, workspace->q2, RVEC2_PTR_TYPE,
mpi_data->mpi_rvec2, rvec2_unpacker );
if ( system->my_rank == MASTER_NODE )
Update_Timing_Info( &t_start, &matvec_time );
{
/* residual */
workspace->r2[j][0] = b[j][0] - workspace->q2[j][0];
workspace->r2[j][1] = b[j][1] - workspace->q2[j][1];
/* apply diagonal pre-conditioner */
workspace->d2[j][0] = workspace->r2[j][0] * workspace->Hdia_inv[j];
workspace->d2[j][1] = workspace->r2[j][1] * workspace->Hdia_inv[j];
}
/* norm of b */
my_sum[0] = my_sum[1] = 0;
for ( j = 0; j < n; ++j )
{
my_sum[0] += SQR( b[j][0] );
my_sum[1] += SQR( b[j][1] );
//fprintf (stderr, "cg: my_sum[ %f, %f] \n", my_sum[0], my_sum[1]);
MPI_Allreduce( &my_sum, &norm_sqr, 2, MPI_DOUBLE, MPI_SUM, comm );
b_norm[0] = SQRT( norm_sqr[0] );
b_norm[1] = SQRT( norm_sqr[1] );
//fprintf( stderr, "bnorm = %f %f\n", b_norm[0], b_norm[1] );
for ( j = 0; j < n; ++j )
{
my_dot[0] += workspace->r2[j][0] * workspace->d2[j][0];
my_dot[1] += workspace->r2[j][1] * workspace->d2[j][1];
//fprintf( stderr, "my_dot: %f %f\n", my_dot[0], my_dot[1] );
MPI_Allreduce( &my_dot, &sig_new, 2, MPI_DOUBLE, MPI_SUM, comm );
//fprintf( stderr, "HOST:sig_new: %f %f\n", sig_new[0], sig_new[1] );
Kurt A. O'Hearn
committed
Dist( system, mpi_data, workspace->d2, RVEC2_PTR_TYPE,
mpi_data->mpi_rvec2, rvec2_packer );
//print_host_rvec2( workspace->d2, N );
dual_Sparse_MatVec( H, workspace->d2, workspace->q2, N );
// tryQEq
Kurt A. O'Hearn
committed
Coll( system, mpi_data, workspace->q2, RVEC2_PTR_TYPE,
mpi_data->mpi_rvec2, rvec2_unpacker );
#if defined(CG_PERFORMANCE)
if ( system->my_rank == MASTER_NODE )
Update_Timing_Info( &t_start, &matvec_time );
#endif
/* dot product: d.q */
my_dot[0] = my_dot[1] = 0;
for ( j = 0; j < n; ++j )
{
my_dot[0] += workspace->d2[j][0] * workspace->q2[j][0];
my_dot[1] += workspace->d2[j][1] * workspace->q2[j][1];
}
MPI_Allreduce( &my_dot, &tmp, 2, MPI_DOUBLE, MPI_SUM, comm );
//fprintf( stderr, "tmp: %f %f\n", tmp[0], tmp[1] );
alpha[0] = sig_new[0] / tmp[0];
alpha[1] = sig_new[1] / tmp[1];
my_dot[0] = my_dot[1] = 0;
{
/* update x */
x[j][0] += alpha[0] * workspace->d2[j][0];
Loading
Loading full blame...