Skip to content
Snippets Groups Projects
charges.c 16.4 KiB
Newer Older
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
/*----------------------------------------------------------------------
  PuReMD - Purdue ReaxFF Molecular Dynamics Program
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
  Copyright (2010) Purdue University
  Hasan Metin Aktulga, haktulga@cs.purdue.edu
  Joseph Fogarty, jcfogart@mail.usf.edu
  Sagar Pandit, pandit@usf.edu
  Ananth Y Grama, ayg@cs.purdue.edu

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
  published by the Free Software Foundation; either version 2 of
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
  the License, or (at your option) any later version.
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
  See the GNU General Public License for more details:
  <http://www.gnu.org/licenses/>.
  ----------------------------------------------------------------------*/

#include "charges.h"

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#include "allocate.h"
#include "basic_comm.h"
#include "io_tools.h"
#include "lin_alg.h"
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#include "tool_box.h"

  #include "cuda_charges.h"
  #include "cuda_lin_alg.h"
  #include "cuda_validation.h"
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
int compare_matrix_entry(const void *v1, const void *v2)
{
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    return ((sparse_matrix_entry *)v1)->j - ((sparse_matrix_entry *)v2)->j;
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
}


void Sort_Matrix_Rows( sparse_matrix *A )
{
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    int i, si, ei;

    for ( i = 0; i < A->n; ++i )
    {
        si = A->start[i];
        ei = A->end[i];
        qsort( &(A->entries[si]), ei - si,
                sizeof(sparse_matrix_entry), compare_matrix_entry );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
}


void Calculate_Droptol( sparse_matrix *A, real *droptol, real dtol )
{
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    int i, j, k;
    real val;

    /* init droptol to 0 - not necessary for an upper-triangular A */
    for ( i = 0; i < A->n; ++i )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
        droptol[i] = 0;
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

    /* calculate sqaure of the norm of each row */
    for ( i = 0; i < A->n; ++i )
    {
        val = A->entries[A->start[i]].val; // diagonal entry
        droptol[i] += val * val;
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
        // only within my block
        for ( k = A->start[i] + 1; A->entries[k].j < A->n; ++k )
        {
            j = A->entries[k].j;
            val = A->entries[k].val;

            droptol[i] += val * val;
            droptol[j] += val * val;
        }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

    /* calculate local droptol for each row */
    //fprintf( stderr, "droptol: " );
    for ( i = 0; i < A->n; ++i )
    {
        droptol[i] = SQRT( droptol[i] ) * dtol;
        //fprintf( stderr, "%f\n", droptol[i] );
    }
    //fprintf( stderr, "\n" );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
}


int Estimate_LU_Fill( sparse_matrix *A, real *droptol )
{
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    int i, j, pj;
    int fillin;
    real val;

    fillin = 0;
    for ( i = 0; i < A->n; ++i )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
        for ( pj = A->start[i] + 1; A->entries[pj].j < A->n; ++pj )
        {
            j = A->entries[pj].j;
            val = A->entries[pj].val;
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
                ++fillin;
        }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

    return fillin + A->n;
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
void ICHOLT( sparse_matrix *A, real *droptol,
        sparse_matrix *L, sparse_matrix *U )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
{
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    sparse_matrix_entry tmp[1000];
    int i, j, pj, k1, k2, tmptop, Utop;
    real val, dval;
    int *Ltop;

    Ltop = (int*) malloc((A->n) * sizeof(int));

    // clear data structures
    Utop = 0;
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    tmptop = 0;
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    for ( i = 0; i < A->n; ++i )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
        U->start[i] = L->start[i] = L->end[i] = Ltop[i] = 0;
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

    for ( i = A->n - 1; i >= 0; --i )
    {
        U->start[i] = Utop;
        tmptop = 0;

        for ( pj = A->end[i] - 1; A->entries[pj].j >= A->n; --pj ); // skip ghosts
        for ( ; pj > A->start[i]; --pj )
        {
            j = A->entries[pj].j;
            val = A->entries[pj].val;
            fprintf( stderr, "i: %d, j: %d  val=%f ", i, j, val );
            //fprintf( stdout, "%d %d %24.16f\n", 6540-i, 6540-j, val );
            //fprintf( stdout, "%d %d %24.16f\n", 6540-j, 6540-i, val );

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
            {
                k1 = tmptop - 1;
                k2 = U->start[j] + 1;
                while ( k1 >= 0 && k2 < U->end[j] )
                {
                    if ( tmp[k1].j < U->entries[k2].j )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
                        k1--;
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
                    else if ( tmp[k1].j > U->entries[k2].j )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
                        k2++;
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
                    else
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
                        val -= (tmp[k1--].val * U->entries[k2++].val);
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
                }

                // U matrix is upper triangular
                val /= U->entries[U->start[j]].val;
                fprintf( stderr, " newval=%f", val );
                tmp[tmptop].j = j;
                tmp[tmptop].val = val;
                tmptop++;
            }
            fprintf( stderr, "\n" );
        }
        //fprintf( stderr, "i = %d - tmptop = %d\n", i, tmptop );

        // compute the ith diagonal in U
        dval = A->entries[A->start[i]].val;
        //fprintf( stdout, "%d %d %24.16f\n", 6540-i, 6540-i, dval );
        for ( k1 = 0; k1 < tmptop; ++k1 )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
            dval -= SQR(tmp[k1].val);
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
        dval = SQRT(dval);
        // keep the diagonal in any case
        U->entries[Utop].j = i;
        U->entries[Utop].val = dval;
        Utop++;

        fprintf(stderr, "row%d: droptol=%.15f val=%.15f\n", i, droptol[i], dval);
        for ( k1 = tmptop - 1; k1 >= 0; --k1 )
        {
            // apply the dropping rule once again
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
            {
                U->entries[Utop].j = tmp[k1].j;
                U->entries[Utop].val = tmp[k1].val;
                Utop++;
                Ltop[tmp[k1].j]++;
                fprintf( stderr, "%d(%.15f)\n", tmp[k1].j, tmp[k1].val );
            }
        }

        U->end[i] = Utop;
        //fprintf( stderr, "i = %d - Utop = %d\n", i, Utop );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#if defined(DEBUG)
    // print matrix U
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    fprintf( stderr, "nnz(U): %d\n", Utop );
    for ( i = 0; i < U->n; ++i )
    {
        fprintf( stderr, "row%d: ", i );
        for ( pj = U->start[i]; pj < U->end[i]; ++pj )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
            fprintf( stderr, "%d ", U->entries[pj].j );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
        fprintf( stderr, "\n" );
    }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#endif

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    // transpose matrix U into L
    L->start[0] = L->end[0] = 0;
    for ( i = 1; i < L->n; ++i )
    {
        L->start[i] = L->end[i] = L->start[i - 1] + Ltop[i - 1] + 1;
        //fprintf( stderr, "i=%d  L->start[i]=%d\n", i, L->start[i] );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    for ( i = 0; i < U->n; ++i )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
        for ( pj = U->start[i]; pj < U->end[i]; ++pj )
        {
            j = U->entries[pj].j;
            L->entries[L->end[j]].j = i;
            L->entries[L->end[j]].val = U->entries[pj].val;
            L->end[j]++;
        }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#if defined(DEBUG)
    // print matrix L
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    fprintf( stderr, "nnz(L): %d\n", L->end[L->n - 1] );
    for ( i = 0; i < L->n; ++i )
    {
        fprintf( stderr, "row%d: ", i );
        for ( pj = L->start[i]; pj < L->end[i]; ++pj )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
            fprintf( stderr, "%d ", L->entries[pj].j );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
        fprintf( stderr, "\n" );
    }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#endif
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    sfree( Ltop, "Ltop" );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
void Init_MatVec( reax_system *system, simulation_data *data,
        control_params *control,  storage *workspace, mpi_datatypes *mpi_data )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
{
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    int i; //, fillin;
    reax_atom *atom;

    /*if( (data->step - data->prev_steps) % control->refactor == 0 ||
        workspace->L == NULL ) {
      //Print_Linear_System( system, control, workspace, data->step );
      Sort_Matrix_Rows( workspace->H );
      fprintf( stderr, "H matrix sorted\n" );
      Calculate_Droptol( workspace->H, workspace->droptol, control->droptol );
      fprintf( stderr, "drop tolerances calculated\n" );
      if( workspace->L == NULL ) {
        fillin = Estimate_LU_Fill( workspace->H, workspace->droptol );

        if( Allocate_Matrix( &(workspace->L), workspace->H->cap, fillin ) == 0 ||
      Allocate_Matrix( &(workspace->U), workspace->H->cap, fillin ) == 0 ) {
    fprintf( stderr, "not enough memory for LU matrices. terminating.\n" );
    MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY );
        }

        workspace->L->n = workspace->H->n;
        workspace->U->n = workspace->H->n;
    #if defined(DEBUG_FOCUS)
        fprintf( stderr, "p%d: n=%d, fillin = %d\n",
           system->my_rank, workspace->L->n, fillin );
        fprintf( stderr, "p%d: allocated memory: L = U = %ldMB\n",
                 system->my_rank,fillin*sizeof(sparse_matrix_entry)/(1024*1024) );
    #endif
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
      ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U );
    #if defined(DEBUG_FOCUS)
      fprintf( stderr, "p%d: icholt finished\n", system->my_rank );
      //sprintf( fname, "%s.L%d.out", control->sim_name, data->step );
      //Print_Sparse_Matrix2( workspace->L, fname );
      //Print_Sparse_Matrix( U );
    #endif
    }*/

    for ( i = 0; i < system->n; ++i )
    {
        atom = &( system->my_atoms[i] );

        /* init pre-conditioner for H and init solution vectors */
        workspace->Hdia_inv[i] = 1. / system->reax_param.sbp[ atom->type ].eta;
        workspace->b_s[i] = -system->reax_param.sbp[ atom->type ].chi;
        workspace->b_t[i] = -1.0;
        workspace->b[i][0] = -system->reax_param.sbp[ atom->type ].chi;
        workspace->b[i][1] = -1.0;

        /* linear extrapolation for s and for t */
        // newQEq: no extrapolation!
        //workspace->s[i] = 2 * atom->s[0] - atom->s[1]; //0;
        //workspace->t[i] = 2 * atom->t[0] - atom->t[1]; //0;
        //workspace->x[i][0] = 2 * atom->s[0] - atom->s[1]; //0;
        //workspace->x[i][1] = 2 * atom->t[0] - atom->t[1]; //0;

        /* quadratic extrapolation for s and t */
        // workspace->s[i] = atom->s[2] + 3 * ( atom->s[0] - atom->s[1] );
        // workspace->t[i] = atom->t[2] + 3 * ( atom->t[0] - atom->t[1] );
        //workspace->x[i][0] = atom->s[2] + 3 * ( atom->s[0] - atom->s[1] );
        workspace->x[i][1] = atom->t[2] + 3 * ( atom->t[0] - atom->t[1] );

        /* cubic extrapolation for s and t */
        workspace->x[i][0] = 4 * (atom->s[0] + atom->s[2]) - (6 * atom->s[1] + atom->s[3]);
        //workspace->x[i][1] = 4*(atom->t[0]+atom->t[2])-(6*atom->t[1]+atom->t[3]);

        // fprintf(stderr, "i=%d s=%f t=%f\n", i, workspace->s[i], workspace->t[i]);
    }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
void Calculate_Charges( reax_system *system, storage *workspace,
        mpi_datatypes *mpi_data )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
{
    int i, scale;
    real u;//, s_sum, t_sum;
    rvec2 my_sum, all_sum;
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    reax_atom *atom;
    real *q;

    scale = sizeof(real) / sizeof(void);
    q = (real*) malloc(system->N * sizeof(real));

    //s_sum = Parallel_Vector_Acc(workspace->s, system->n, mpi_data->world);
    //t_sum = Parallel_Vector_Acc(workspace->t, system->n, mpi_data->world);
    my_sum[0] = my_sum[1] = 0;
    for ( i = 0; i < system->n; ++i )
    {
        my_sum[0] += workspace->x[i][0];
        my_sum[1] += workspace->x[i][1];
    }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    fprintf (stderr, "Host : my_sum[0]: %f and %f \n", my_sum[0], my_sum[1]);
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    MPI_Allreduce( &my_sum, &all_sum, 2, MPI_DOUBLE, MPI_SUM, mpi_data->world );

    u = all_sum[0] / all_sum[1];
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    fprintf (stderr, "Host : u: %f \n", u);
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    for ( i = 0; i < system->n; ++i )
    {
        atom = &( system->my_atoms[i] );

        /* compute charge based on s & t */
        //atom->q = workspace->s[i] - u * workspace->t[i];
        q[i] = atom->q = workspace->x[i][0] - u * workspace->x[i][1];

        /* backup s & t */
        atom->s[3] = atom->s[2];
        atom->s[2] = atom->s[1];
        atom->s[1] = atom->s[0];
        //atom->s[0] = workspace->s[i];
        atom->s[0] = workspace->x[i][0];

        atom->t[3] = atom->t[2];
        atom->t[2] = atom->t[1];
        atom->t[1] = atom->t[0];
        //atom->t[0] = workspace->t[i];
        atom->t[0] = workspace->x[i][1];
    }

    Dist( system, mpi_data, q, MPI_DOUBLE, scale, real_packer );
    for ( i = system->n; i < system->N; ++i )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
        system->my_atoms[i].q = q[i];
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

    free(q);
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
void Cuda_Calculate_Charges( reax_system *system, storage *workspace,
                             mpi_datatypes *mpi_data )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
{
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    int        i, scale;
    real       u;//, s_sum, t_sum;
    rvec2      my_sum, all_sum;
    reax_atom *atom;
    real *q;
    my_sum [0] = my_sum[1] = 0.0;

    scale = sizeof(real) / sizeof(void);
    q =  (real *) host_scratch;
    memset( q, 0, system->N * sizeof (real));
    cuda_charges_x( system, my_sum );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    //fprintf (stderr, "Device: my_sum[0]: %f and %f \n", my_sum[0], my_sum[1]);

    MPI_Allreduce( &my_sum, &all_sum, 2, MPI_DOUBLE, MPI_SUM, mpi_data->world );

    u = all_sum[0] / all_sum[1];
    //fprintf (stderr, "Device: u: %f \n", u);

    cuda_charges_st( system, workspace, q, u );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

    Dist( system, mpi_data, q, MPI_DOUBLE, scale, real_packer );

    cuda_charges_updateq( system, q );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
}
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
void QEq( reax_system *system, control_params *control, simulation_data *data,
          storage *workspace, output_controls *out_control,
          mpi_datatypes *mpi_data )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
{
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    int s_matvecs, t_matvecs;

    Init_MatVec( system, data, control, workspace, mpi_data );


    //if( data->step == 50010 ) {
    //  Print_Linear_System( system, control, workspace, data->step );
    // }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#if defined(DEBUG)
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    fprintf( stderr, "p%d: initialized qEq\n", system->my_rank );
    //Print_Linear_System( system, control, workspace, data->step );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#endif

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    //MATRIX CHANGES
    s_matvecs = dual_CG(system, workspace, &workspace->H, workspace->b,
                        control->q_err, workspace->x, mpi_data, out_control->log, data);
    t_matvecs = 0;
    //fprintf (stderr, "Host: First CG complated with iterations: %d \n", s_matvecs);
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    //s_matvecs = CG(system, workspace, workspace->H, workspace->b_s, //newQEq sCG
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    // control->q_err, workspace->s, mpi_data, out_control->log );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    //s_matvecs = PCG( system, workspace, workspace->H, workspace->b_s,
    //   control->q_err, workspace->L, workspace->U, workspace->s,
    //   mpi_data, out_control->log );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#if defined(DEBUG)
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    fprintf( stderr, "p%d: first CG completed\n", system->my_rank );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#endif

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    //t_matvecs = CG(system, workspace, workspace->H, workspace->b_t, //newQEq sCG
    // control->q_err, workspace->t, mpi_data, out_control->log );
    //t_matvecs = PCG( system, workspace, workspace->H, workspace->b_t,
    //   control->q_err, workspace->L, workspace->U, workspace->t,
    //   mpi_data, out_control->log );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#if defined(DEBUG)
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    fprintf( stderr, "p%d: second CG completed\n", system->my_rank );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#endif

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    Calculate_Charges( system, workspace, mpi_data );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#if defined(DEBUG)
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    fprintf( stderr, "p%d: computed charges\n", system->my_rank );
    //Print_Charges( system );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#endif

#if defined(LOG_PERFORMANCE)
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    if ( system->my_rank == MASTER_NODE )
    {
        data->timing.s_matvecs += s_matvecs;
        data->timing.t_matvecs += t_matvecs;
    }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#endif
}

void Cuda_QEq( reax_system *system, control_params *control, simulation_data
        *data, storage *workspace, output_controls *out_control, mpi_datatypes
        *mpi_data )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
{
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    int s_matvecs, t_matvecs;

    Cuda_Init_MatVec( system, workspace );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    //if (data->step > 0) {
    //    compare_rvec2 (workspace->b, dev_workspace->b, system->n, "b");
    //    compare_rvec2 (workspace->x, dev_workspace->x, system->n, "x");
    // compare_array (workspace->b_s, dev_workspace->b_s, system->n, "b_s");
    // compare_array (workspace->b_t, dev_workspace->b_t, system->n, "b_t");
    //}
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

//#ifdef __CUDA_DEBUG__
//  Init_MatVec( system, data, control, workspace, mpi_data );
//#endif

#if defined(DEBUG)
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    fprintf( stderr, "p%d: initialized qEq\n", system->my_rank );
    //Print_Linear_System( system, control, workspace, data->step );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#endif

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    //MATRIX CHANGES
    s_matvecs = Cuda_dual_CG(system, workspace, &dev_workspace->H,
            dev_workspace->b, control->q_err, dev_workspace->x, mpi_data,
            out_control->log, data);
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    t_matvecs = 0;
    //fprintf (stderr, "Device: First CG complated with iterations: %d \n", s_matvecs);
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

#if defined(DEBUG)
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    fprintf( stderr, "p%d: first CG completed\n", system->my_rank );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#endif

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    Cuda_Calculate_Charges( system, workspace, mpi_data );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#if defined(DEBUG)
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    fprintf( stderr, "p%d: computed charges\n", system->my_rank );
    //Print_Charges( system );
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#endif

#if defined(LOG_PERFORMANCE)
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    if ( system->my_rank == MASTER_NODE )
    {
        data->timing.s_matvecs += s_matvecs;
        data->timing.t_matvecs += t_matvecs;
    }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#endif
}