From c416ab52317ae7d59b487a5af7fdb370d8cbcc82 Mon Sep 17 00:00:00 2001
From: "Kurt A. O'Hearn" <ohearnku@cse.msu.edu>
Date: Fri, 25 Mar 2016 21:46:36 -0400
Subject: [PATCH] sPuReMD: add preconditioning timing and analysis.

---
 puremd_rc_1003/sPuReMD/GMRES.c       |  95 +++++++++++++++++----
 puremd_rc_1003/sPuReMD/GMRES.h       |   8 +-
 puremd_rc_1003/sPuReMD/QEq.c         | 121 ++++++++++++++-------------
 puremd_rc_1003/sPuReMD/init_md.c     |   6 +-
 puremd_rc_1003/sPuReMD/mytypes.h     |   2 +
 puremd_rc_1003/sPuReMD/print_utils.c |   8 +-
 6 files changed, 162 insertions(+), 78 deletions(-)

diff --git a/puremd_rc_1003/sPuReMD/GMRES.c b/puremd_rc_1003/sPuReMD/GMRES.c
index aa40cf4a..bbc9fc0e 100644
--- a/puremd_rc_1003/sPuReMD/GMRES.c
+++ b/puremd_rc_1003/sPuReMD/GMRES.c
@@ -160,9 +160,9 @@ static void Jacobi_Iter( const sparse_matrix * const G, const TRIANGULARITY tri,
     unsigned int i, iter = 0;
     real *Dinv_b, *rp, *rp2, *rp3;
 
-    if ( (Dinv_b = (real*) malloc(sizeof(real) * n)) == NULL 
-        || (rp = (real*) malloc(sizeof(real) * n)) == NULL
-        || (rp2 = (real*) malloc(sizeof(real) * n)) == NULL )
+    if ( (Dinv_b = (real*) malloc(sizeof(real) * n)) == NULL
+            || (rp = (real*) malloc(sizeof(real) * n)) == NULL
+            || (rp2 = (real*) malloc(sizeof(real) * n)) == NULL )
     {
         fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" );
         exit(INSUFFICIENT_SPACE);
@@ -171,7 +171,7 @@ static void Jacobi_Iter( const sparse_matrix * const G, const TRIANGULARITY tri,
     Vector_MakeZero( rp, n );
 
     /* precompute and cache, as invariant in loop below */
-    for( i = 0; i < n; ++i )
+    for ( i = 0; i < n; ++i )
     {
         Dinv_b[i] = Dinv[i] * b[i];
     }
@@ -198,18 +198,23 @@ static void Jacobi_Iter( const sparse_matrix * const G, const TRIANGULARITY tri,
 
 
 /* generalized minimual residual iterative solver for sparse linear systems,
- * no preconditioner */
+ * diagonal preconditioner */
 int GMRES( static_storage *workspace, sparse_matrix *H,
-           real *b, real tol, real *x, FILE *fout )
+           real *b, real tol, real *x, FILE *fout, real *time )
 {
     int i, j, k, itr, N;
     real cc, tmp1, tmp2, temp, bnorm;
+    struct timeval start, stop;
 
     N = H->n;
     bnorm = Norm( b, N );
     /* apply the diagonal pre-conditioner to rhs */
+    gettimeofday( &start, NULL );
     for ( i = 0; i < N; ++i )
         workspace->b_prc[i] = b[i] * workspace->Hdia_inv[i];
+    gettimeofday( &stop, NULL );
+    *time += (stop.tv_sec + stop.tv_usec / 1000000.0)
+             - (start.tv_sec + start.tv_usec / 1000000.0);
 
     /* GMRES outer-loop */
     for ( itr = 0; itr < MAX_ITR; ++itr )
@@ -229,8 +234,13 @@ int GMRES( static_storage *workspace, sparse_matrix *H,
         {
             /* matvec */
             Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] );
+            /*pre-conditioner*/
+            gettimeofday( &start, NULL );
             for ( k = 0; k < N; ++k )
-                workspace->v[j + 1][k] *= workspace->Hdia_inv[k]; /*pre-conditioner*/
+                workspace->v[j + 1][k] *= workspace->Hdia_inv[k];
+            gettimeofday( &stop, NULL );
+            *time += (stop.tv_sec + stop.tv_usec / 1000000.0)
+                     - (start.tv_sec + start.tv_usec / 1000000.0);
             //fprintf( stderr, "%d-%d: matvec done.\n", itr, j );
 
             /* apply modified Gram-Schmidt to orthogonalize the new residual */
@@ -517,10 +527,11 @@ int GMRES_HouseHolder( static_storage *workspace, sparse_matrix *H,
  * with preconditioner using factors LU \approx H
  * and forward / backward substitution */
 int PGMRES( static_storage *workspace, sparse_matrix *H, real *b, real tol,
-            sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout )
+            sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout, real *time )
 {
     int i, j, k, itr, N;
     real cc, tmp1, tmp2, temp, bnorm;
+    struct timeval start, stop;
 
     N = H->n;
     bnorm = Norm( b, N );
@@ -531,8 +542,12 @@ int PGMRES( static_storage *workspace, sparse_matrix *H, real *b, real tol,
         /* calculate r0 */
         Sparse_MatVec( H, x, workspace->b_prm );
         Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N );
+        gettimeofday( &start, NULL );
         Forward_Subs( L, workspace->v[0], workspace->v[0] );
         Backward_Subs( U, workspace->v[0], workspace->v[0] );
+        gettimeofday( &stop, NULL );
+        *time += (stop.tv_sec + stop.tv_usec / 1000000.0)
+                 - (start.tv_sec + start.tv_usec / 1000000.0);
         workspace->g[0] = Norm( workspace->v[0], N );
         Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N );
         //fprintf( stderr, "res: %.15e\n", workspace->g[0] );
@@ -542,8 +557,12 @@ int PGMRES( static_storage *workspace, sparse_matrix *H, real *b, real tol,
         {
             /* matvec */
             Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] );
+            gettimeofday( &start, NULL );
             Forward_Subs( L, workspace->v[j + 1], workspace->v[j + 1] );
             Backward_Subs( U, workspace->v[j + 1], workspace->v[j + 1] );
+            gettimeofday( &stop, NULL );
+            *time += (stop.tv_sec + stop.tv_usec / 1000000.0)
+                     - (start.tv_sec + start.tv_usec / 1000000.0);
 
             /* apply modified Gram-Schmidt to orthogonalize the new residual */
             for ( i = 0; i < j - 1; i++ ) workspace->h[i][j] = 0;
@@ -643,11 +662,12 @@ int PGMRES( static_storage *workspace, sparse_matrix *H, real *b, real tol,
  * with preconditioner using factors LU \approx H
  * and Jacobi iteration for approximate factor application */
 int PGMRES_Jacobi( static_storage *workspace, sparse_matrix *H, real *b, real tol,
-                   sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout )
+                   sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout, real *time )
 {
     int i, j, k, itr, N, si;
     real cc, tmp1, tmp2, temp, bnorm;
     real *Dinv_L, *Dinv_U;
+    struct timeval start, stop;
 
     N = H->n;
     bnorm = Norm( b, N );
@@ -658,8 +678,8 @@ int PGMRES_Jacobi( static_storage *workspace, sparse_matrix *H, real *b, real to
      *   G = I - D^{-1}R
      *   R = triangular matrix
      *   D = diagonal matrix, diagonals from R */
-    if ( (Dinv_L = (real*) malloc(sizeof(real) * N)) == NULL 
-        || (Dinv_U = (real*) malloc(sizeof(real) * N)) == NULL )
+    if ( (Dinv_L = (real*) malloc(sizeof(real) * N)) == NULL
+            || (Dinv_U = (real*) malloc(sizeof(real) * N)) == NULL )
     {
         fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" );
         exit(INSUFFICIENT_SPACE);
@@ -682,8 +702,12 @@ int PGMRES_Jacobi( static_storage *workspace, sparse_matrix *H, real *b, real to
         Sparse_MatVec( H, x, workspace->b_prm );
         Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N );
         // TODO: add parameters to config file
-        Jacobi_Iter( L, LOWER, Dinv_L, N, workspace->v[0], workspace->v[0], 30 );
-        Jacobi_Iter( U, UPPER, Dinv_U, N, workspace->v[0], workspace->v[0], 30 );
+        gettimeofday( &start, NULL );
+        Jacobi_Iter( L, LOWER, Dinv_L, N, workspace->v[0], workspace->v[0], 50 );
+        Jacobi_Iter( U, UPPER, Dinv_U, N, workspace->v[0], workspace->v[0], 50 );
+        gettimeofday( &stop, NULL );
+        *time += (stop.tv_sec + stop.tv_usec / 1000000.0)
+                 - (start.tv_sec + start.tv_usec / 1000000.0);
         workspace->g[0] = Norm( workspace->v[0], N );
         Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N );
         //fprintf( stderr, "res: %.15e\n", workspace->g[0] );
@@ -694,8 +718,12 @@ int PGMRES_Jacobi( static_storage *workspace, sparse_matrix *H, real *b, real to
             /* matvec */
             Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] );
             // TODO: add parameters to config file
-            Jacobi_Iter( L, LOWER, Dinv_L, N, workspace->v[j + 1], workspace->v[j + 1], 30 );
-            Jacobi_Iter( U, UPPER, Dinv_U, N, workspace->v[j + 1], workspace->v[j + 1], 30 );
+            gettimeofday( &start, NULL );
+            Jacobi_Iter( L, LOWER, Dinv_L, N, workspace->v[j + 1], workspace->v[j + 1], 50 );
+            Jacobi_Iter( U, UPPER, Dinv_U, N, workspace->v[j + 1], workspace->v[j + 1], 50 );
+            gettimeofday( &stop, NULL );
+            *time += (stop.tv_sec + stop.tv_usec / 1000000.0)
+                     - (start.tv_sec + start.tv_usec / 1000000.0);
 
             /* apply modified Gram-Schmidt to orthogonalize the new residual */
             for ( i = 0; i < j - 1; i++ )
@@ -964,3 +992,40 @@ int SDM( static_storage *workspace, sparse_matrix *H,
 
     return i;
 }
+
+
+real condest( sparse_matrix *L, sparse_matrix *U )
+{
+    unsigned int i, N;
+    real *e, c;
+
+    N = L->n;
+
+    if ( (e = (real*) malloc(sizeof(real) * N)) == NULL )
+    {
+        fprintf( stderr, "Not enough memory for condest. Terminating.\n" );
+        exit(INSUFFICIENT_SPACE);
+    }
+
+    for ( i = 0; i < N; ++i )
+    {
+        e[i] = 1.;
+    }
+
+    Forward_Subs( L, e, e );
+    Backward_Subs( U, e, e );
+
+    c = fabs(e[0]);
+    for ( i = 1; i < N; ++i)
+    {
+        if ( fabs(e[i]) > c )
+        {
+            c = fabs(e[i]);
+        }
+
+    }
+
+    free( e );
+
+    return c;
+}
diff --git a/puremd_rc_1003/sPuReMD/GMRES.h b/puremd_rc_1003/sPuReMD/GMRES.h
index 37aa994b..2a48525b 100644
--- a/puremd_rc_1003/sPuReMD/GMRES.h
+++ b/puremd_rc_1003/sPuReMD/GMRES.h
@@ -33,16 +33,16 @@ typedef enum
 } TRIANGULARITY;
 
 int GMRES( static_storage*, sparse_matrix*,
-           real*, real, real*, FILE* );
+           real*, real, real*, FILE*, real* );
 
 int GMRES_HouseHolder( static_storage*, sparse_matrix*,
                        real*, real, real*, FILE* );
 
 int PGMRES( static_storage*, sparse_matrix*, real*, real,
-            sparse_matrix*, sparse_matrix*, real*, FILE* );
+            sparse_matrix*, sparse_matrix*, real*, FILE*, real* );
 
 int PGMRES_Jacobi( static_storage*, sparse_matrix*, real*, real,
-                   sparse_matrix*, sparse_matrix*, real*, FILE* );
+                   sparse_matrix*, sparse_matrix*, real*, FILE*, real* );
 
 int PCG( static_storage*, sparse_matrix*, real*, real,
          sparse_matrix*, sparse_matrix*, real*, FILE* );
@@ -53,4 +53,6 @@ int CG( static_storage*, sparse_matrix*,
 int uyduruk_GMRES( static_storage*, sparse_matrix*,
                    real*, real, real*, int, FILE* );
 
+real condest( sparse_matrix*, sparse_matrix* );
+
 #endif
diff --git a/puremd_rc_1003/sPuReMD/QEq.c b/puremd_rc_1003/sPuReMD/QEq.c
index 6c268de2..762281b7 100644
--- a/puremd_rc_1003/sPuReMD/QEq.c
+++ b/puremd_rc_1003/sPuReMD/QEq.c
@@ -110,13 +110,16 @@ int Estimate_LU_Fill( sparse_matrix *A, real *droptol )
 
 
 /* Incomplete Cholesky factorization with thresholding */
-void ICHOLT( sparse_matrix *A, real *droptol,
-             sparse_matrix *L, sparse_matrix *U )
+real ICHOLT( sparse_matrix *A, real *droptol,
+            sparse_matrix *L, sparse_matrix *U )
 {
     sparse_matrix_entry tmp[1000];
     int i, j, pj, k1, k2, tmptop, Ltop;
     real val;
     int *Utop;
+    struct timeval start, stop;
+
+    gettimeofday( &start, NULL );
 
     Utop = (int*) malloc((A->n + 1) * sizeof(int));
 
@@ -204,7 +207,7 @@ void ICHOLT( sparse_matrix *A, real *droptol,
     }
 
     L->start[i] = Ltop;
-    //fprintf( stderr, "nnz(L): %d, max: %d\n", Ltop, L->n * 50 );
+//    fprintf( stderr, "nnz(L): %d, max: %d\n", Ltop, L->n * 50 );
 
     /* U = L^T (Cholesky factorization) */
     for ( i = 1; i <= U->n; ++i )
@@ -222,20 +225,24 @@ void ICHOLT( sparse_matrix *A, real *droptol,
         }
     }
 
-    //fprintf( stderr, "nnz(U): %d, max: %d\n", Utop[U->n], U->n * 50 );
+//    fprintf( stderr, "nnz(U): %d, max: %d\n", Utop[U->n], U->n * 50 );
 
     free(Utop);
+
+    gettimeofday( &stop, NULL );
+    return (stop.tv_sec + stop.tv_usec / 1000000.0)
+        - (start.tv_sec + start.tv_usec / 1000000.0);
 }
 
 
 /* Fine-grained (parallel) incomplete Cholesky factorization
- * 
+ *
  * Reference:
  * Edmond Chow and Aftab Patel
  * Fine-Grained Parallel Incomplete LU Factorization
  * SIAM J. Sci. Comp. */
 static void ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
-             sparse_matrix * const U_t, sparse_matrix * const U )
+                       sparse_matrix * const U_t, sparse_matrix * const U )
 {
     unsigned int i, j, k, pj, x = 0, y = 0, ei_x, ei_y;
     real *D, *D_inv, sum;
@@ -265,16 +272,16 @@ static void ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     }
 
     /* to get convergence, A must have unit diagonal, so apply
-     * transformation DAD, where D = D(1./sqrt(D(A))) */ 
-    memcpy( DAD->start, A->start, sizeof(int) * (A->n+1) );
+     * transformation DAD, where D = D(1./sqrt(D(A))) */
+    memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) );
     for ( i = 0; i < A->n; ++i )
     {
         /* non-diagonals */
         for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj )
         {
-           DAD->entries[pj].j = A->entries[pj].j;
-           DAD->entries[pj].val =
-                   A->entries[pj].val * D[i] * D[A->entries[pj].j];
+            DAD->entries[pj].j = A->entries[pj].j;
+            DAD->entries[pj].val =
+                A->entries[pj].val * D[i] * D[A->entries[pj].j];
         }
         /* diagonal */
         DAD->entries[pj].j = A->entries[pj].j;
@@ -283,7 +290,7 @@ static void ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
 
     /* initial guesses for U^T,
      * assume: A and DAD symmetric and stored lower triangular */
-    memcpy( U_t->start, DAD->start, sizeof(int) * (DAD->n+1) );
+    memcpy( U_t->start, DAD->start, sizeof(int) * (DAD->n + 1) );
     memcpy( U_t->entries, DAD->entries, sizeof(sparse_matrix_entry) * (DAD->m) );
 
     for ( i = 0; i < sweeps; ++i )
@@ -298,7 +305,7 @@ static void ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
             ei_x = 0;
             for ( k = 0; k <= A->n; ++k )
             {
-                if( U_t->start[k] > j )
+                if ( U_t->start[k] > j )
                 {
                     x = U_t->start[k - 1];
                     ei_x = U_t->start[k];
@@ -310,17 +317,17 @@ static void ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
             ei_y = U_t->start[U_t->entries[j].j + 1];
 
             /* sparse dot product: dot( U^T(i,1:j-1), U^T(j,1:j-1) ) */
-            while( U_t->entries[x].j < U_t->entries[j].j && 
-                    U_t->entries[y].j < U_t->entries[j].j && 
+            while ( U_t->entries[x].j < U_t->entries[j].j &&
+                    U_t->entries[y].j < U_t->entries[j].j &&
                     x < ei_x && y < ei_y )
             {
-                if( U_t->entries[x].j == U_t->entries[y].j )
+                if ( U_t->entries[x].j == U_t->entries[y].j )
                 {
                     sum += (U_t->entries[x].val * U_t->entries[y].val);
                     ++x;
                     ++y;
                 }
-                else if( U_t->entries[x].j < U_t->entries[y].j )
+                else if ( U_t->entries[x].j < U_t->entries[y].j )
                 {
                     ++x;
                 }
@@ -333,10 +340,10 @@ static void ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
             sum = DAD->entries[j].val - sum;
 
             /* diagonal entries */
-            if( (k - 1) == U_t->entries[j].j )
+            if ( (k - 1) == U_t->entries[j].j )
             {
                 /* sanity check */
-                if( sum < ZERO )
+                if ( sum < ZERO )
                 {
                     fprintf( stderr, "Numeric breakdown in ICHOL. Terminating.\n" );
                     exit(NUMERIC_BREAKDOWN);
@@ -359,7 +366,7 @@ static void ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     {
         for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj )
         {
-           U_t->entries[pj].val *= D_inv[i];
+            U_t->entries[pj].val *= D_inv[i];
         }
     }
 
@@ -406,7 +413,7 @@ void Init_MatVec( reax_system *system, control_params *control,
 {
     int i, fillin;
     real s_tmp, t_tmp;
-    char fname[100];
+//    char fname[100];
 
     if (control->refactor > 0 &&
             ((data->step - data->prev_steps) % control->refactor == 0 || workspace->L == NULL))
@@ -414,24 +421,24 @@ void Init_MatVec( reax_system *system, control_params *control,
         //Print_Linear_System( system, control, workspace, data->step );
         Sort_Matrix_Rows( workspace->H );
         //fprintf( stderr, "H matrix sorted\n" );
-//        Calculate_Droptol( workspace->H, workspace->droptol, control->droptol );
+        Calculate_Droptol( workspace->H, workspace->droptol, control->droptol );
         //fprintf( stderr, "drop tolerances calculated\n" );
         if ( workspace->L == NULL )
         {
-//            fillin = Estimate_LU_Fill( workspace->H, workspace->droptol );
-//            if ( Allocate_Matrix( &(workspace->L), far_nbrs->n, fillin ) == 0 ||
-//                    Allocate_Matrix( &(workspace->U), far_nbrs->n, fillin ) == 0 )
+            fillin = Estimate_LU_Fill( workspace->H, workspace->droptol );
+            if ( Allocate_Matrix( &(workspace->L), far_nbrs->n, fillin ) == 0 ||
+                    Allocate_Matrix( &(workspace->U), far_nbrs->n, fillin ) == 0 )
+            {
+                fprintf( stderr, "not enough memory for LU matrices. terminating.\n" );
+                exit(INSUFFICIENT_SPACE);
+            }
+            /* factors have sparsity pattern as H */
+//            if ( Allocate_Matrix( &(workspace->L), workspace->H->n, workspace->H->m ) == 0 ||
+//                    Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 )
 //            {
-//                fprintf( stderr, "not enough memory for LU matrices. terminating.\n" );
+//                fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
 //                exit(INSUFFICIENT_SPACE);
 //            }
-           /* factors have sparsity pattern as H */
-           if ( Allocate_Matrix( &(workspace->L), workspace->H->n, workspace->H->m ) == 0 ||
-                   Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 )
-           {
-               fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
-               exit(INSUFFICIENT_SPACE);
-           }
 #if defined(DEBUG_FOCUS)
             fprintf( stderr, "fillin = %d\n", fillin );
             fprintf( stderr, "allocated memory: L = U = %ldMB\n",
@@ -439,9 +446,11 @@ void Init_MatVec( reax_system *system, control_params *control,
 #endif
         }
 
-//        ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U );
+        data->timing.pre_comp += ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U );
         // TODO: add parameters for sweeps to control file
-        ICHOL_PAR( workspace->H, 1, workspace->L, workspace->U );
+//        ICHOL_PAR( workspace->H, 1, workspace->L, workspace->U );
+
+        fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) );
 
 #if defined(DEBUG_FOCUS)
         sprintf( fname, "%s.L%d.out", control->sim_name, data->step );
@@ -525,29 +534,29 @@ void QEq( reax_system *system, control_params *control, simulation_data *data,
 
     Init_MatVec( system, control, data, workspace, far_nbrs );
 
-//    if( data->step % 10 == 0 )
-//      Print_Linear_System( system, control, workspace, data->step );
+    if( data->step == 0 || data->step == 100 )
+      Print_Linear_System( system, control, workspace, data->step );
 
     //TODO: add parameters in control file for solver choice and options
-//  matvecs = GMRES( workspace, workspace->H,
-//    workspace->b_s, control->q_err, workspace->s[0], out_control->log );
-//  matvecs += GMRES( workspace, workspace->H,
-//    workspace->b_t, control->q_err, workspace->t[0], out_control->log );
-
-    //matvecs = GMRES_HouseHolder( workspace, workspace->H,
-    //    workspace->b_s, control->q_err, workspace->s[0], out_control->log );
-    //matvecs += GMRES_HouseHolder( workspace, workspace->H,
-    //    workspace->b_t, control->q_err, workspace->t[0], out_control->log );
-
-//   matvecs = PGMRES( workspace, workspace->H, workspace->b_s, control->q_err,
-//     workspace->L, workspace->U, workspace->s[0], out_control->log );
-//   matvecs += PGMRES( workspace, workspace->H, workspace->b_t, control->q_err,
-//     workspace->L, workspace->U, workspace->t[0], out_control->log );
-
-    matvecs = PGMRES_Jacobi( workspace, workspace->H, workspace->b_s, control->q_err,
-                             workspace->L, workspace->U, workspace->s[0], out_control->log );
-    matvecs += PGMRES_Jacobi( workspace, workspace->H, workspace->b_t, control->q_err,
-                              workspace->L, workspace->U, workspace->t[0], out_control->log );
+//    matvecs = GMRES( workspace, workspace->H,
+//                     workspace->b_s, control->q_err, workspace->s[0], out_control->log, &(data->timing.pre_app) );
+//    matvecs += GMRES( workspace, workspace->H,
+//                      workspace->b_t, control->q_err, workspace->t[0], out_control->log, &(data->timing.pre_app) );
+
+//    matvecs = GMRES_HouseHolder( workspace, workspace->H,
+//                                 workspace->b_s, control->q_err, workspace->s[0], out_control->log );
+//    matvecs += GMRES_HouseHolder( workspace, workspace->H,
+//                                  workspace->b_t, control->q_err, workspace->t[0], out_control->log );
+
+    matvecs = PGMRES( workspace, workspace->H, workspace->b_s, control->q_err,
+                      workspace->L, workspace->U, workspace->s[0], out_control->log, &(data->timing.pre_app) );
+    matvecs += PGMRES( workspace, workspace->H, workspace->b_t, control->q_err,
+                       workspace->L, workspace->U, workspace->t[0], out_control->log, &(data->timing.pre_app) );
+
+//    matvecs = PGMRES_Jacobi( workspace, workspace->H, workspace->b_s, control->q_err,
+//                             workspace->L, workspace->U, workspace->s[0], out_control->log, &(data->timing.pre_app) );
+//    matvecs += PGMRES_Jacobi( workspace, workspace->H, workspace->b_t, control->q_err,
+//                              workspace->L, workspace->U, workspace->t[0], out_control->log, &(data->timing.pre_app) );
 
     //matvecs=PCG( workspace, workspace->H, workspace->b_s, control->q_err,
     //      workspace->L, workspace->U, workspace->s[0], out_control->log ) + 1;
diff --git a/puremd_rc_1003/sPuReMD/init_md.c b/puremd_rc_1003/sPuReMD/init_md.c
index d6f15a1e..f56b886a 100644
--- a/puremd_rc_1003/sPuReMD/init_md.c
+++ b/puremd_rc_1003/sPuReMD/init_md.c
@@ -211,6 +211,8 @@ void Init_Simulation_Data( reax_system *system, control_params *control,
     data->timing.nonb = 0;
     data->timing.QEq = 0;
     data->timing.matvecs = 0;
+    data->timing.pre_comp = ZERO;
+    data->timing.pre_app = ZERO;
 }
 
 
@@ -482,9 +484,9 @@ void Init_Out_Controls(reax_system *system, control_params *control,
         strcpy( temp, control->sim_name );
         strcat( temp, ".log" );
         out_control->log = fopen( temp, "w" );
-        fprintf( out_control->log, "%-6s%10s%10s%10s%10s%10s%10s%10s\n",
+        fprintf( out_control->log, "%-6s%10s%10s%10s%10s%10s%10s%10s%10s%10s\n",
                  "step", "total", "neighbors", "init", "bonded",
-                 "nonbonded", "QEq", "matvec" );
+                 "nonbonded", "QEq", "matvec", "pre comp", "pre app" );
     }
 
     /* Init pressure file */
diff --git a/puremd_rc_1003/sPuReMD/mytypes.h b/puremd_rc_1003/sPuReMD/mytypes.h
index 5618418c..2969a160 100644
--- a/puremd_rc_1003/sPuReMD/mytypes.h
+++ b/puremd_rc_1003/sPuReMD/mytypes.h
@@ -502,6 +502,8 @@ typedef struct
     real nonb;
     real QEq;
     int  matvecs;
+    real  pre_comp;
+    real  pre_app;
 } reax_timing;
 
 
diff --git a/puremd_rc_1003/sPuReMD/print_utils.c b/puremd_rc_1003/sPuReMD/print_utils.c
index 20d58e75..b7c4f06d 100644
--- a/puremd_rc_1003/sPuReMD/print_utils.c
+++ b/puremd_rc_1003/sPuReMD/print_utils.c
@@ -631,14 +631,16 @@ void Output_Results( reax_system *system, control_params *control,
             f_update = 1;
         else f_update = out_control->energy_update_freq;
 
-        fprintf( out_control->log, "%6d%10.2f%10.2f%10.2f%10.2f%10.2f%10.2f%10.2f\n",
+        fprintf( out_control->log, "%6d%10.2f%10.2f%10.2f%10.2f%10.2f%10.2f%10.2f%10.6f%10.6f\n",
                  data->step, t_elapsed / f_update,
                  data->timing.nbrs / f_update,
                  data->timing.init_forces / f_update,
                  data->timing.bonded / f_update,
                  data->timing.nonb / f_update,
                  data->timing.QEq / f_update,
-                 (double)data->timing.matvecs / f_update );
+                 (double)data->timing.matvecs / f_update,
+                 data->timing.pre_comp / f_update,
+                 data->timing.pre_app / f_update );
 
         data->timing.total = Get_Time( );
         data->timing.nbrs = 0;
@@ -647,6 +649,8 @@ void Output_Results( reax_system *system, control_params *control,
         data->timing.nonb = 0;
         data->timing.QEq = 0;
         data->timing.matvecs = 0;
+        data->timing.pre_comp = ZERO;
+        data->timing.pre_app = ZERO;
 
         fflush( out_control->out );
         fflush( out_control->pot );
-- 
GitLab