From 5968dd991cb3146140a8fcc1a06b40ab1de9f238 Mon Sep 17 00:00:00 2001
From: "Kurt A. O'Hearn" <ohearnk@msu.edu>
Date: Tue, 15 May 2018 21:42:58 -0400
Subject: [PATCH] PG-PuReMD: begin merging preconditioner code from sPuReMD.
 Make data structures between sPuReMD and PG-PuReMD consistent. Update control
 file parsing.

---
 PG-PuReMD/src/charges.c               | 1098 +++++++++++++++++--------
 PG-PuReMD/src/charges.h               |    5 +-
 PG-PuReMD/src/control.c               |   17 +
 PG-PuReMD/src/cuda/cuda_charges.cu    |   26 +-
 PG-PuReMD/src/forces.c                |    2 +-
 PG-PuReMD/src/io_tools.c              |    2 +-
 PG-PuReMD/src/lin_alg.c               |   25 +
 PG-PuReMD/src/lin_alg.h               |    3 +
 PG-PuReMD/src/reax_types.h            |  104 ++-
 PG-PuReMD/src/reset_tools.c           |    3 +-
 PG-PuReMD/src/tool_box.c              |   18 +-
 sPuReMD/src/analyze.c                 |   11 +-
 sPuReMD/src/charges.c                 |   69 +-
 sPuReMD/src/charges.h                 |    3 +-
 sPuReMD/src/control.c                 |  760 ++++++++---------
 sPuReMD/src/forces.c                  |    5 +-
 sPuReMD/src/four_body_interactions.c  |    3 +-
 sPuReMD/src/init_md.c                 |    2 +-
 sPuReMD/src/integrate.c               |  119 +--
 sPuReMD/src/mytypes.h                 |   91 +-
 sPuReMD/src/three_body_interactions.c |    6 +-
 sPuReMD/src/two_body_interactions.c   |   12 +-
 22 files changed, 1448 insertions(+), 936 deletions(-)

diff --git a/PG-PuReMD/src/charges.c b/PG-PuReMD/src/charges.c
index 9c87b6ea..31f6889d 100644
--- a/PG-PuReMD/src/charges.c
+++ b/PG-PuReMD/src/charges.c
@@ -30,9 +30,9 @@
 #include "tool_box.h"
 
 
-int compare_matrix_entry(const void *v1, const void *v2)
+int compare_matrix_entry( const void *v1, const void *v2 )
 {
-    return ((sparse_matrix_entry *)v1)->j - ((sparse_matrix_entry *)v2)->j;
+    return ((sparse_matrix_entry *) v1)->j - ((sparse_matrix_entry *) v2)->j;
 }
 
 
@@ -44,487 +44,891 @@ void Sort_Matrix_Rows( sparse_matrix *A )
     {
         si = A->start[i];
         ei = A->end[i];
-        qsort( &(A->entries[si]), ei - si,
+        qsort( &A->entries[si], ei - si,
                 sizeof(sparse_matrix_entry), compare_matrix_entry );
     }
 }
 
 
-void Calculate_Droptol( sparse_matrix *A, real *droptol, real dtol )
+static void Init_Linear_Solver( reax_system *system, simulation_data *data,
+        control_params *control, storage *workspace, mpi_datatypes *mpi_data )
 {
-    int i, j, k;
-    real val;
+    int i;
+    reax_atom *atom;
 
-    /* init droptol to 0 - not necessary for an upper-triangular A */
-    for ( i = 0; i < A->n; ++i )
+    /* initialize solution vectors for linear solves in charge method */
+    switch ( control->charge_method )
     {
-        droptol[i] = 0;
+        case QEQ_CM:
+            for ( i = 0; i < system->n; ++i )
+            {
+                atom = &system->my_atoms[i];
+
+                workspace->b_s[i] = -1.0 * system->reax_param.sbp[ atom->type ].chi;
+                workspace->b_t[i] = -1.0;
+                workspace->b[i][0] = -1.0 * system->reax_param.sbp[ atom->type ].chi;
+                workspace->b[i][1] = -1.0;
+            }
+            break;
+
+        case EE_CM:
+            for ( i = 0; i < system->n; ++i )
+            {
+                atom = &system->my_atoms[i];
+
+                workspace->b_s[i] = -1.0 * system->reax_param.sbp[ atom->type ].chi;
+
+                //TODO: check if unused (redundant)
+                workspace->b[i][0] = -1.0 * system->reax_param.sbp[ atom->type ].chi;
+            }
+
+            if ( system->my_rank == 0 )
+            {
+                workspace->b_s[system->n] = control->cm_q_net;
+                workspace->b[system->n][0] = control->cm_q_net;
+            }
+            break;
+
+        case ACKS2_CM:
+            for ( i = 0; i < system->n; ++i )
+            {
+                atom = &system->my_atoms[i];
+
+                workspace->b_s[i] = -1.0 * system->reax_param.sbp[ atom->type ].chi;
+
+                //TODO: check if unused (redundant)
+                workspace->b[i][0] = -1.0 * system->reax_param.sbp[ atom->type ].chi;
+            }
+
+            if ( system->my_rank == 0 )
+            {
+                workspace->b_s[system->n] = control->cm_q_net;
+                workspace->b[system->n][0] = control->cm_q_net;
+            }
+
+            for ( i = system->n + 1; i < system->N_cm; ++i )
+            {
+                atom = &system->my_atoms[i];
+
+                workspace->b_s[i] = 0.0;
+
+                //TODO: check if unused (redundant)
+                workspace->b[i][0] = 0.0;
+            }
+
+            if ( system->my_rank == 0 )
+            {
+                workspace->b_s[system->n] = control->cm_q_net;
+                workspace->b[system->n][0] = control->cm_q_net;
+            }
+            break;
+
+        default:
+            fprintf( stderr, "[ERROR] Unknown charge method type. Terminating...\n" );
+            exit( UNKNOWN_OPTION );
+            break;
     }
+}
 
-    /* calculate sqaure of the norm of each row */
-    for ( i = 0; i < A->n; ++i )
-    {
-        val = A->entries[A->start[i]].val; // diagonal entry
-        droptol[i] += val * val;
 
-        // only within my block
-        for ( k = A->start[i] + 1; A->entries[k].j < A->n; ++k )
+static void Extrapolate_Charges_QEq( const reax_system * const system,
+        const control_params * const control,
+        simulation_data * const data, storage * const workspace )
+{
+    int i;
+    real s_tmp, t_tmp;
+
+    /* spline extrapolation for s & t */
+    //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer
+#ifdef _OPENMP
+    #pragma omp parallel for schedule(static) \
+        default(none) private(i, s_tmp, t_tmp)
+#endif
+    for ( i = 0; i < system->N_cm; ++i )
+    {
+        /* no extrapolation, previous solution as initial guess */
+        if ( control->cm_init_guess_extrap1 == 0 )
+        {
+            s_tmp = system->my_atoms[i].s[0];
+        }
+        /* linear */
+        else if ( control->cm_init_guess_extrap1 == 1 )
         {
-            j = A->entries[k].j;
-            val = A->entries[k].val;
+            s_tmp = 2.0 * system->my_atoms[i].s[0] - system->my_atoms[i].s[1];
+        }
+        /* quadratic */
+        else if ( control->cm_init_guess_extrap1 == 2 )
+        {
+            s_tmp = system->my_atoms[i].s[2] + 3.0 * (system->my_atoms[i].s[0] - system->my_atoms[i].s[1]);
+        }
+        /* cubic */
+        else if ( control->cm_init_guess_extrap1 == 3 )
+        {
+            s_tmp = 4.0 * (system->my_atoms[i].s[0] + system->my_atoms[i].s[2]) -
+                    (6.0 * system->my_atoms[i].s[1] + system->my_atoms[i].s[3]);
+        }
+        /* 4th order */
+        else if ( control->cm_init_guess_extrap1 == 4 )
+        {
+            s_tmp = 5.0 * (system->my_atoms[i].s[0] - system->my_atoms[i].s[3]) +
+                10.0 * (-1.0 * system->my_atoms[i].s[1] + system->my_atoms[i].s[2]) + system->my_atoms[i].s[4];
+        }
+        else
+        {
+            s_tmp = 0.0;
+        }
 
-            droptol[i] += val * val;
-            droptol[j] += val * val;
+        /* no extrapolation, previous solution as initial guess */
+        if ( control->cm_init_guess_extrap2 == 0 )
+        {
+            t_tmp = system->my_atoms[i].t[0];
+        }
+        /* linear */
+        else if ( control->cm_init_guess_extrap2 == 1 )
+        {
+            t_tmp = 2.0 * system->my_atoms[i].t[0] - system->my_atoms[i].t[1];
+        }
+        /* quadratic */
+        else if ( control->cm_init_guess_extrap2 == 2 )
+        {
+            t_tmp = system->my_atoms[i].t[2] + 3.0 * (system->my_atoms[i].t[0] - system->my_atoms[i].t[1]);
+        }
+        /* cubic */
+        else if ( control->cm_init_guess_extrap2 == 3 )
+        {
+            t_tmp = 4.0 * (system->my_atoms[i].t[0] + system->my_atoms[i].t[2]) -
+                (6.0 * system->my_atoms[i].t[1] + system->my_atoms[i].t[3]);
+        }
+        /* 4th order */
+        else if ( control->cm_init_guess_extrap2 == 4 )
+        {
+            t_tmp = 5.0 * (system->my_atoms[i].t[0] - system->my_atoms[i].t[3]) +
+                10.0 * (-1.0 * system->my_atoms[i].t[1] + system->my_atoms[i].t[2]) + system->my_atoms[i].t[4];
+        }
+        else
+        {
+            t_tmp = 0.0;
         }
-    }
 
-    /* calculate local droptol for each row */
-    //fprintf( stderr, "droptol: " );
-    for ( i = 0; i < A->n; ++i )
-    {
-        droptol[i] = SQRT( droptol[i] ) * dtol;
-        //fprintf( stderr, "%f\n", droptol[i] );
+        system->my_atoms[i].s[4] = system->my_atoms[i].s[3];
+        system->my_atoms[i].s[3] = system->my_atoms[i].s[2];
+        system->my_atoms[i].s[2] = system->my_atoms[i].s[1];
+        system->my_atoms[i].s[1] = system->my_atoms[i].s[0];
+        system->my_atoms[i].s[0] = s_tmp;
+        /* x is used as initial guess to solver */
+        workspace->x[i][0] = s_tmp;
+
+        system->my_atoms[i].t[4] = system->my_atoms[i].t[3];
+        system->my_atoms[i].t[3] = system->my_atoms[i].t[2];
+        system->my_atoms[i].t[2] = system->my_atoms[i].t[1];
+        system->my_atoms[i].t[1] = system->my_atoms[i].t[0];
+        system->my_atoms[i].t[0] = t_tmp;
+        /* x is used as initial guess to solver */
+        workspace->x[i][1] = t_tmp;
     }
-    //fprintf( stderr, "\n" );
 }
 
 
-int Estimate_LU_Fill( sparse_matrix *A, real *droptol )
+static void Extrapolate_Charges_EE( const reax_system * const system,
+        const control_params * const control,
+        simulation_data * const data, storage * const workspace )
 {
-    int i, j, pj;
-    int fillin;
-    real val;
+    int i;
+    real s_tmp;
 
-    fillin = 0;
-    for ( i = 0; i < A->n; ++i )
+    /* spline extrapolation for s */
+    //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer
+#ifdef _OPENMP
+    #pragma omp parallel for schedule(static) \
+        default(none) private(i, s_tmp)
+#endif
+    for ( i = 0; i < system->N_cm; ++i )
     {
-        for ( pj = A->start[i] + 1; A->entries[pj].j < A->n; ++pj )
+        /* no extrapolation, previous solution as initial guess */
+        if ( control->cm_init_guess_extrap1 == 0 )
         {
-            j = A->entries[pj].j;
-            val = A->entries[pj].val;
-            if ( FABS(val) > droptol[i] )
-                ++fillin;
+            s_tmp = system->my_atoms[i].s[0];
+        }
+        /* linear */
+        else if ( control->cm_init_guess_extrap1 == 1 )
+        {
+            s_tmp = 2.0 * system->my_atoms[i].s[0] - system->my_atoms[i].s[1];
+        }
+        /* quadratic */
+        else if ( control->cm_init_guess_extrap1 == 2 )
+        {
+            s_tmp = system->my_atoms[i].s[2] + 3.0 * (system->my_atoms[i].s[0] - system->my_atoms[i].s[1]);
+        }
+        /* cubic */
+        else if ( control->cm_init_guess_extrap1 == 3 )
+        {
+            s_tmp = 4.0 * (system->my_atoms[i].s[0] + system->my_atoms[i].s[2]) -
+                    (6.0 * system->my_atoms[i].s[1] + system->my_atoms[i].s[3]);
+        }
+        /* 4th order */
+        else if ( control->cm_init_guess_extrap1 == 4 )
+        {
+            s_tmp = 5.0 * (system->my_atoms[i].s[0] - system->my_atoms[i].s[3]) +
+                10.0 * (-1.0 * system->my_atoms[i].s[1] + system->my_atoms[i].s[2]) + system->my_atoms[i].s[4];
+        }
+        else
+        {
+            s_tmp = 0.0;
         }
-    }
 
-    return fillin + A->n;
+        system->my_atoms[i].s[4] = system->my_atoms[i].s[3];
+        system->my_atoms[i].s[3] = system->my_atoms[i].s[2];
+        system->my_atoms[i].s[2] = system->my_atoms[i].s[1];
+        system->my_atoms[i].s[1] = system->my_atoms[i].s[0];
+        system->my_atoms[i].s[0] = s_tmp;
+        /* x is used as initial guess to solver */
+        workspace->x[i][0] = s_tmp;
+    }
 }
 
 
-void ICHOLT( sparse_matrix *A, real *droptol,
-        sparse_matrix *L, sparse_matrix *U )
+/* Compute preconditioner for QEq
+ */
+static void Compute_Preconditioner_QEq( const reax_system * const system,
+        const control_params * const control,
+        simulation_data * const data, storage * const workspace )
 {
-    sparse_matrix_entry tmp[1000];
-    int i, j, pj, k1, k2, tmptop, Utop;
-    real val, dval;
-    int *Ltop;
-
-    Ltop = (int*) smalloc( A->n * sizeof(int), "ICHOLT::Ltop" );
+    sparse_matrix *Hptr;
 
-    // clear data structures
-    Utop = 0;
-    tmptop = 0;
-    for ( i = 0; i < A->n; ++i )
+    if ( control->cm_domain_sparsify_enabled == TRUE )
     {
-        U->start[i] = L->start[i] = L->end[i] = Ltop[i] = 0;
+        Hptr = &workspace->H_sp;
+    }
+    else
+    {
+        Hptr = &workspace->H;
     }
 
-    for ( i = A->n - 1; i >= 0; --i )
+    switch ( control->cm_solver_pre_comp_type )
     {
-        U->start[i] = Utop;
-        tmptop = 0;
+        case NONE_PC:
+            break;
 
-        for ( pj = A->end[i] - 1; A->entries[pj].j >= A->n; --pj ); // skip ghosts
-        for ( ; pj > A->start[i]; --pj )
-        {
-            j = A->entries[pj].j;
-            val = A->entries[pj].val;
-            fprintf( stderr, "i: %d, j: %d  val=%f ", i, j, val );
-            //fprintf( stdout, "%d %d %24.16f\n", 6540-i, 6540-j, val );
-            //fprintf( stdout, "%d %d %24.16f\n", 6540-j, 6540-i, val );
+        case DIAG_PC:
+            data->timing.cm_solver_pre_comp +=
+                diag_pre_comp( system, workspace->Hdia_inv );
+//                diag_pre_comp( Hptr, workspace->Hdia_inv );
+            break;
 
-            if ( FABS(val) > droptol[i] )
-            {
-                k1 = tmptop - 1;
-                k2 = U->start[j] + 1;
-                while ( k1 >= 0 && k2 < U->end[j] )
-                {
-                    if ( tmp[k1].j < U->entries[k2].j )
-                    {
-                        k1--;
-                    }
-                    else if ( tmp[k1].j > U->entries[k2].j )
-                    {
-                        k2++;
-                    }
-                    else
-                    {
-                        val -= (tmp[k1--].val * U->entries[k2++].val);
-                    }
-                }
-
-                // U matrix is upper triangular
-                val /= U->entries[U->start[j]].val;
-                fprintf( stderr, " newval=%f", val );
-                tmp[tmptop].j = j;
-                tmp[tmptop].val = val;
-                tmptop++;
-            }
-            fprintf( stderr, "\n" );
-        }
-        //fprintf( stderr, "i = %d - tmptop = %d\n", i, tmptop );
+        case ICHOLT_PC:
+        case ILU_PAR_PC:
+        case ILUT_PAR_PC:
+        case ILU_SUPERLU_MT_PC:
+            fprintf( stderr, "[ERROR] Unsupported preconditioner computation method. Terminating...\n" );
+            exit( INVALID_INPUT );
+            break;
 
-        // compute the ith diagonal in U
-        dval = A->entries[A->start[i]].val;
-        //fprintf( stdout, "%d %d %24.16f\n", 6540-i, 6540-i, dval );
-        for ( k1 = 0; k1 < tmptop; ++k1 )
-        {
-            //if( FABS(tmp[k1].val) > droptol[i] )
-            dval -= SQR(tmp[k1].val);
-        }
-        dval = SQRT(dval);
-        // keep the diagonal in any case
-        U->entries[Utop].j = i;
-        U->entries[Utop].val = dval;
-        Utop++;
-
-        fprintf(stderr, "row%d: droptol=%.15f val=%.15f\n", i, droptol[i], dval);
-        for ( k1 = tmptop - 1; k1 >= 0; --k1 )
-        {
-            // apply the dropping rule once again
-            if ( FABS(tmp[k1].val) > droptol[i] / dval )
-            {
-                U->entries[Utop].j = tmp[k1].j;
-                U->entries[Utop].val = tmp[k1].val;
-                Utop++;
-                Ltop[tmp[k1].j]++;
-                fprintf( stderr, "%d(%.15f)\n", tmp[k1].j, tmp[k1].val );
-            }
-        }
+        case SAI_PC:
+#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL)
+            //TODO: implement
+//            data->timing.cm_solver_pre_comp +=
+//                sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full,
+//                        &workspace->H_app_inv );
+#else
+            fprintf( stderr, "[ERROR] LAPACKE support disabled. Re-compile before enabling. Terminating...\n" );
+            exit( INVALID_INPUT );
+#endif
+            break;
 
-        U->end[i] = Utop;
-        //fprintf( stderr, "i = %d - Utop = %d\n", i, Utop );
+        default:
+            fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method. Terminating...\n" );
+            exit( UNKNOWN_OPTION );
+            break;
     }
+}
+
 
-#if defined(DEBUG)
-    // print matrix U
-    fprintf( stderr, "nnz(U): %d\n", Utop );
-    for ( i = 0; i < U->n; ++i )
+/* Compute preconditioner for EE
+ */
+static void Compute_Preconditioner_EE( const reax_system * const system,
+        const control_params * const control,
+        simulation_data * const data, storage * const workspace )
+{
+    sparse_matrix *Hptr;
+
+    if ( control->cm_domain_sparsify_enabled == TRUE )
     {
-        fprintf( stderr, "row%d: ", i );
-        for ( pj = U->start[i]; pj < U->end[i]; ++pj )
-        {
-            fprintf( stderr, "%d ", U->entries[pj].j );
-        }
-        fprintf( stderr, "\n" );
+        Hptr = &workspace->H_sp;
+    }
+    else
+    {
+        Hptr = &workspace->H;
     }
+
+    if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
+            control->cm_solver_pre_comp_type == ILU_PAR_PC ||
+            control->cm_solver_pre_comp_type == ILUT_PAR_PC )
+    {
+        Hptr->entries[Hptr->start[system->N + 1] - 1].val = 1.0;
+    }
+    
+    switch ( control->cm_solver_pre_comp_type )
+    {
+        case NONE_PC:
+            break;
+
+        case DIAG_PC:
+            data->timing.cm_solver_pre_comp +=
+                diag_pre_comp( system, workspace->Hdia_inv );
+//                diag_pre_comp( Hptr, workspace->Hdia_inv );
+            break;
+
+        case ICHOLT_PC:
+        case ILU_PAR_PC:
+        case ILUT_PAR_PC:
+        case ILU_SUPERLU_MT_PC:
+            fprintf( stderr, "[ERROR] Unsupported preconditioner computation method. Terminating...\n" );
+            exit( INVALID_INPUT );
+            break;
+
+        case SAI_PC:
+#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL)
+            //TODO: implement
+//            data->timing.cm_solver_pre_comp +=
+//                sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full,
+//                        &workspace->H_app_inv );
+#else
+            fprintf( stderr, "[ERROR] LAPACKE support disabled. Re-compile before enabling. Terminating...\n" );
+            exit( INVALID_INPUT );
 #endif
+            break;
+
+        default:
+            fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method. Terminating...\n" );
+            exit( UNKNOWN_OPTION );
+            break;
+    }
 
-    // transpose matrix U into L
-    L->start[0] = L->end[0] = 0;
-    for ( i = 1; i < L->n; ++i )
+    if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
+            control->cm_solver_pre_comp_type == ILU_PAR_PC ||
+            control->cm_solver_pre_comp_type == ILUT_PAR_PC )
     {
-        L->start[i] = L->end[i] = L->start[i - 1] + Ltop[i - 1] + 1;
-        //fprintf( stderr, "i=%d  L->start[i]=%d\n", i, L->start[i] );
+        Hptr->entries[Hptr->start[system->N + 1] - 1].val = 0.0;
     }
+}
+
 
-    for ( i = 0; i < U->n; ++i )
+/* Compute preconditioner for ACKS2
+ */
+static void Compute_Preconditioner_ACKS2( const reax_system * const system,
+        const control_params * const control,
+        simulation_data * const data, storage * const workspace )
+{
+    sparse_matrix *Hptr;
+
+    if ( control->cm_domain_sparsify_enabled == TRUE )
     {
-        for ( pj = U->start[i]; pj < U->end[i]; ++pj )
-        {
-            j = U->entries[pj].j;
-            L->entries[L->end[j]].j = i;
-            L->entries[L->end[j]].val = U->entries[pj].val;
-            L->end[j]++;
-        }
+        Hptr = &workspace->H_sp;
+    }
+    else
+    {
+        Hptr = &workspace->H;
     }
 
-#if defined(DEBUG)
-    // print matrix L
-    fprintf( stderr, "nnz(L): %d\n", L->end[L->n - 1] );
-    for ( i = 0; i < L->n; ++i )
+    if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
+            control->cm_solver_pre_comp_type == ILU_PAR_PC ||
+            control->cm_solver_pre_comp_type == ILUT_PAR_PC )
     {
-        fprintf( stderr, "row%d: ", i );
-        for ( pj = L->start[i]; pj < L->end[i]; ++pj )
-        {
-            fprintf( stderr, "%d ", L->entries[pj].j );
-        }
-        fprintf( stderr, "\n" );
+        Hptr->entries[Hptr->start[system->N + 1] - 1].val = 1.0;
+        Hptr->entries[Hptr->start[system->N_cm] - 1].val = 1.0;
     }
+    
+    switch ( control->cm_solver_pre_comp_type )
+    {
+        case NONE_PC:
+            break;
+
+        case DIAG_PC:
+            data->timing.cm_solver_pre_comp +=
+                diag_pre_comp( system, workspace->Hdia_inv );
+//                diag_pre_comp( Hptr, workspace->Hdia_inv );
+            break;
+
+        case ICHOLT_PC:
+        case ILU_PAR_PC:
+        case ILUT_PAR_PC:
+        case ILU_SUPERLU_MT_PC:
+            fprintf( stderr, "[ERROR] Unsupported preconditioner computation method. Terminating...\n" );
+            exit( INVALID_INPUT );
+            break;
+
+        case SAI_PC:
+#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL)
+            //TODO: implement
+//            data->timing.cm_solver_pre_comp +=
+//                sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full,
+//                        &workspace->H_app_inv );
+#else
+            fprintf( stderr, "[ERROR] LAPACKE support disabled. Re-compile before enabling. Terminating...\n" );
+            exit( INVALID_INPUT );
 #endif
+            break;
 
-    sfree( Ltop, "Ltop" );
+        default:
+            fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method. Terminating...\n" );
+            exit( UNKNOWN_OPTION );
+            break;
+    }
+
+    if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
+            control->cm_solver_pre_comp_type == ILU_PAR_PC ||
+            control->cm_solver_pre_comp_type == ILUT_PAR_PC )
+    {
+        Hptr->entries[Hptr->start[system->N + 1] - 1].val = 0.0;
+        Hptr->entries[Hptr->start[system->N_cm] - 1].val = 0.0;
+    }
 }
 
 
-void Init_MatVec( reax_system *system, simulation_data *data,
-        control_params *control, storage *workspace, mpi_datatypes *mpi_data )
+static void Setup_Preconditioner_QEq( const reax_system * const system,
+        const control_params * const control,
+        simulation_data * const data, storage * const workspace )
 {
-    int i; //, fillin;
-    reax_atom *atom;
+    real time;
+    sparse_matrix *Hptr;
 
-//    if( (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ||
-//            workspace->L == NULL )
-//    {
-////        Print_Linear_System( system, control, workspace, data->step );
-//        Sort_Matrix_Rows( workspace->H );
-//        fprintf( stderr, "H matrix sorted\n" );
-//
-//        Calculate_Droptol( workspace->H, workspace->droptol, control->droptol );
-//        fprintf( stderr, "drop tolerances calculated\n" );
-//
-//        if( workspace->L == NULL )
-//        {
-//            fillin = Estimate_LU_Fill( workspace->H, workspace->droptol );
-//
-//            if( Allocate_Matrix( &(workspace->L), workspace->H->cap, fillin ) == 0 ||
-//                    Allocate_Matrix( &(workspace->U), workspace->H->cap, fillin ) == 0 )
-//            {
-//                fprintf( stderr, "not enough memory for LU matrices. terminating.\n" );
-//                MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY );
-//            }
-//
-//            workspace->L->n = workspace->H->n;
-//            workspace->U->n = workspace->H->n;
-//
-//#if defined(DEBUG_FOCUS)
-//            fprintf( stderr, "p%d: n=%d, fillin = %d\n",x
-//                    system->my_rank, workspace->L->n, fillin );
-//            fprintf( stderr, "p%d: allocated memory: L = U = %ldMB\n",
-//                    system->my_rank,fillin*sizeof(sparse_matrix_entry)/(1024*1024) );
-//#endif
-//        }
-//
-//      ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U );
-//#if defined(DEBUG_FOCUS)
-//    fprintf( stderr, "p%d: icholt finished\n", system->my_rank );
-////    sprintf( fname, "%s.L%d.out", control->sim_name, data->step );
-////    Print_Sparse_Matrix2( workspace->L, fname );
-////    Print_Sparse_Matrix( U );
-//#endif
-//    }
+    if ( control->cm_domain_sparsify_enabled == TRUE )
+    {
+        Hptr = &workspace->H_sp;
+    }
+    else
+    {
+        Hptr = &workspace->H;
+    }
 
-    for ( i = 0; i < system->n; ++i )
+    /* sort H needed for SpMV's in linear solver, H or H_sp needed for preconditioning */
+    time = Get_Time( );
+    Sort_Matrix_Rows( &workspace->H );
+    if ( control->cm_domain_sparsify_enabled == TRUE )
     {
-        atom = &( system->my_atoms[i] );
+        Sort_Matrix_Rows( &workspace->H_sp );
+    }
+    data->timing.cm_sort_mat_rows += Get_Timing_Info( time );
 
-        /* initialize diagonal inverse preconditioner vectors */
-        workspace->Hdia_inv[i] = 1. / system->reax_param.sbp[ atom->type ].eta;
+    switch ( control->cm_solver_pre_comp_type )
+    {
+        case NONE_PC:
+            break;
 
-        /* linear extrapolation for s and for t */
-        // newQEq: no extrapolation!
-        //workspace->s[i] = 2 * atom->s[0] - atom->s[1]; //0;
-        //workspace->t[i] = 2 * atom->t[0] - atom->t[1]; //0;
-        //workspace->x[i][0] = 2 * atom->s[0] - atom->s[1]; //0;
-        //workspace->x[i][1] = 2 * atom->t[0] - atom->t[1]; //0;
+        case DIAG_PC:
+            if ( workspace->Hdia_inv == NULL )
+            {
+//                workspace->Hdia_inv = scalloc( Hptr->n, sizeof( real ),
+//                        "Setup_Preconditioner_QEq::workspace->Hdiv_inv" );
+            }
+            break;
 
-        /* quadratic extrapolation for s and t */
-        // workspace->s[i] = atom->s[2] + 3 * ( atom->s[0] - atom->s[1] );
-        // workspace->t[i] = atom->t[2] + 3 * ( atom->t[0] - atom->t[1] );
-        //workspace->x[i][0] = atom->s[2] + 3 * ( atom->s[0] - atom->s[1] );
-        workspace->x[i][1] = atom->t[2] + 3 * ( atom->t[0] - atom->t[1] );
+        case ICHOLT_PC:
+        case ILU_PAR_PC:
+        case ILUT_PAR_PC:
+        case ILU_SUPERLU_MT_PC:
+            fprintf( stderr, "[ERROR] Unsupported preconditioner computation method. Terminating...\n" );
+            exit( INVALID_INPUT );
+            break;
 
-        /* cubic extrapolation for s and t */
-        workspace->x[i][0] = 4 * (atom->s[0] + atom->s[2]) - (6 * atom->s[1] + atom->s[3]);
-        //workspace->x[i][1] = 4*(atom->t[0]+atom->t[2])-(6*atom->t[1]+atom->t[3]);
+        case SAI_PC:
+            //TODO: implement
+//            setup_sparse_approx_inverse( Hptr, &workspace->H_full, &workspace->H_spar_patt,
+//                    &workspace->H_spar_patt_full, &workspace->H_app_inv,
+//                    control->cm_solver_pre_comp_sai_thres );
+            break;
 
-//        fprintf(stderr, "i=%d s=%f t=%f\n", i, workspace->s[i], workspace->t[i]);
+        default:
+            fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method. Terminating...\n" );
+            exit( UNKNOWN_OPTION );
+            break;
     }
+}
 
-    /* initialize solution vectors for linear solves in charge method */
-    switch ( control->charge_method )
+
+/* Setup routines before computing the preconditioner for EE
+ */
+static void Setup_Preconditioner_EE( const reax_system * const system,
+        const control_params * const control,
+        simulation_data * const data, storage * const workspace )
+{
+    real time;
+    sparse_matrix *Hptr;
+
+    if ( control->cm_domain_sparsify_enabled == TRUE )
     {
-        case QEQ_CM:
-            for ( i = 0; i < system->n; ++i )
-            {
-                atom = &( system->my_atoms[i] );
+        Hptr = &workspace->H_sp;
+    }
+    else
+    {
+        Hptr = &workspace->H;
+    }
 
-                workspace->b_s[i] = -system->reax_param.sbp[ atom->type ].chi;
-                workspace->b_t[i] = -1.0;
-                workspace->b[i][0] = -system->reax_param.sbp[ atom->type ].chi;
-                workspace->b[i][1] = -1.0;
-            }
+    /* sorted H needed for SpMV's in linear solver, H or H_sp needed for preconditioning */
+    time = Get_Time( );
+    Sort_Matrix_Rows( &workspace->H );
+    if ( control->cm_domain_sparsify_enabled == TRUE )
+    {
+        Sort_Matrix_Rows( &workspace->H_sp );
+    }
+    data->timing.cm_sort_mat_rows += Get_Timing_Info( time );
+
+    if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
+            control->cm_solver_pre_comp_type == ILU_PAR_PC ||
+            control->cm_solver_pre_comp_type == ILUT_PAR_PC )
+    {
+        Hptr->entries[Hptr->start[system->N + 1] - 1].val = 1.0;
+    }
+
+    switch ( control->cm_solver_pre_comp_type )
+    {
+        case NONE_PC:
             break;
 
-        case EE_CM:
-            for ( i = 0; i < system->n; ++i )
+        case DIAG_PC:
+            if ( workspace->Hdia_inv == NULL )
             {
-                atom = &( system->my_atoms[i] );
+//                workspace->Hdia_inv = scalloc( system->N_cm, sizeof( real ),
+//                        "Setup_Preconditioner_QEq::workspace->Hdiv_inv" );
+            }
+            break;
 
-                workspace->b_s[i] = -system->reax_param.sbp[ atom->type ].chi;
+        case ICHOLT_PC:
+        case ILU_PAR_PC:
+        case ILUT_PAR_PC:
+        case ILU_SUPERLU_MT_PC:
+            fprintf( stderr, "[ERROR] Unsupported preconditioner computation method. Terminating...\n" );
+            exit( INVALID_INPUT );
+            break;
 
-                //TODO: check if unused (redundant)
-                workspace->b[i][0] = -system->reax_param.sbp[ atom->type ].chi;
-            }
+        case SAI_PC:
+            //TODO: implement
+//            setup_sparse_approx_inverse( Hptr, &workspace->H_full, &workspace->H_spar_patt,
+//                    &workspace->H_spar_patt_full, &workspace->H_app_inv,
+//                    control->cm_solver_pre_comp_sai_thres );
+            break;
 
-            if ( system->my_rank == 0 )
-            {
-                workspace->b_s[system->n] = control->cm_q_net;
-                workspace->b[system->n][0] = control->cm_q_net;
-            }
+        default:
+            fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method. Terminating...\n" );
+            exit( UNKNOWN_OPTION );
             break;
+    }
 
-        case ACKS2_CM:
-            for ( i = 0; i < system->n; ++i )
-            {
-                atom = &( system->my_atoms[i] );
+    if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
+            control->cm_solver_pre_comp_type == ILU_PAR_PC ||
+            control->cm_solver_pre_comp_type == ILUT_PAR_PC )
+    {
+        Hptr->entries[Hptr->start[system->N + 1] - 1].val = 0.0;
+    }
+}
 
-                workspace->b_s[i] = -system->reax_param.sbp[ atom->type ].chi;
 
-                //TODO: check if unused (redundant)
-                workspace->b[i][0] = -system->reax_param.sbp[ atom->type ].chi;
-            }
+/* Setup routines before computing the preconditioner for ACKS2
+ */
+static void Setup_Preconditioner_ACKS2( const reax_system * const system,
+        const control_params * const control,
+        simulation_data * const data, storage * const workspace )
+{
+    real time;
+    sparse_matrix *Hptr;
 
-            if ( system->my_rank == 0 )
-            {
-                workspace->b_s[system->n] = control->cm_q_net;
-                workspace->b[system->n][0] = control->cm_q_net;
-            }
+    if ( control->cm_domain_sparsify_enabled == TRUE )
+    {
+        Hptr = &workspace->H_sp;
+    }
+    else
+    {
+        Hptr = &workspace->H;
+    }
 
-            for ( i = system->n + 1; i < system->N_cm; ++i )
-            {
-                atom = &( system->my_atoms[i] );
+    /* sort H needed for SpMV's in linear solver, H or H_sp needed for preconditioning */
+    time = Get_Time( );
+    Sort_Matrix_Rows( &workspace->H );
+    if ( control->cm_domain_sparsify_enabled == TRUE )
+    {
+        Sort_Matrix_Rows( &workspace->H_sp );
+    }
+    data->timing.cm_sort_mat_rows += Get_Timing_Info( time );
 
-                workspace->b_s[i] = 0.0;
+    if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
+            control->cm_solver_pre_comp_type == ILU_PAR_PC ||
+            control->cm_solver_pre_comp_type == ILUT_PAR_PC )
+    {
+        Hptr->entries[Hptr->start[system->N + 1] - 1].val = 1.0;
+        Hptr->entries[Hptr->start[system->N_cm] - 1].val = 1.0;
+    }
 
-                //TODO: check if unused (redundant)
-                workspace->b[i][0] = 0.0;
-            }
+    switch ( control->cm_solver_pre_comp_type )
+    {
+        case NONE_PC:
+            break;
 
-            if ( system->my_rank == 0 )
+        case DIAG_PC:
+            if ( workspace->Hdia_inv == NULL )
             {
-                workspace->b_s[system->n] = control->cm_q_net;
-                workspace->b[system->n][0] = control->cm_q_net;
+//                workspace->Hdia_inv = scalloc( Hptr->n, sizeof( real ),
+//                        "Setup_Preconditioner_QEq::workspace->Hdiv_inv" );
             }
             break;
 
-        default:
-            fprintf( stderr, "Unknown charge method type. Terminating...\n" );
+        case ICHOLT_PC:
+        case ILU_PAR_PC:
+        case ILUT_PAR_PC:
+        case ILU_SUPERLU_MT_PC:
+            fprintf( stderr, "[ERROR] Unsupported preconditioner computation method. Terminating...\n" );
             exit( INVALID_INPUT );
             break;
+
+        case SAI_PC:
+            //TODO: implement
+//            setup_sparse_approx_inverse( Hptr, &workspace->H_full, &workspace->H_spar_patt,
+//                    &workspace->H_spar_patt_full, &workspace->H_app_inv,
+//                    control->cm_solver_pre_comp_sai_thres );
+            break;
+
+        default:
+            fprintf( stderr, "[ERROR] Unrecognized preconditioner computation method. Terminating...\n" );
+            exit( UNKNOWN_OPTION );
+            break;
+    }
+
+    if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
+            control->cm_solver_pre_comp_type == ILU_PAR_PC ||
+            control->cm_solver_pre_comp_type == ILUT_PAR_PC )
+    {
+        Hptr->entries[Hptr->start[system->N + 1] - 1].val = 0.0;
+        Hptr->entries[Hptr->start[system->N_cm] - 1].val = 0.0;
     }
 }
 
 
-void Calculate_Charges( reax_system *system, storage *workspace,
-        mpi_datatypes *mpi_data )
+/* Combine ficticious charges s and t to get atomic charge q for QEq method
+ */
+static void Calculate_Charges_QEq( const reax_system * const system,
+        storage * const workspace, const mpi_datatypes * const mpi_data )
 {
     int i;
-    real u;//, s_sum, t_sum;
+    real u;
     rvec2 my_sum, all_sum;
-    reax_atom *atom;
     real *q;
 
-    q = (real*) smalloc( system->N * sizeof(real), "Calculate_Charges::q" );
+    q = smalloc( sizeof(real) * system->N, "Calculate_Charges_QEq::q" );
 
-    //s_sum = Parallel_Vector_Acc(workspace->s, system->n, mpi_data->world);
-    //t_sum = Parallel_Vector_Acc(workspace->t, system->n, mpi_data->world);
-    my_sum[0] = my_sum[1] = 0;
+    my_sum[0] = 0.0;
+    my_sum[1] = 0.0;
     for ( i = 0; i < system->n; ++i )
     {
         my_sum[0] += workspace->x[i][0];
         my_sum[1] += workspace->x[i][1];
     }
 
-#if defined(DEBUG)
-    fprintf( stderr, "Host : my_sum[0]: %f and %f \n", my_sum[0], my_sum[1] );
-#endif
-
     MPI_Allreduce( &my_sum, &all_sum, 2, MPI_DOUBLE, MPI_SUM, mpi_data->world );
 
     u = all_sum[0] / all_sum[1];
-
-#if defined(DEBUG)
-    fprintf( stderr, "Host : u: %f \n", u );
-#endif
-
     for ( i = 0; i < system->n; ++i )
     {
-        atom = &( system->my_atoms[i] );
-
-        /* compute charge based on s & t */
-        //atom->q = workspace->s[i] - u * workspace->t[i];
-        q[i] = atom->q = workspace->x[i][0] - u * workspace->x[i][1];
-
-        /* backup s & t */
-        atom->s[3] = atom->s[2];
-        atom->s[2] = atom->s[1];
-        atom->s[1] = atom->s[0];
-        //atom->s[0] = workspace->s[i];
-        atom->s[0] = workspace->x[i][0];
-
-        atom->t[3] = atom->t[2];
-        atom->t[2] = atom->t[1];
-        atom->t[1] = atom->t[0];
-        //atom->t[0] = workspace->t[i];
-        atom->t[0] = workspace->x[i][1];
+        q[i] = workspace->x[i][0] - u * workspace->x[i][1];
+        system->my_atoms[i].q = q[i];
     }
 
     Dist( system, mpi_data, q, REAL_PTR_TYPE, MPI_DOUBLE, real_packer );
+
     for ( i = system->n; i < system->N; ++i )
     {
         system->my_atoms[i].q = q[i];
     }
 
-    sfree( q, "Calculate_Charges::q" );
+    sfree( q, "Calculate_Charges_QEq::q" );
 }
 
 
-void QEq( reax_system *system, control_params *control, simulation_data *data,
-        storage *workspace, output_controls *out_control,
-        mpi_datatypes *mpi_data )
+/* Get atomic charge q for EE method
+ */
+static void Calculate_Charges_EE( const reax_system * const system,
+        storage * const workspace, const mpi_datatypes * const mpi_data )
 {
-    int s_matvecs, t_matvecs;
-
-    Init_MatVec( system, data, control, workspace, mpi_data );
-
-    //if( data->step == 50010 ) {
-    //  Print_Linear_System( system, control, workspace, data->step );
-    // }
+    int i;
 
-#if defined(DEBUG)
-    fprintf( stderr, "p%d: initialized qEq\n", system->my_rank );
-    //Print_Linear_System( system, control, workspace, data->step );
-#endif
+    for ( i = 0; i < system->N; ++i )
+    {
+        system->my_atoms[i].q = workspace->s[i];
+    }
+}
 
-    //MATRIX CHANGES
-    s_matvecs = dual_CG( system, workspace, &workspace->H, workspace->b,
-            control->cm_solver_q_err, workspace->x, mpi_data, out_control->log, data );
-    t_matvecs = 0;
-    //fprintf (stderr, "Host: First CG complated with iterations: %d \n", s_matvecs);
 
-    //s_matvecs = CG(system, workspace, workspace->H, workspace->b_s, //newQEq sCG
-    // control->cm_solver_q_err, workspace->s, mpi_data, out_control->log );
-    //s_matvecs = PCG( system, workspace, workspace->H, workspace->b_s,
-    //   control->cm_solver_q_err, workspace->L, workspace->U, workspace->s,
-    //   mpi_data, out_control->log );
+/* Main driver method for QEq kernel
+ *  1) init / setup routines for preconditioning of linear solver
+ *  2) compute preconditioner
+ *  3) extrapolate charges
+ *  4) perform 2 linear solves
+ *  5) compute atomic charges based on output of (4)
+ */
+static void QEq( reax_system * const system, control_params * const control,
+        simulation_data * const data, storage * const workspace,
+        const output_controls * const out_control,
+        const mpi_datatypes * const mpi_data )
+{
+    int iters;
 
-#if defined(DEBUG)
-    fprintf( stderr, "p%d: first CG completed\n", system->my_rank );
-#endif
+    Init_Linear_Solver( system, data, control, workspace, mpi_data );
 
-    //t_matvecs = CG(system, workspace, workspace->H, workspace->b_t, //newQEq sCG
-    // control->cm_solver_q_err, workspace->t, mpi_data, out_control->log );
-    //t_matvecs = PCG( system, workspace, workspace->H, workspace->b_t,
-    //   control->cm_solver_q_err, workspace->L, workspace->U, workspace->t,
-    //   mpi_data, out_control->log );
+    if ( control->cm_solver_pre_comp_refactor > 0 &&
+            ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) )
+        
+    {
+        Setup_Preconditioner_QEq( system, control, data, workspace );
 
-#if defined(DEBUG)
-    fprintf( stderr, "p%d: second CG completed\n", system->my_rank );
-#endif
+        Compute_Preconditioner_QEq( system, control, data, workspace );
+    }
 
-    Calculate_Charges( system, workspace, mpi_data );
+    Extrapolate_Charges_QEq( system, control, data, workspace );
 
-#if defined(DEBUG)
-    fprintf( stderr, "p%d: computed charges\n", system->my_rank );
-    //Print_Charges( system );
-#endif
+    switch ( control->cm_solver_type )
+    {
+    case CG_S:
+        iters = dual_CG( system, workspace, &workspace->H, workspace->b,
+                control->cm_solver_q_err, workspace->x, mpi_data, out_control->log, data );
+
+//        iters = CG( system, workspace, workspace->H, workspace->b_s, //newQEq sCG
+//                control->cm_solver_q_err, workspace->s, mpi_data, out_control->log );
+//        iters += PCG( system, workspace, workspace->H, workspace->b_s,
+//                control->cm_solver_q_err, workspace->L, workspace->U, workspace->s,
+//                mpi_data, out_control->log );
+        break;
+
+    case GMRES_S:
+    case GMRES_H_S:
+    case SDM_S:
+    case BiCGStab_S:
+    default:
+        fprintf( stderr, "[ERROR] Unrecognized QEq solver selection. Terminating...\n" );
+        exit( INVALID_INPUT );
+        break;
+    }
 
 #if defined(LOG_PERFORMANCE)
     if ( system->my_rank == MASTER_NODE )
     {
-        data->timing.s_matvecs += s_matvecs;
-        data->timing.t_matvecs += t_matvecs;
+        data->timing.cm_solver_iters += iters;
     }
 #endif
+
+    Calculate_Charges_QEq( system, workspace, mpi_data );
+}
+
+
+/* Main driver method for EE kernel
+ *  1) init / setup routines for preconditioning of linear solver
+ *  2) compute preconditioner
+ *  3) extrapolate charges
+ *  4) perform 1 linear solve
+ *  5) compute atomic charges based on output of (4)
+ */
+static void EE( reax_system * const system, control_params * const control,
+        simulation_data * const data, storage * const workspace,
+        const output_controls * const out_control,
+        const mpi_datatypes * const mpi_data )
+{
+    int iters;
+
+    Init_Linear_Solver( system, data, control, workspace, mpi_data );
+
+    if ( control->cm_solver_pre_comp_refactor > 0 &&
+            ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) )
+    {
+        Setup_Preconditioner_EE( system, control, data, workspace );
+
+        Compute_Preconditioner_EE( system, control, data, workspace );
+    }
+
+    Extrapolate_Charges_EE( system, control, data, workspace );
+
+    switch ( control->cm_solver_type )
+    {
+    case GMRES_S:
+    case GMRES_H_S:
+    case CG_S:
+    case SDM_S:
+    case BiCGStab_S:
+    default:
+        fprintf( stderr, "[ERROR] Unrecognized EE solver selection. Terminating...\n" );
+        exit( INVALID_INPUT );
+        break;
+    }
+
+    data->timing.cm_solver_iters += iters;
+
+    Calculate_Charges_EE( system, workspace, mpi_data );
+}
+
+
+/* Main driver method for ACKS2 kernel
+ *  1) init / setup routines for preconditioning of linear solver
+ *  2) compute preconditioner
+ *  3) extrapolate charges
+ *  4) perform 1 linear solve
+ *  5) compute atomic charges based on output of (4)
+ */
+static void ACKS2( reax_system * const system, control_params * const control,
+        simulation_data * const data, storage * const workspace,
+        const output_controls * const out_control,
+        const mpi_datatypes * const mpi_data )
+{
+    int iters;
+
+    Init_Linear_Solver( system, data, control, workspace, mpi_data );
+
+    if ( control->cm_solver_pre_comp_refactor > 0 &&
+            ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) )
+    {
+        Setup_Preconditioner_ACKS2( system, control, data, workspace );
+
+        Compute_Preconditioner_ACKS2( system, control, data, workspace );
+    }
+
+    Extrapolate_Charges_EE( system, control, data, workspace );
+
+    switch ( control->cm_solver_type )
+    {
+    case GMRES_S:
+    case GMRES_H_S:
+    case CG_S:
+    case SDM_S:
+    case BiCGStab_S:
+    default:
+        fprintf( stderr, "[ERROR] Unrecognized ACKS2 solver selection. Terminating...\n" );
+        exit( INVALID_INPUT );
+        break;
+    }
+
+    data->timing.cm_solver_iters += iters;
+
+    Calculate_Charges_EE( system, workspace, mpi_data );
+}
+
+
+void Compute_Charges( reax_system * const system, control_params * const control,
+        simulation_data * const data, storage * const workspace,
+        const output_controls * const out_control,
+        const mpi_datatypes * const mpi_data )
+{
+    switch ( control->charge_method )
+    {
+    case QEQ_CM:
+        QEq( system, control, data, workspace, out_control, mpi_data );
+        break;
+
+    case EE_CM:
+        EE( system, control, data, workspace, out_control, mpi_data );
+        break;
+
+    case ACKS2_CM:
+        ACKS2( system, control, data, workspace, out_control, mpi_data );
+        break;
+
+    default:
+        fprintf( stderr, "[ERROR] Invalid charge method. Terminating...\n" );
+        exit( UNKNOWN_OPTION );
+        break;
+    }
 }
diff --git a/PG-PuReMD/src/charges.h b/PG-PuReMD/src/charges.h
index 08af5641..83ffe17d 100644
--- a/PG-PuReMD/src/charges.h
+++ b/PG-PuReMD/src/charges.h
@@ -29,8 +29,9 @@
 extern "C" {
 #endif
 
-void QEq( reax_system*, control_params*, simulation_data*,
-        storage*, output_controls*, mpi_datatypes* );
+void Compute_Charges( reax_system * const, control_params * const,
+        simulation_data * const, storage * const,
+        const output_controls * const, const mpi_datatypes * const );
 
 #ifdef __cplusplus
 }
diff --git a/PG-PuReMD/src/control.c b/PG-PuReMD/src/control.c
index 863032b5..de09e775 100644
--- a/PG-PuReMD/src/control.c
+++ b/PG-PuReMD/src/control.c
@@ -85,6 +85,8 @@ void Read_Control_File( char *control_file, control_params* control,
     control->cm_solver_restart = 50;
     control->cm_solver_q_err = 0.000001;
     control->cm_domain_sparsify_enabled = FALSE;
+    control->cm_init_guess_extrap1 = 3;
+    control->cm_init_guess_extrap2 = 2;
     control->cm_domain_sparsity = 1.0;
     control->cm_solver_pre_comp_type = DIAG_PC;
     control->cm_solver_pre_comp_sweeps = 3;
@@ -309,6 +311,16 @@ void Read_Control_File( char *control_file, control_params* control,
                     control->cm_domain_sparsify_enabled = TRUE;
                 }
             }
+            else if ( strncmp(tmp[0], "cm_init_guess_extrap1", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->cm_init_guess_extrap1 = ival;
+            }
+            else if ( strncmp(tmp[0], "cm_init_guess_extrap2", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->cm_init_guess_extrap2 = ival;
+            }
             else if ( strcmp(tmp[0], "cm_solver_pre_comp_type") == 0 )
             {
                 ival = atoi( tmp[1] );
@@ -329,6 +341,11 @@ void Read_Control_File( char *control_file, control_params* control,
                 ival = atoi( tmp[1] );
                 control->cm_solver_pre_comp_sweeps = ival;
             }
+            else if ( strncmp(tmp[0], "cm_solver_pre_comp_sai_thres", MAX_LINE) == 0 )
+            {
+                val = atof( tmp[1] );
+                control->cm_solver_pre_comp_sai_thres = val;
+            }
             else if ( strcmp(tmp[0], "cm_solver_pre_app_type") == 0 )
             {
                 ival = atoi( tmp[1] );
diff --git a/PG-PuReMD/src/cuda/cuda_charges.cu b/PG-PuReMD/src/cuda/cuda_charges.cu
index c22058e9..bf67d734 100644
--- a/PG-PuReMD/src/cuda/cuda_charges.cu
+++ b/PG-PuReMD/src/cuda/cuda_charges.cu
@@ -250,7 +250,7 @@ void Cuda_QEq( reax_system *system, control_params *control, simulation_data
         *data, storage *workspace, output_controls *out_control, mpi_datatypes
         *mpi_data )
 {
-    int s_matvecs, t_matvecs;
+    int iters;
 
     Cuda_Init_MatVec( system, workspace );
 
@@ -271,10 +271,9 @@ void Cuda_QEq( reax_system *system, control_params *control, simulation_data
         break;
 
     case CG_S:
-        s_matvecs = Cuda_dual_CG( system, control, workspace, &dev_workspace->H,
+        iters = Cuda_dual_CG( system, control, workspace, &dev_workspace->H,
                 dev_workspace->b, control->cm_solver_q_err, dev_workspace->x, mpi_data,
                 out_control->log, data );
-        t_matvecs = 0;
         break;
 
 
@@ -289,8 +288,7 @@ void Cuda_QEq( reax_system *system, control_params *control, simulation_data
 #if defined(LOG_PERFORMANCE)
     if ( system->my_rank == MASTER_NODE )
     {
-        data->timing.s_matvecs += s_matvecs;
-        data->timing.t_matvecs += t_matvecs;
+        data->timing.cm_solver_iters += iters;
     }
 #endif
 }
@@ -300,7 +298,7 @@ void Cuda_EE( reax_system *system, control_params *control, simulation_data
         *data, storage *workspace, output_controls *out_control, mpi_datatypes
         *mpi_data )
 {
-    int s_matvecs, t_matvecs;
+    int iters;
 
     Cuda_Init_MatVec( system, workspace );
 
@@ -314,9 +312,8 @@ void Cuda_EE( reax_system *system, control_params *control, simulation_data
         break;
 
     case CG_S:
-        s_matvecs = Cuda_CG( system, control, workspace, &dev_workspace->H,
+        iters = Cuda_CG( system, control, workspace, &dev_workspace->H,
                 dev_workspace->b_s, control->cm_solver_q_err, dev_workspace->s, mpi_data );
-        t_matvecs = 0;
         break;
 
 
@@ -331,8 +328,7 @@ void Cuda_EE( reax_system *system, control_params *control, simulation_data
 #if defined(LOG_PERFORMANCE)
     if ( system->my_rank == MASTER_NODE )
     {
-        data->timing.s_matvecs += s_matvecs;
-        data->timing.t_matvecs += t_matvecs;
+        data->timing.cm_solver_iters += iters;
     }
 #endif
 }
@@ -342,7 +338,7 @@ void Cuda_ACKS2( reax_system *system, control_params *control, simulation_data
         *data, storage *workspace, output_controls *out_control, mpi_datatypes
         *mpi_data )
 {
-    int s_matvecs, t_matvecs;
+    int iters;
 
     Cuda_Init_MatVec( system, workspace );
 
@@ -356,14 +352,13 @@ void Cuda_ACKS2( reax_system *system, control_params *control, simulation_data
         break;
 
     case CG_S:
-        s_matvecs = Cuda_CG( system, control, workspace, &dev_workspace->H,
+        iters = Cuda_CG( system, control, workspace, &dev_workspace->H,
                 dev_workspace->b_s, control->cm_solver_q_err, dev_workspace->s, mpi_data );
-        t_matvecs = 0;
         break;
 
 
     default:
-        fprintf( stderr, "Unrecognized QEq solver selection. Terminating...\n" );
+        fprintf( stderr, "[ERROR] Unrecognized QEq solver selection. Terminating...\n" );
         exit( INVALID_INPUT );
         break;
     }
@@ -373,8 +368,7 @@ void Cuda_ACKS2( reax_system *system, control_params *control, simulation_data
 #if defined(LOG_PERFORMANCE)
     if ( system->my_rank == MASTER_NODE )
     {
-        data->timing.s_matvecs += s_matvecs;
-        data->timing.t_matvecs += t_matvecs;
+        data->timing.cm_solver_iters += iters;
     }
 #endif
 }
diff --git a/PG-PuReMD/src/forces.c b/PG-PuReMD/src/forces.c
index f8b6e7f3..491f888e 100644
--- a/PG-PuReMD/src/forces.c
+++ b/PG-PuReMD/src/forces.c
@@ -966,7 +966,7 @@ int Compute_Forces( reax_system *system, control_params *control,
 #if defined(PURE_REAX)
         if ( charge_flag == TRUE )
         {
-            QEq( system, control, data, workspace, out_control, mpi_data );
+            Compute_Charges( system, control, data, workspace, out_control, mpi_data );
         }
 
 #if defined(LOG_PERFORMANCE)
diff --git a/PG-PuReMD/src/io_tools.c b/PG-PuReMD/src/io_tools.c
index e0e763af..7f704aee 100644
--- a/PG-PuReMD/src/io_tools.c
+++ b/PG-PuReMD/src/io_tools.c
@@ -1343,7 +1343,7 @@ void Output_Results( reax_system *system, control_params *control,
                     data->timing.nbrs * denom, data->timing.init_forces * denom,
                     data->timing.bonded * denom, data->timing.nonb * denom,
                     data->timing.cm * denom,
-                    (int)((data->timing.s_matvecs + data->timing.t_matvecs) * denom),
+                    (int)(data->timing.cm_solver_iters * denom),
                     data->timing.num_retries );
 
             Reset_Timing( &(data->timing) );
diff --git a/PG-PuReMD/src/lin_alg.c b/PG-PuReMD/src/lin_alg.c
index f16d9e23..7686e2d3 100644
--- a/PG-PuReMD/src/lin_alg.c
+++ b/PG-PuReMD/src/lin_alg.c
@@ -73,6 +73,31 @@ void dual_Sparse_MatVec( sparse_matrix *A, rvec2 *x, rvec2 *b, int N )
 }
 
 
+/* Diagonal (Jacobi) preconditioner computation */
+real diag_pre_comp( const reax_system * const system, real * const Hdia_inv )
+{
+    unsigned int i;
+    real start;
+
+    start = Get_Time( );
+
+    for ( i = 0; i < system->n; ++i )
+    {
+//        if ( H->entries[H->start[i + 1] - 1].val != 0.0 )
+//        {
+//            Hdia_inv[i] = 1.0 / H->entries[H->start[i + 1] - 1].val;
+            Hdia_inv[i] = 1.0 / system->reax_param.sbp[ system->my_atoms[i].type ].eta;
+//        }
+//        else
+//        {
+//            Hdia_inv[i] = 1.0;
+//        }
+    }
+
+    return Get_Timing_Info( start );
+}
+
+
 int dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, rvec2
         *b, real tol, rvec2 *x, mpi_datatypes* mpi_data, FILE *fout,
         simulation_data *data )
diff --git a/PG-PuReMD/src/lin_alg.h b/PG-PuReMD/src/lin_alg.h
index 4b7ba2f0..1047686d 100644
--- a/PG-PuReMD/src/lin_alg.h
+++ b/PG-PuReMD/src/lin_alg.h
@@ -29,6 +29,9 @@
 extern "C" {
 #endif
 
+//real diag_pre_comp( const sparse_matrix * const, real * const );
+real diag_pre_comp( const reax_system * const, real * const );
+
 int GMRES( reax_system*, storage*, sparse_matrix*,
         real*, real, real*, mpi_datatypes*, FILE* );
 
diff --git a/PG-PuReMD/src/reax_types.h b/PG-PuReMD/src/reax_types.h
index e0ffa7bb..f984eb39 100644
--- a/PG-PuReMD/src/reax_types.h
+++ b/PG-PuReMD/src/reax_types.h
@@ -315,7 +315,6 @@
 #endif
 
 
-/******************* ENUMERATIONS *************************/
 /* ensemble type */
 enum ensemble
 {
@@ -331,11 +330,11 @@ enum ensemble
 /* interaction list type */
 enum lists
 {
-    BONDS = 0,
-    OLD_BONDS = 1,
-    THREE_BODIES = 2,
-    HBONDS = 3,
-    FAR_NBRS = 4,
+    FAR_NBRS = 0,
+    BONDS = 1,
+    OLD_BONDS = 2,
+    THREE_BODIES = 3,
+    HBONDS = 4,
     DBOS = 5,
     DDELTAS = 6,
     LIST_N = 7,
@@ -422,6 +421,7 @@ enum solver
     GMRES_H_S = 1,
     CG_S = 2,
     SDM_S = 3,
+    BiCGStab_S = 4,
 };
 
 enum pre_comp
@@ -432,6 +432,7 @@ enum pre_comp
     ILU_PAR_PC = 3,
     ILUT_PAR_PC = 4,
     ILU_SUPERLU_MT_PC = 5,
+    SAI_PC = 6,
 };
 
 enum pre_app
@@ -480,13 +481,6 @@ enum traj_methods
     TF_N = 2,
 };
 
-/* ??? */
-enum molecules
-{
-    UNKNOWN = 0,
-    WATER = 1,
-};
-
 
 /********************** TYPE DEFINITIONS ********************/
 /* 3D vector, integer values */
@@ -1390,27 +1384,34 @@ typedef struct
     /* flag to control if force computations are tablulated */
     int tabulate;
 
-    /**/
+    /* method for computing atomic charges */
     unsigned int charge_method;
     /* frequency (in terms of simulation time steps) at which to
      * re-compute atomic charge distribution */
     int charge_freq;
-    /**/
+    /* iterative linear solver type */
     unsigned int cm_solver_type;
-    /**/
+    /* system net charge */
     real cm_q_net;
-    /**/
+    /* max. iterations for linear solver */
     unsigned int cm_solver_max_iters;
-    /**/
+    /* max. iterations before restarting in specific solvers, e.g., GMRES(k) */
     unsigned int cm_solver_restart;
     /* error tolerance of solution produced by charge distribution
      * sparse iterative linear solver */
     real cm_solver_q_err;
-    /**/
+    /* ratio used in computing sparser charge matrix,
+     * between 0.0 and 1.0 */
     real cm_domain_sparsity;
-    /**/
+    /* TRUE if enabled, FALSE otherwise */
     unsigned int cm_domain_sparsify_enabled;
-    /**/
+    /* order of spline extrapolation used for computing initial guess
+     * to linear solver */
+    unsigned int cm_init_guess_extrap1;
+    /* order of spline extrapolation used for computing initial guess
+     * to linear solver */
+    unsigned int cm_init_guess_extrap2;
+    /* preconditioner type for linear solver */
     unsigned int cm_solver_pre_comp_type;
     /* frequency (in terms of simulation time steps) at which to recompute
      * incomplete factorizations */
@@ -1418,11 +1419,17 @@ typedef struct
     /* drop tolerance of incomplete factorization schemes (ILUT, ICHOLT, etc.)
      * used for preconditioning the iterative linear solver used in charge distribution */
     real cm_solver_pre_comp_droptol;
-    /**/
+    /* num. of sweeps for computing preconditioner factors
+     * in fine-grained iterative methods (FG-ICHOL, FG-ILU) */
     unsigned int cm_solver_pre_comp_sweeps;
-    /**/
+    /* relative num. of non-zeros to charge matrix used to
+     * compute the sparse approximate inverse preconditioner,
+     * between 0.0 and 1.0 */
+    real cm_solver_pre_comp_sai_thres;
+    /* preconditioner application type */
     unsigned int cm_solver_pre_app_type;
-    /**/
+    /* num. of iterations used to apply preconditioner via
+     * Jacobi relaxation scheme (truncated Neumann series) */
     unsigned int cm_solver_pre_app_jacobi_iters;
 
     /* initial temperature of simulation, in Kelvin */
@@ -1544,12 +1551,11 @@ typedef struct
     real end;
     /* total elapsed time of event */
     real elapsed;
-
     /* total simulation time */
     real total;
     /* communication time */
     real comm;
-    /* neighbor (i.e., Verlet) list generation time */
+    /* neighbor list generation time */
     real nbrs;
     /* force initialization time */
     real init_forces;
@@ -1559,10 +1565,22 @@ typedef struct
     real nonb;
     /* atomic charge distribution calculation time */
     real cm;
-    /* num. of steps in iterative linear solver for charge distribution (QEq, first solve) */
-    int s_matvecs;
-    /* num. of steps in iterative linear solver for charge distribution (QEq, second solve) */
-    int t_matvecs;
+    /**/
+    real cm_sort_mat_rows;
+    /**/
+    real cm_solver_pre_comp;
+    /**/
+    real cm_solver_pre_app;
+    /* num. of steps in iterative linear solver for charge distribution */
+    int cm_solver_iters;
+    /**/
+    real cm_solver_spmv;
+    /**/
+    real cm_solver_vector_ops;
+    /**/
+    real cm_solver_orthog;
+    /**/
+    real cm_solver_tri_solve;
     /* num. of retries in main sim. loop */
     int num_retries;
 } reax_timing;
@@ -1948,15 +1966,31 @@ typedef struct
     /**/
     int *bond_mark;
 
-    /* charge matrix storage */
+    /* charge method storage */
     /* charge matrix */
     sparse_matrix H;
-    /* preconditioner */
+    /* charge matrix (full) */
+    sparse_matrix H_full;
+    /* sparser charge matrix */
+    sparse_matrix H_sp;
+    /* permuted charge matrix (graph coloring) */
+    sparse_matrix H_p;
+    /* sparsity pattern of charge matrix, used in
+     * computing a sparse approximate inverse preconditioner */
+    sparse_matrix H_spar_patt;
+    /* sparsity pattern of charge matrix (full), used in
+     * computing a sparse approximate inverse preconditioner */
+    sparse_matrix H_spar_patt_full;
+    /* sparse approximate inverse preconditioner */
+    sparse_matrix H_app_inv;
+    /* incomplete Cholesky or LU preconditioner */
     sparse_matrix L;
-    /* preconditioner */
+    /* incomplete Cholesky or LU preconditioner */
     sparse_matrix U;
-    /* preconditioner */
+    /* Jacobi preconditioner */
     real *Hdia_inv;
+    /* row drop tolerences for incomplete Cholesky preconditioner */
+    real *droptol;
     /**/
     real *b_s;
     /**/
@@ -1970,8 +2004,6 @@ typedef struct
     /**/
     real *t;
     /**/
-    real *droptol;
-    /**/
     rvec2 *b;
     /**/
     rvec2 *x;
diff --git a/PG-PuReMD/src/reset_tools.c b/PG-PuReMD/src/reset_tools.c
index 75863438..79386be5 100644
--- a/PG-PuReMD/src/reset_tools.c
+++ b/PG-PuReMD/src/reset_tools.c
@@ -118,8 +118,7 @@ void Reset_Timing( reax_timing *rt )
     rt->bonded = 0.0;
     rt->nonb = 0.0;
     rt->cm = 0.0;
-    rt->s_matvecs = 0;
-    rt->t_matvecs = 0;
+    rt->cm_solver_iters = 0;
     rt->num_retries = 0;
 }
 
diff --git a/PG-PuReMD/src/tool_box.c b/PG-PuReMD/src/tool_box.c
index 1093a99e..36022bcd 100644
--- a/PG-PuReMD/src/tool_box.c
+++ b/PG-PuReMD/src/tool_box.c
@@ -223,13 +223,13 @@ int Get_Atom_Type( reax_interaction *reax_param, char *s )
 
 char *Get_Element( reax_system *system, int i )
 {
-    return &( system->reax_param.sbp[system->my_atoms[i].type].name[0] );
+    return &system->reax_param.sbp[system->my_atoms[i].type].name[0];
 }
 
 
 char *Get_Atom_Name( reax_system *system, int i )
 {
-    return &(system->my_atoms[i].name[0]);
+    return &system->my_atoms[i].name[0];
 }
 
 
@@ -254,12 +254,12 @@ int Tokenize( const char* s, char*** tok )
 {
     char test[MAX_LINE];
     char *sep = "\t \n!=";
-    char *word;
+    char *word, *saveptr;
     int count = 0;
 
     strncpy( test, s, MAX_LINE );
 
-    for ( word = strtok(test, sep); word; word = strtok(NULL, sep) )
+    for ( word = strtok_r(test, sep, &saveptr); word; word = strtok_r(NULL, sep, &saveptr) )
     {
         strncpy( (*tok)[count], word, MAX_LINE );
         count++;
@@ -302,7 +302,7 @@ void * smalloc( size_t n, const char *name )
     }
 
 #if defined(DEBUG)
-    fprintf( stderr, "[INFO] granted memory at address: %p\n", (void *) ptr );
+    fprintf( stderr, "[INFO] granted memory at address: %p\n", ptr );
     fflush( stderr );
 #endif
 
@@ -335,7 +335,7 @@ void * srealloc( void *ptr, size_t n, const char *name )
                 n, name );
     }
 
-    fprintf( stderr, "[INFO] requesting memory for %s\n", name );
+    fprintf( stderr, "[INFO] requesting %zu bytes for %s\n", n, name );
     fflush( stderr );
 #endif
 
@@ -351,7 +351,7 @@ void * srealloc( void *ptr, size_t n, const char *name )
     }
 
 #if defined(DEBUG)
-    fprintf( stderr, "[INFO] address: %p [SREALLOC]\n", (void *) new_ptr );
+    fprintf( stderr, "[INFO] granted memory at address: %p\n", new_ptr );
     fflush( stderr );
 #endif
 
@@ -379,7 +379,7 @@ void * scalloc( size_t n, size_t size, const char *name )
     }
 
 #if defined(DEBUG)
-    fprintf( stderr, "[INFO] requesting memory for %s\n", name );
+    fprintf( stderr, "[INFO] requesting %zu bytes for %s\n", n * size, name );
     fflush( stderr );
 #endif
 
@@ -393,7 +393,7 @@ void * scalloc( size_t n, size_t size, const char *name )
     }
 
 #if defined(DEBUG)
-    fprintf( stderr, "[INFO] address: %p [SCALLOC]\n", (void *) ptr );
+    fprintf( stderr, "[INFO] granted memory at address: %p\n", ptr );
     fflush( stderr );
 #endif
 
diff --git a/sPuReMD/src/analyze.c b/sPuReMD/src/analyze.c
index 1d0b9c36..c2a6a6ba 100644
--- a/sPuReMD/src/analyze.c
+++ b/sPuReMD/src/analyze.c
@@ -34,14 +34,9 @@
 
 enum atoms
 {
-    C_ATOM = 0,
-    H_ATOM = 1,
-    O_ATOM = 2,
-    N_ATOM = 3,
-    S_ATOM = 4,
-    SI_ATOM = 5,
-    GE_ATOM = 6,
-    X_ATOM = 7,
+    O_ATOM = 0,
+    N_ATOM = 1,
+    SI_ATOM = 2,
 };
 
 enum molecule_type
diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c
index 4ff5a342..875ce97e 100644
--- a/sPuReMD/src/charges.c
+++ b/sPuReMD/src/charges.c
@@ -55,24 +55,24 @@ static void Extrapolate_Charges_QEq( const reax_system * const system,
         /* linear */
         else if ( control->cm_init_guess_extrap1 == 1 )
         {
-            s_tmp = 2 * workspace->s[0][i] - workspace->s[1][i];
+            s_tmp = 2.0 * workspace->s[0][i] - workspace->s[1][i];
         }
         /* quadratic */
         else if ( control->cm_init_guess_extrap1 == 2 )
         {
-            s_tmp = workspace->s[2][i] + 3 * (workspace->s[0][i]-workspace->s[1][i]);
+            s_tmp = workspace->s[2][i] + 3.0 * (workspace->s[0][i]-workspace->s[1][i]);
         }
         /* cubic */
         else if ( control->cm_init_guess_extrap1 == 3 )
         {
-            s_tmp = 4 * (workspace->s[0][i] + workspace->s[2][i]) -
-                    (6 * workspace->s[1][i] + workspace->s[3][i] );
+            s_tmp = 4.0 * (workspace->s[0][i] + workspace->s[2][i]) -
+                    (6.0 * workspace->s[1][i] + workspace->s[3][i]);
         }
         /* 4th order */
         else if ( control->cm_init_guess_extrap1 == 4 )
         {
-            s_tmp = 5 * (workspace->s[0][i] - workspace->s[3][i]) +
-                10 * (-workspace->s[1][i] + workspace->s[2][i] ) + workspace->s[4][i];
+            s_tmp = 5.0 * (workspace->s[0][i] - workspace->s[3][i]) +
+                10.0 * (-1.0 * workspace->s[1][i] + workspace->s[2][i]) + workspace->s[4][i];
         }
         else
         {
@@ -92,19 +92,19 @@ static void Extrapolate_Charges_QEq( const reax_system * const system,
         /* quadratic */
         else if ( control->cm_init_guess_extrap2 == 2 )
         {
-            t_tmp = workspace->t[2][i] + 3 * (workspace->t[0][i] - workspace->t[1][i]);
+            t_tmp = workspace->t[2][i] + 3.0 * (workspace->t[0][i] - workspace->t[1][i]);
         }
         /* cubic */
         else if ( control->cm_init_guess_extrap2 == 3 )
         {
             t_tmp = 4.0 * (workspace->t[0][i] + workspace->t[2][i]) -
-                (6.0 * workspace->t[1][i] + workspace->t[3][i] );
+                (6.0 * workspace->t[1][i] + workspace->t[3][i]);
         }
         /* 4th order */
         else if ( control->cm_init_guess_extrap2 == 4 )
         {
             t_tmp = 5.0 * (workspace->t[0][i] - workspace->t[3][i]) +
-                10.0 * (-workspace->t[1][i] + workspace->t[2][i] ) + workspace->t[4][i];
+                10.0 * (-1.0 * workspace->t[1][i] + workspace->t[2][i]) + workspace->t[4][i];
         }
         else
         {
@@ -186,8 +186,7 @@ static void Extrapolate_Charges_EE( const reax_system * const system,
  */
 static void Compute_Preconditioner_QEq( const reax_system * const system,
         const control_params * const control,
-        simulation_data * const data, static_storage * const workspace,
-        const reax_list * const far_nbrs )
+        simulation_data * const data, static_storage * const workspace )
 {
     real time;
     sparse_matrix *Hptr;
@@ -292,8 +291,7 @@ static void Compute_Preconditioner_QEq( const reax_system * const system,
  */
 //static void Compute_Preconditioner_EE( const reax_system * const system,
 //        const control_params * const control,
-//        simulation_data * const data, static_storage * const workspace,
-//        const reax_list * const far_nbrs )
+//        simulation_data * const data, static_storage * const workspace )
 //{
 //    int i, top;
 //    static real * ones = NULL, * x = NULL, * y = NULL;
@@ -536,8 +534,7 @@ static void Compute_Preconditioner_QEq( const reax_system * const system,
  */
 static void Compute_Preconditioner_EE( const reax_system * const system,
         const control_params * const control,
-        simulation_data * const data, static_storage * const workspace,
-        const reax_list * const far_nbrs )
+        simulation_data * const data, static_storage * const workspace )
 {
     real time;
     sparse_matrix *Hptr;
@@ -668,8 +665,7 @@ static void Compute_Preconditioner_EE( const reax_system * const system,
  */
 static void Compute_Preconditioner_ACKS2( const reax_system * const system,
         const control_params * const control,
-        simulation_data * const data, static_storage * const workspace,
-        const reax_list * const far_nbrs )
+        simulation_data * const data, static_storage * const workspace )
 {
     real time;
     sparse_matrix *Hptr;
@@ -800,8 +796,7 @@ static void Compute_Preconditioner_ACKS2( const reax_system * const system,
 
 static void Setup_Preconditioner_QEq( const reax_system * const system,
         const control_params * const control,
-        simulation_data * const data, static_storage * const workspace,
-        const reax_list * const far_nbrs )
+        simulation_data * const data, static_storage * const workspace )
 {
     int fillin;
     real time;
@@ -934,8 +929,7 @@ static void Setup_Preconditioner_QEq( const reax_system * const system,
  */
 static void Setup_Preconditioner_EE( const reax_system * const system,
         const control_params * const control,
-        simulation_data * const data, static_storage * const workspace,
-        const reax_list * const far_nbrs )
+        simulation_data * const data, static_storage * const workspace )
 {
     int fillin;
     real time;
@@ -1086,8 +1080,7 @@ static void Setup_Preconditioner_EE( const reax_system * const system,
  */
 static void Setup_Preconditioner_ACKS2( const reax_system * const system,
         const control_params * const control,
-        simulation_data * const data, static_storage * const workspace,
-        const reax_list * const far_nbrs )
+        simulation_data * const data, static_storage * const workspace )
 {
     int fillin;
     real time;
@@ -1291,7 +1284,7 @@ static void Calculate_Charges_EE( const reax_system * const system,
  */
 static void QEq( reax_system * const system, control_params * const control,
         simulation_data * const data, static_storage * const workspace,
-        const reax_list * const far_nbrs, const output_controls * const out_control )
+        const output_controls * const out_control )
 {
     int iters;
 
@@ -1299,9 +1292,9 @@ static void QEq( reax_system * const system, control_params * const control,
             ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) )
         
     {
-        Setup_Preconditioner_QEq( system, control, data, workspace, far_nbrs );
+        Setup_Preconditioner_QEq( system, control, data, workspace );
 
-        Compute_Preconditioner_QEq( system, control, data, workspace, far_nbrs );
+        Compute_Preconditioner_QEq( system, control, data, workspace );
     }
 
     Extrapolate_Charges_QEq( system, control, data, workspace );
@@ -1382,16 +1375,16 @@ static void QEq( reax_system * const system, control_params * const control,
  */
 static void EE( reax_system * const system, control_params * const control,
         simulation_data * const data, static_storage * const workspace,
-        const reax_list * const far_nbrs, const output_controls * const out_control )
+        const output_controls * const out_control )
 {
     int iters;
 
     if ( control->cm_solver_pre_comp_refactor > 0 &&
             ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) )
     {
-        Setup_Preconditioner_EE( system, control, data, workspace, far_nbrs );
+        Setup_Preconditioner_EE( system, control, data, workspace );
 
-        Compute_Preconditioner_EE( system, control, data, workspace, far_nbrs );
+        Compute_Preconditioner_EE( system, control, data, workspace );
     }
 
     Extrapolate_Charges_EE( system, control, data, workspace );
@@ -1454,16 +1447,16 @@ static void EE( reax_system * const system, control_params * const control,
  */
 static void ACKS2( reax_system * const system, control_params * const control,
         simulation_data * const data, static_storage * const workspace,
-        const reax_list * const far_nbrs, const output_controls * const out_control )
+        const output_controls * const out_control )
 {
     int iters;
 
     if ( control->cm_solver_pre_comp_refactor > 0 &&
             ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) )
     {
-        Setup_Preconditioner_ACKS2( system, control, data, workspace, far_nbrs );
+        Setup_Preconditioner_ACKS2( system, control, data, workspace );
 
-        Compute_Preconditioner_ACKS2( system, control, data, workspace, far_nbrs );
+        Compute_Preconditioner_ACKS2( system, control, data, workspace );
     }
 
 //   Print_Linear_System( system, control, workspace, data->step );
@@ -1533,7 +1526,7 @@ static void ACKS2( reax_system * const system, control_params * const control,
 
 void Compute_Charges( reax_system * const system, control_params * const control,
         simulation_data * const data, static_storage * const workspace,
-        const reax_list * const far_nbrs, const output_controls * const out_control )
+        const output_controls * const out_control )
 {
 #if defined(DEBUG_FOCUS)
 #define SIZE (200)
@@ -1562,20 +1555,20 @@ void Compute_Charges( reax_system * const system, control_params * const control
     switch ( control->charge_method )
     {
     case QEQ_CM:
-        QEq( system, control, data, workspace, far_nbrs, out_control );
+        QEq( system, control, data, workspace, out_control );
         break;
 
     case EE_CM:
-        EE( system, control, data, workspace, far_nbrs, out_control );
+        EE( system, control, data, workspace, out_control );
         break;
 
     case ACKS2_CM:
-        ACKS2( system, control, data, workspace, far_nbrs, out_control );
+        ACKS2( system, control, data, workspace, out_control );
         break;
 
     default:
-        fprintf( stderr, "Invalid charge method. Terminating...\n" );
-        exit( INVALID_INPUT );
+        fprintf( stderr, "[ERROR] Invalid charge method. Terminating...\n" );
+        exit( UNKNOWN_OPTION );
         break;
     }
 }
diff --git a/sPuReMD/src/charges.h b/sPuReMD/src/charges.h
index 48028b0c..85ac44d3 100644
--- a/sPuReMD/src/charges.h
+++ b/sPuReMD/src/charges.h
@@ -26,8 +26,7 @@
 
 
 void Compute_Charges( reax_system* const, control_params* const, simulation_data* const,
-          static_storage* const, const reax_list* const,
-          const output_controls* const );
+          static_storage* const, const output_controls* const );
 
 
 #endif
diff --git a/sPuReMD/src/control.c b/sPuReMD/src/control.c
index bd95e4e9..baacc07b 100644
--- a/sPuReMD/src/control.c
+++ b/sPuReMD/src/control.c
@@ -32,9 +32,8 @@ void Read_Control_File( FILE* fp, reax_system *system, control_params* control,
         output_controls *out_control )
 {
     char *s, **tmp;
-    int i;
+    int c, i, ival;
     real val;
-    int ival;
 
     /* assign default values */
     strncpy( control->sim_name, "default.sim", MAX_STR );
@@ -149,410 +148,413 @@ void Read_Control_File( FILE* fp, reax_system *system, control_params* control,
     /* read control parameters file */
     while ( fgets( s, MAX_LINE, fp ) )
     {
-        Tokenize( s, &tmp );
+        c = Tokenize( s, &tmp );
 
-        if ( strncmp(tmp[0], "simulation_name", MAX_LINE) == 0 )
+        if ( c > 0 )
         {
-            strncpy( control->sim_name, tmp[1], MAX_STR );
-        }
-        //else if( strncmp(tmp[0], "restart", MAX_LINE) == 0 ) {
-        //  ival = atoi(tmp[1]);
-        //  control->restart = ival;
-        //}
-        else if ( strncmp(tmp[0], "restart_format", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->restart_format = ival;
-        }
-        else if ( strncmp(tmp[0], "restart_freq", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->restart_freq = ival;
-        }
-        else if ( strncmp(tmp[0], "random_vel", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->random_vel = ival;
-        }
-        else if ( strncmp(tmp[0], "reposition_atoms", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->reposition_atoms = ival;
-        }
-        else if ( strncmp(tmp[0], "ensemble_type", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->ensemble = ival;
-        }
-        else if ( strncmp(tmp[0], "nsteps", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->nsteps = ival;
-        }
-        else if ( strncmp(tmp[0], "dt", MAX_LINE) == 0 )
-        {
-            val = atof(tmp[1]);
-            control->dt = val * 1.e-3;  // convert dt from fs to ps!
-        }
-        else if ( strncmp(tmp[0], "periodic_boundaries", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->periodic_boundaries = ival;
-        }
-        else if ( strncmp(tmp[0], "geo_format", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->geo_format = ival;
-        }
-        else if ( strncmp(tmp[0], "restrict_bonds", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->restrict_bonds = ival;
-        }
-        else if ( strncmp(tmp[0], "tabulate_long_range", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->tabulate = ival;
-        }
-        else if ( strncmp(tmp[0], "reneighbor", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->reneighbor = ival;
-        }
-        else if ( strncmp(tmp[0], "vlist_buffer", MAX_LINE) == 0 )
-        {
-            val = atof(tmp[1]);
-            control->vlist_cut = val;
-        }
-        else if ( strncmp(tmp[0], "nbrhood_cutoff", MAX_LINE) == 0 )
-        {
-            val = atof(tmp[1]);
-            control->nbr_cut = val;
-        }
-        else if ( strncmp(tmp[0], "thb_cutoff", MAX_LINE) == 0 )
-        {
-            val = atof(tmp[1]);
-            control->thb_cut = val;
-        }
-        else if ( strncmp(tmp[0], "hbond_cutoff", MAX_LINE) == 0 )
-        {
-            val = atof( tmp[1] );
-            control->hb_cut = val;
-        }
-        else if ( strncmp(tmp[0], "charge_method", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->charge_method = ival;
-        }
-        else if ( strncmp(tmp[0], "cm_q_net", MAX_LINE) == 0 )
-        {
-            val = atof( tmp[1] );
-            control->cm_q_net = val;
-        }
-        else if ( strncmp(tmp[0], "cm_solver_type", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->cm_solver_type = ival;
-        }
-        else if ( strncmp(tmp[0], "cm_solver_max_iters", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->cm_solver_max_iters = ival;
-        }
-        else if ( strncmp(tmp[0], "cm_solver_restart", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->cm_solver_restart = ival;
-        }
-        else if ( strncmp(tmp[0], "cm_solver_q_err", MAX_LINE) == 0 )
-        {
-            val = atof( tmp[1] );
-            control->cm_solver_q_err = val;
-        }
-        else if ( strncmp(tmp[0], "cm_domain_sparsity", MAX_LINE) == 0 )
-        {
-            val = atof( tmp[1] );
-            control->cm_domain_sparsity = val;
-            if ( val < 1.0 )
+            if ( strncmp(tmp[0], "simulation_name", MAX_LINE) == 0 )
             {
-                control->cm_domain_sparsify_enabled = TRUE;
+                strncpy( control->sim_name, tmp[1], MAX_STR );
             }
-        }
-        else if ( strncmp(tmp[0], "cm_init_guess_extrap1", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->cm_init_guess_extrap1 = ival;
-        }
-        else if ( strncmp(tmp[0], "cm_init_guess_extrap2", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->cm_init_guess_extrap2 = ival;
-        }
-        else if ( strncmp(tmp[0], "cm_solver_pre_comp_type", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->cm_solver_pre_comp_type = ival;
-        }
-        else if ( strncmp(tmp[0], "cm_solver_pre_comp_refactor", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->cm_solver_pre_comp_refactor = ival;
-        }
-        else if ( strncmp(tmp[0], "cm_solver_pre_comp_droptol", MAX_LINE) == 0 )
-        {
-            val = atof( tmp[1] );
-            control->cm_solver_pre_comp_droptol = val;
-        }
-        else if ( strncmp(tmp[0], "cm_solver_pre_comp_sweeps", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->cm_solver_pre_comp_sweeps = ival;
-        }
-        else if ( strncmp(tmp[0], "cm_solver_pre_comp_sai_thres", MAX_LINE) == 0 )
-        {
-            val = atof( tmp[1] );
-            control->cm_solver_pre_comp_sai_thres = val;
-        }
-        else if ( strncmp(tmp[0], "cm_solver_pre_app_type", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->cm_solver_pre_app_type = ival;
-        }
-        else if ( strncmp(tmp[0], "cm_solver_pre_app_jacobi_iters", MAX_LINE) == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->cm_solver_pre_app_jacobi_iters = ival;
-        }
-        else if ( strncmp(tmp[0], "temp_init", MAX_LINE) == 0 )
-        {
-            val = atof(tmp[1]);
-            control->T_init = val;
-
-            if ( control->T_init < 0.001 )
+            //else if( strncmp(tmp[0], "restart", MAX_LINE) == 0 ) {
+            //  ival = atoi(tmp[1]);
+            //  control->restart = ival;
+            //}
+            else if ( strncmp(tmp[0], "restart_format", MAX_LINE) == 0 )
             {
-                control->T_init = 0.001;
+                ival = atoi(tmp[1]);
+                out_control->restart_format = ival;
             }
-        }
-        else if ( strncmp(tmp[0], "temp_final", MAX_LINE) == 0 )
-        {
-            val = atof(tmp[1]);
-            control->T_final = val;
-
-            if ( control->T_final < 0.1 )
+            else if ( strncmp(tmp[0], "restart_freq", MAX_LINE) == 0 )
             {
-                control->T_final = 0.1;
+                ival = atoi(tmp[1]);
+                out_control->restart_freq = ival;
             }
-        }
-        else if ( strncmp(tmp[0], "t_mass", MAX_LINE) == 0 )
-        {
-            val = atof(tmp[1]);
-            /* convert t_mass from fs to ps */
-            control->Tau_T = val * 1.e-3;
-        }
-        else if ( strncmp(tmp[0], "t_mode", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->T_mode = ival;
-        }
-        else if ( strncmp(tmp[0], "t_rate", MAX_LINE) == 0 )
-        {
-            val = atof(tmp[1]);
-            control->T_rate = val;
-        }
-        else if ( strncmp(tmp[0], "t_freq", MAX_LINE) == 0 )
-        {
-            val = atof(tmp[1]);
-            control->T_freq = val;
-        }
-        else if ( strncmp(tmp[0], "pressure", MAX_LINE) == 0 )
-        {
-            if ( control->ensemble == iNPT )
+            else if ( strncmp(tmp[0], "random_vel", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                control->random_vel = ival;
+            }
+            else if ( strncmp(tmp[0], "reposition_atoms", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                control->reposition_atoms = ival;
+            }
+            else if ( strncmp(tmp[0], "ensemble_type", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                control->ensemble = ival;
+            }
+            else if ( strncmp(tmp[0], "nsteps", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                control->nsteps = ival;
+            }
+            else if ( strncmp(tmp[0], "dt", MAX_LINE) == 0 )
             {
                 val = atof(tmp[1]);
-                control->P[0] = control->P[1] = control->P[2] = val;
+                control->dt = val * 1.e-3;  // convert dt from fs to ps!
+            }
+            else if ( strncmp(tmp[0], "periodic_boundaries", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->periodic_boundaries = ival;
+            }
+            else if ( strncmp(tmp[0], "geo_format", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->geo_format = ival;
+            }
+            else if ( strncmp(tmp[0], "restrict_bonds", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->restrict_bonds = ival;
+            }
+            else if ( strncmp(tmp[0], "tabulate_long_range", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->tabulate = ival;
+            }
+            else if ( strncmp(tmp[0], "reneighbor", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->reneighbor = ival;
             }
-            else if ( control->ensemble == sNPT )
+            else if ( strncmp(tmp[0], "vlist_buffer", MAX_LINE) == 0 )
             {
                 val = atof(tmp[1]);
-                control->P[0] = val;
-
-                val = atof(tmp[2]);
-                control->P[1] = val;
-
-                val = atof(tmp[3]);
-                control->P[2] = val;
+                control->vlist_cut = val;
             }
-        }
-        else if ( strncmp(tmp[0], "p_mass", MAX_LINE) == 0 )
-        {
-            if ( control->ensemble == iNPT )
+            else if ( strncmp(tmp[0], "nbrhood_cutoff", MAX_LINE) == 0 )
             {
                 val = atof(tmp[1]);
-                control->Tau_P[0] = val * 1.e-3;   // convert p_mass from fs to ps
+                control->nbr_cut = val;
             }
-            else if ( control->ensemble == sNPT )
+            else if ( strncmp(tmp[0], "thb_cutoff", MAX_LINE) == 0 )
             {
                 val = atof(tmp[1]);
-                control->Tau_P[0] = val * 1.e-3;   // convert p_mass from fs to ps
-
-                val = atof(tmp[2]);
-                control->Tau_P[1] = val * 1.e-3;   // convert p_mass from fs to ps
+                control->thb_cut = val;
+            }
+            else if ( strncmp(tmp[0], "hbond_cutoff", MAX_LINE) == 0 )
+            {
+                val = atof( tmp[1] );
+                control->hb_cut = val;
+            }
+            else if ( strncmp(tmp[0], "charge_method", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->charge_method = ival;
+            }
+            else if ( strncmp(tmp[0], "cm_q_net", MAX_LINE) == 0 )
+            {
+                val = atof( tmp[1] );
+                control->cm_q_net = val;
+            }
+            else if ( strncmp(tmp[0], "cm_solver_type", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->cm_solver_type = ival;
+            }
+            else if ( strncmp(tmp[0], "cm_solver_max_iters", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->cm_solver_max_iters = ival;
+            }
+            else if ( strncmp(tmp[0], "cm_solver_restart", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->cm_solver_restart = ival;
+            }
+            else if ( strncmp(tmp[0], "cm_solver_q_err", MAX_LINE) == 0 )
+            {
+                val = atof( tmp[1] );
+                control->cm_solver_q_err = val;
+            }
+            else if ( strncmp(tmp[0], "cm_domain_sparsity", MAX_LINE) == 0 )
+            {
+                val = atof( tmp[1] );
+                control->cm_domain_sparsity = val;
+                if ( val < 1.0 )
+                {
+                    control->cm_domain_sparsify_enabled = TRUE;
+                }
+            }
+            else if ( strncmp(tmp[0], "cm_init_guess_extrap1", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->cm_init_guess_extrap1 = ival;
+            }
+            else if ( strncmp(tmp[0], "cm_init_guess_extrap2", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->cm_init_guess_extrap2 = ival;
+            }
+            else if ( strncmp(tmp[0], "cm_solver_pre_comp_type", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->cm_solver_pre_comp_type = ival;
+            }
+            else if ( strncmp(tmp[0], "cm_solver_pre_comp_refactor", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->cm_solver_pre_comp_refactor = ival;
+            }
+            else if ( strncmp(tmp[0], "cm_solver_pre_comp_droptol", MAX_LINE) == 0 )
+            {
+                val = atof( tmp[1] );
+                control->cm_solver_pre_comp_droptol = val;
+            }
+            else if ( strncmp(tmp[0], "cm_solver_pre_comp_sweeps", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->cm_solver_pre_comp_sweeps = ival;
+            }
+            else if ( strncmp(tmp[0], "cm_solver_pre_comp_sai_thres", MAX_LINE) == 0 )
+            {
+                val = atof( tmp[1] );
+                control->cm_solver_pre_comp_sai_thres = val;
+            }
+            else if ( strncmp(tmp[0], "cm_solver_pre_app_type", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->cm_solver_pre_app_type = ival;
+            }
+            else if ( strncmp(tmp[0], "cm_solver_pre_app_jacobi_iters", MAX_LINE) == 0 )
+            {
+                ival = atoi( tmp[1] );
+                control->cm_solver_pre_app_jacobi_iters = ival;
+            }
+            else if ( strncmp(tmp[0], "temp_init", MAX_LINE) == 0 )
+            {
+                val = atof(tmp[1]);
+                control->T_init = val;
 
-                val = atof(tmp[3]);
-                control->Tau_P[2] = val * 1.e-3;   // convert p_mass from fs to ps
+                if ( control->T_init < 0.001 )
+                {
+                    control->T_init = 0.001;
+                }
             }
-        }
-        else if ( strncmp(tmp[0], "pt_mass", MAX_LINE) == 0 )
-        {
-            val = atof(tmp[1]);
-            control->Tau_PT = val * 1.e-3;  // convert pt_mass from fs to ps
-        }
-        else if ( strncmp(tmp[0], "compress", MAX_LINE) == 0 )
-        {
-            val = atof(tmp[1]);
-            control->compressibility = val;
-        }
-        else if ( strncmp(tmp[0], "press_mode", MAX_LINE) == 0 )
-        {
-            val = atoi(tmp[1]);
-            control->press_mode = val;
-        }
-        else if ( strncmp(tmp[0], "remove_CoM_vel", MAX_LINE) == 0 )
-        {
-            val = atoi(tmp[1]);
-            control->remove_CoM_vel = val;
-        }
-        else if ( strncmp(tmp[0], "debug_level", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->debug_level = ival;
-        }
-        else if ( strncmp(tmp[0], "energy_update_freq", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->energy_update_freq = ival;
-        }
-        else if ( strncmp(tmp[0], "write_freq", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->write_steps = ival;
-        }
-        else if ( strncmp(tmp[0], "traj_compress", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->traj_compress = ival;
+            else if ( strncmp(tmp[0], "temp_final", MAX_LINE) == 0 )
+            {
+                val = atof(tmp[1]);
+                control->T_final = val;
 
-            if ( out_control->traj_compress )
-                out_control->write = (int (*)(FILE *, const char *, ...)) gzprintf;
-            else out_control->write = fprintf;
-        }
-        else if ( strncmp(tmp[0], "traj_format", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->traj_format = ival;
+                if ( control->T_final < 0.1 )
+                {
+                    control->T_final = 0.1;
+                }
+            }
+            else if ( strncmp(tmp[0], "t_mass", MAX_LINE) == 0 )
+            {
+                val = atof(tmp[1]);
+                /* convert t_mass from fs to ps */
+                control->Tau_T = val * 1.e-3;
+            }
+            else if ( strncmp(tmp[0], "t_mode", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                control->T_mode = ival;
+            }
+            else if ( strncmp(tmp[0], "t_rate", MAX_LINE) == 0 )
+            {
+                val = atof(tmp[1]);
+                control->T_rate = val;
+            }
+            else if ( strncmp(tmp[0], "t_freq", MAX_LINE) == 0 )
+            {
+                val = atof(tmp[1]);
+                control->T_freq = val;
+            }
+            else if ( strncmp(tmp[0], "pressure", MAX_LINE) == 0 )
+            {
+                if ( control->ensemble == iNPT )
+                {
+                    val = atof(tmp[1]);
+                    control->P[0] = control->P[1] = control->P[2] = val;
+                }
+                else if ( control->ensemble == sNPT )
+                {
+                    val = atof(tmp[1]);
+                    control->P[0] = val;
+
+                    val = atof(tmp[2]);
+                    control->P[1] = val;
+
+                    val = atof(tmp[3]);
+                    control->P[2] = val;
+                }
+            }
+            else if ( strncmp(tmp[0], "p_mass", MAX_LINE) == 0 )
+            {
+                if ( control->ensemble == iNPT )
+                {
+                    val = atof(tmp[1]);
+                    control->Tau_P[0] = val * 1.e-3;   // convert p_mass from fs to ps
+                }
+                else if ( control->ensemble == sNPT )
+                {
+                    val = atof(tmp[1]);
+                    control->Tau_P[0] = val * 1.e-3;   // convert p_mass from fs to ps
+
+                    val = atof(tmp[2]);
+                    control->Tau_P[1] = val * 1.e-3;   // convert p_mass from fs to ps
+
+                    val = atof(tmp[3]);
+                    control->Tau_P[2] = val * 1.e-3;   // convert p_mass from fs to ps
+                }
+            }
+            else if ( strncmp(tmp[0], "pt_mass", MAX_LINE) == 0 )
+            {
+                val = atof(tmp[1]);
+                control->Tau_PT = val * 1.e-3;  // convert pt_mass from fs to ps
+            }
+            else if ( strncmp(tmp[0], "compress", MAX_LINE) == 0 )
+            {
+                val = atof(tmp[1]);
+                control->compressibility = val;
+            }
+            else if ( strncmp(tmp[0], "press_mode", MAX_LINE) == 0 )
+            {
+                val = atoi(tmp[1]);
+                control->press_mode = val;
+            }
+            else if ( strncmp(tmp[0], "remove_CoM_vel", MAX_LINE) == 0 )
+            {
+                val = atoi(tmp[1]);
+                control->remove_CoM_vel = val;
+            }
+            else if ( strncmp(tmp[0], "debug_level", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                out_control->debug_level = ival;
+            }
+            else if ( strncmp(tmp[0], "energy_update_freq", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                out_control->energy_update_freq = ival;
+            }
+            else if ( strncmp(tmp[0], "write_freq", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                out_control->write_steps = ival;
+            }
+            else if ( strncmp(tmp[0], "traj_compress", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                out_control->traj_compress = ival;
 
-            if ( out_control->traj_format == 0 )
+                if ( out_control->traj_compress )
+                    out_control->write = (int (*)(FILE *, const char *, ...)) gzprintf;
+                else out_control->write = fprintf;
+            }
+            else if ( strncmp(tmp[0], "traj_format", MAX_LINE) == 0 )
             {
-                out_control->write_header =
-                    (int (*)( reax_system*, control_params*,
-                              static_storage*, void* )) Write_Custom_Header;
-                out_control->append_traj_frame =
-                    (int (*)(reax_system*, control_params*, simulation_data*,
-                             static_storage*, reax_list **, void*)) Append_Custom_Frame;
+                ival = atoi(tmp[1]);
+                out_control->traj_format = ival;
+
+                if ( out_control->traj_format == 0 )
+                {
+                    out_control->write_header =
+                        (int (*)( reax_system*, control_params*,
+                                  static_storage*, void* )) Write_Custom_Header;
+                    out_control->append_traj_frame =
+                        (int (*)(reax_system*, control_params*, simulation_data*,
+                                 static_storage*, reax_list **, void*)) Append_Custom_Frame;
+                }
+                else if ( out_control->traj_format == 1 )
+                {
+                    out_control->write_header =
+                        (int (*)( reax_system*, control_params*,
+                                  static_storage*, void* )) Write_xyz_Header;
+                    out_control->append_traj_frame =
+                        (int (*)( reax_system*,  control_params*, simulation_data*,
+                                  static_storage*, reax_list **, void* )) Append_xyz_Frame;
+                }
             }
-            else if ( out_control->traj_format == 1 )
+            else if ( strncmp(tmp[0], "traj_title", MAX_LINE) == 0 )
             {
-                out_control->write_header =
-                    (int (*)( reax_system*, control_params*,
-                              static_storage*, void* )) Write_xyz_Header;
-                out_control->append_traj_frame =
-                    (int (*)( reax_system*,  control_params*, simulation_data*,
-                              static_storage*, reax_list **, void* )) Append_xyz_Frame;
+                strncpy( out_control->traj_title, tmp[1], 81 );
+            }
+            else if ( strncmp(tmp[0], "atom_info", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                out_control->atom_format += ival * 4;
+            }
+            else if ( strncmp(tmp[0], "atom_velocities", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                out_control->atom_format += ival * 2;
+            }
+            else if ( strncmp(tmp[0], "atom_forces", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                out_control->atom_format += ival * 1;
+            }
+            else if ( strncmp(tmp[0], "bond_info", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                out_control->bond_info = ival;
+            }
+            else if ( strncmp(tmp[0], "angle_info", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                out_control->angle_info = ival;
+            }
+            else if ( strncmp(tmp[0], "test_forces", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+            }
+            else if ( strncmp(tmp[0], "molec_anal", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                control->molec_anal = ival;
+            }
+            else if ( strncmp(tmp[0], "freq_molec_anal", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                control->freq_molec_anal = ival;
+            }
+            else if ( strncmp(tmp[0], "bond_graph_cutoff", MAX_LINE) == 0 )
+            {
+                val = atof(tmp[1]);
+                control->bg_cut = val;
+            }
+            else if ( strncmp(tmp[0], "ignore", MAX_LINE) == 0 )
+            {
+                control->num_ignored = atoi(tmp[1]);
+                for ( i = 0; i < control->num_ignored; ++i )
+                    control->ignore[atoi(tmp[i + 2])] = 1;
+            }
+            else if ( strncmp(tmp[0], "dipole_anal", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                control->dipole_anal = ival;
+            }
+            else if ( strncmp(tmp[0], "freq_dipole_anal", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                control->freq_dipole_anal = ival;
+            }
+            else if ( strncmp(tmp[0], "diffusion_coef", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                control->diffusion_coef = ival;
+            }
+            else if ( strncmp(tmp[0], "freq_diffusion_coef", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                control->freq_diffusion_coef = ival;
+            }
+            else if ( strncmp(tmp[0], "restrict_type", MAX_LINE) == 0 )
+            {
+                ival = atoi(tmp[1]);
+                control->restrict_type = ival;
+            }
+            else
+            {
+                fprintf( stderr, "WARNING: unknown parameter %s\n", tmp[0] );
+                exit( UNKNOWN_OPTION );
             }
-        }
-        else if ( strncmp(tmp[0], "traj_title", MAX_LINE) == 0 )
-        {
-            strncpy( out_control->traj_title, tmp[1], 81 );
-        }
-        else if ( strncmp(tmp[0], "atom_info", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->atom_format += ival * 4;
-        }
-        else if ( strncmp(tmp[0], "atom_velocities", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->atom_format += ival * 2;
-        }
-        else if ( strncmp(tmp[0], "atom_forces", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->atom_format += ival * 1;
-        }
-        else if ( strncmp(tmp[0], "bond_info", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->bond_info = ival;
-        }
-        else if ( strncmp(tmp[0], "angle_info", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->angle_info = ival;
-        }
-        else if ( strncmp(tmp[0], "test_forces", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-        }
-        else if ( strncmp(tmp[0], "molec_anal", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->molec_anal = ival;
-        }
-        else if ( strncmp(tmp[0], "freq_molec_anal", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->freq_molec_anal = ival;
-        }
-        else if ( strncmp(tmp[0], "bond_graph_cutoff", MAX_LINE) == 0 )
-        {
-            val = atof(tmp[1]);
-            control->bg_cut = val;
-        }
-        else if ( strncmp(tmp[0], "ignore", MAX_LINE) == 0 )
-        {
-            control->num_ignored = atoi(tmp[1]);
-            for ( i = 0; i < control->num_ignored; ++i )
-                control->ignore[atoi(tmp[i + 2])] = 1;
-        }
-        else if ( strncmp(tmp[0], "dipole_anal", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->dipole_anal = ival;
-        }
-        else if ( strncmp(tmp[0], "freq_dipole_anal", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->freq_dipole_anal = ival;
-        }
-        else if ( strncmp(tmp[0], "diffusion_coef", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->diffusion_coef = ival;
-        }
-        else if ( strncmp(tmp[0], "freq_diffusion_coef", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->freq_diffusion_coef = ival;
-        }
-        else if ( strncmp(tmp[0], "restrict_type", MAX_LINE) == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->restrict_type = ival;
-        }
-        else
-        {
-            fprintf( stderr, "WARNING: unknown parameter %s\n", tmp[0] );
-            exit( UNKNOWN_OPTION );
         }
     }
 
diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c
index 78783798..5b2f0a7d 100644
--- a/sPuReMD/src/forces.c
+++ b/sPuReMD/src/forces.c
@@ -136,7 +136,7 @@ static void Compute_NonBonded_Forces( reax_system *system, control_params *contr
 #endif
 
     t_start = Get_Time( );
-    Compute_Charges( system, control, data, workspace, lists[FAR_NBRS], out_control );
+    Compute_Charges( system, control, data, workspace, out_control );
     t_elapsed = Get_Timing_Info( t_start );
     data->timing.cm += t_elapsed;
 
@@ -185,7 +185,8 @@ static void Compute_Total_Force( reax_system *system, control_params *control,
             {
                 if ( i < bonds->select.bond_list[pj].nbr )
                 {
-                    if ( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT)
+                    if ( control->ensemble == NVE || control->ensemble == nhNVT
+                            || control->ensemble == bNVT )
                     {
                         Add_dBond_to_Forces( i, pj, system, data, workspace, lists );
                     }
diff --git a/sPuReMD/src/four_body_interactions.c b/sPuReMD/src/four_body_interactions.c
index c46e9bc6..2444c43f 100644
--- a/sPuReMD/src/four_body_interactions.c
+++ b/sPuReMD/src/four_body_interactions.c
@@ -494,7 +494,8 @@ void Four_Body_Interactions( reax_system *system, control_params *control,
 #endif
                                         bo_kl->Cdbo += (CEtors6 + CEconj3);
 
-                                        if ( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT )
+                                        if ( control->ensemble == NVE || control->ensemble == nhNVT
+                                                || control->ensemble == bNVT )
                                         {
                                             /* dcos_theta_ijk */
                                             rvec_ScaledAdd( *f_i,
diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c
index cc0ce302..6670dfc0 100644
--- a/sPuReMD/src/init_md.c
+++ b/sPuReMD/src/init_md.c
@@ -149,7 +149,7 @@ static void Init_Simulation_Data( reax_system *system, control_params *control,
         break;
 
 
-    case NVT:
+    case nhNVT:
         data->N_f = 3 * system->N + 1;
         //control->Tau_T = 100 * data->N_f * K_B * control->T_final;
         if ( !control->restart || (control->restart && control->random_vel) )
diff --git a/sPuReMD/src/integrate.c b/sPuReMD/src/integrate.c
index 4e948406..af2d87bd 100644
--- a/sPuReMD/src/integrate.c
+++ b/sPuReMD/src/integrate.c
@@ -58,11 +58,11 @@ void Velocity_Verlet_NVE(reax_system *system, control_params *control,
         inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass;
 
         rvec_ScaledSum( dx, dt, system->atoms[i].v,
-                0.5 * dt_sqr * -F_CONV * inv_m, system->atoms[i].f );
+                -0.5 * dt_sqr * F_CONV * inv_m, system->atoms[i].f );
         Inc_on_T3( system->atoms[i].x, dx, &( system->box ) );
 
         rvec_ScaledAdd( system->atoms[i].v,
-                0.5 * dt * -F_CONV * inv_m, system->atoms[i].f );
+                -0.5 * dt * F_CONV * inv_m, system->atoms[i].f );
     }
 
 #if defined(DEBUG_FOCUS)
@@ -83,7 +83,7 @@ void Velocity_Verlet_NVE(reax_system *system, control_params *control,
     {
         inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass;
         rvec_ScaledAdd( system->atoms[i].v,
-                0.5 * dt * -F_CONV * inv_m, system->atoms[i].f );
+                -0.5 * dt * F_CONV * inv_m, system->atoms[i].f );
     }
 
 #if defined(DEBUG_FOCUS)
@@ -118,7 +118,7 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params*
         inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass;
 
         rvec_ScaledSum( dx, dt - 0.5 * dt_sqr * therm->v_xi, system->atoms[i].v,
-                0.5 * dt_sqr * inv_m * -F_CONV, system->atoms[i].f );
+                -0.5 * dt_sqr * inv_m * F_CONV, system->atoms[i].f );
 
         Inc_on_T3( system->atoms[i].x, dx, &(system->box) );
 
@@ -150,14 +150,14 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params*
         rvec_Scale( workspace->v_const[i],
                     1.0 - 0.5 * dt * therm->v_xi, system->atoms[i].v );
         rvec_ScaledAdd( workspace->v_const[i],
-                        0.5 * dt * inv_m * -F_CONV, workspace->f_old[i] );
+                -0.5 * dt * inv_m * F_CONV, workspace->f_old[i] );
         rvec_ScaledAdd( workspace->v_const[i],
-                        0.5 * dt * inv_m * -F_CONV, system->atoms[i].f );
+                -0.5 * dt * inv_m * F_CONV, system->atoms[i].f );
 #if defined(DEBUG)
         fprintf( stderr, "atom%d: inv_m=%f, C1=%f, C2=%f, v_const=%f %f %f\n",
-                 i, inv_m, 1.0 - 0.5 * dt * therm->v_xi,
-                 0.5 * dt * inv_m * -F_CONV, workspace->v_const[i][0],
-                 workspace->v_const[i][1], workspace->v_const[i][2] );
+                i, inv_m, 1.0 - 0.5 * dt * therm->v_xi,
+                -0.5 * dt * inv_m * F_CONV, workspace->v_const[i][0],
+                workspace->v_const[i][1], workspace->v_const[i][2] );
 #endif
     }
 
@@ -219,30 +219,17 @@ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system,
     steps = data->step - data->prev_steps;
     renbr = (steps % control->reneighbor == 0);
 
-#if defined(DEBUG_FOCUS)
-    //fprintf( out_control->prs,
-    //         "tau_t: %g  tau_p: %g  dt/tau_t: %g  dt/tau_p: %g\n",
-    //control->Tau_T, control->Tau_P, dt / control->Tau_T, dt / control->Tau_P );
-    fprintf( stderr, "step %d: ", data->step );
-#endif
-
     /* velocity verlet, 1st part */
     for ( i = 0; i < system->N; i++ )
     {
         inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass;
         /* Compute x(t + dt) */
         rvec_ScaledSum( dx, dt, system->atoms[i].v,
-                        0.5 * -F_CONV * inv_m * SQR(dt), system->atoms[i].f );
+                -0.5 * F_CONV * inv_m * SQR(dt), system->atoms[i].f );
         Inc_on_T3( system->atoms[i].x, dx, &(system->box) );
         /* Compute v(t + dt/2) */
         rvec_ScaledAdd( system->atoms[i].v,
-                        0.5 * -F_CONV * inv_m * dt, system->atoms[i].f );
-        /*fprintf( stderr, "%6d   %15.8f %15.8f %15.8f   %15.8f %15.8f %15.8f\n",
-          workspace->orig_id[i],
-          system->atoms[i].x[0], system->atoms[i].x[1], system->atoms[i].x[2],
-          0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[0],
-          0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[1],
-          0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[2] ); */
+                -0.5 * F_CONV * inv_m * dt, system->atoms[i].f );
     }
 #if defined(DEBUG_FOCUS)
     fprintf( stderr, "verlet1 - " );
@@ -265,13 +252,7 @@ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system,
         inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass;
         /* Compute v(t + dt) */
         rvec_ScaledAdd( system->atoms[i].v,
-                        0.5 * dt * -F_CONV * inv_m, system->atoms[i].f );
-        /* fprintf( stderr, "%6d   %15f %15f %15f   %15.8f %15.8f %15.8f\n",
-           workspace->orig_id[i],
-           system->atoms[i].v[0], system->atoms[i].v[1], system->atoms[i].v[2],
-           0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[0],
-           0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[1],
-           0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[2] );*/
+                -0.5 * dt * F_CONV * inv_m, system->atoms[i].f );
     }
     Compute_Kinetic_Energy( system, data );
     Compute_Pressure_Isotropic( system, control, data, out_control );
@@ -350,17 +331,11 @@ void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system* system,
         inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass;
         /* Compute x(t + dt) */
         rvec_ScaledSum( dx, dt, system->atoms[i].v,
-                        0.5 * -F_CONV * inv_m * SQR(dt), system->atoms[i].f );
+                -0.5 * F_CONV * inv_m * SQR(dt), system->atoms[i].f );
         Inc_on_T3( system->atoms[i].x, dx, &(system->box) );
         /* Compute v(t + dt/2) */
         rvec_ScaledAdd( system->atoms[i].v,
-                        0.5 * -F_CONV * inv_m * dt, system->atoms[i].f );
-        /*fprintf( stderr, "%6d   %15.8f %15.8f %15.8f   %15.8f %15.8f %15.8f\n",
-          workspace->orig_id[i],
-          system->atoms[i].x[0], system->atoms[i].x[1], system->atoms[i].x[2],
-          0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[0],
-          0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[1],
-          0.5 * SQR(dt) * -F_CONV * inv_m * system->atoms[i].f[2] ); */
+                -0.5 * F_CONV * inv_m * dt, system->atoms[i].f );
     }
 #if defined(DEBUG_FOCUS)
     fprintf( stderr, "verlet1 - " );
@@ -383,19 +358,12 @@ void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system* system,
         inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass;
         /* Compute v(t + dt) */
         rvec_ScaledAdd( system->atoms[i].v,
-                        0.5 * dt * -F_CONV * inv_m, system->atoms[i].f );
-        /* fprintf( stderr, "%6d   %15f %15f %15f   %15.8f %15.8f %15.8f\n",
-           workspace->orig_id[i],
-           system->atoms[i].v[0], system->atoms[i].v[1], system->atoms[i].v[2],
-           0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[0],
-           0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[1],
-           0.5 * dt * -F_CONV * inv_m * system->atoms[i].f[2] );*/
+                -0.5 * dt * F_CONV * inv_m, system->atoms[i].f );
     }
+
     Compute_Kinetic_Energy( system, data );
+
     Compute_Pressure_Isotropic( system, control, data, out_control );
-#if defined(DEBUG_FOCUS)
-    fprintf( stderr, "verlet2 - " );
-#endif
 
     /* pressure scaler */
     for ( d = 0; d < 3; ++d )
@@ -460,26 +428,26 @@ void Velocity_Verlet_Nose_Hoover_NVT( reax_system* system,
     real dt_sqr = SQR(dt);
     rvec dx;
 
-    for (i = 0; i < system->N; i++)
+    for ( i = 0; i < system->N; i++ )
     {
         inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass;
 
-        // Compute x(t + dt)
+        /* Compute x(t + dt) */
         rvec_ScaledSum( dx, dt, system->atoms[i].v,
-                        0.5 * dt_sqr * -F_CONV * inv_m, system->atoms[i].f );
+                -0.5 * dt_sqr * F_CONV * inv_m, system->atoms[i].f );
         Inc_on_T3_Gen( system->atoms[i].x, dx, &(system->box) );
 
-        // Compute v(t + dt/2)
+        /* Compute v(t + dt/2) */
         rvec_ScaledAdd( system->atoms[i].v,
-                        -0.5 * dt * data->therm.xi, system->atoms[i].v );
+                -0.5 * dt * data->therm.xi, system->atoms[i].v );
         rvec_ScaledAdd( system->atoms[i].v,
-                        0.5 * dt * -F_CONV * inv_m, system->atoms[i].f );
+                -0.5 * dt * F_CONV * inv_m, system->atoms[i].f );
     }
 
     // Compute zeta(t + dt/2), E_Kininetic(t + dt/2)
     // IMPORTANT: What will be the initial value of zeta? and what is g?
     data->therm.xi += 0.5 * dt * control->Tau_T  *
-                      ( 2.0 * data->E_Kin - data->N_f * K_B * control->T );
+        ( 2.0 * data->E_Kin - data->N_f * K_B * control->T );
 
     Reset( system, control, data, workspace );
     fprintf(out_control->log, "reset-");
@@ -490,7 +458,7 @@ void Velocity_Verlet_Nose_Hoover_NVT( reax_system* system,
     fprintf(out_control->log, "nbrs-");
     fflush( out_control->log );
 
-    /* Compute_Charges( system, control, workspace, lists[FAR_NBRS], out_control );
+    /* Compute_Charges( system, control, workspace, out_control );
        fprintf(out_control->log,"qeq-"); fflush( out_control->log ); */
 
     Compute_Forces( system, control, data, workspace, lists, out_control,
@@ -504,14 +472,14 @@ void Velocity_Verlet_Nose_Hoover_NVT( reax_system* system,
     {
         inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass;
 
-        // compute v(t + dt)
+        /* compute v(t + dt) */
         rvec_ScaledAdd( system->atoms[i].v,
-                        -0.5 * dt * data->therm.xi, system->atoms[i].v );
+                -0.5 * dt * data->therm.xi, system->atoms[i].v );
         rvec_ScaledAdd( system->atoms[i].v,
-                        0.5 * dt * -F_CONV * inv_m, system->atoms[i].f );
+                -0.5 * dt * F_CONV * inv_m, system->atoms[i].f );
     }
 
-    // Compute zeta(t + dt)
+    /* Compute zeta(t + dt) */
     data->therm.xi += 0.5 * dt * control->Tau_T  * ( 2.0 * data->E_Kin -
                       data->N_f * K_B * control->T );
 
@@ -567,12 +535,12 @@ void Velocity_Verlet_Isotropic_NPT( reax_system* system,
     {
         inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass;
 
-        // Compute x(t + dt)
-        rvec_ScaledSum( workspace->a[i], -F_CONV * inv_m, system->atoms[i].f,
-                        -( (2.0 + 3.0 / data->N_f) * iso_bar->v_eps + therm->v_xi ),
-                        system->atoms[i].v );
+        /* Compute x(t + dt) */
+        rvec_ScaledSum( workspace->a[i], -1.0 * F_CONV * inv_m, system->atoms[i].f,
+                -1.0 * ( (2.0 + 3.0 / data->N_f) * iso_bar->v_eps + therm->v_xi ),
+                system->atoms[i].v );
         rvec_ScaledSum( dx, dt, system->atoms[i].v,
-                        0.5 * dt_sqr, workspace->a[i] );
+                0.5 * dt_sqr, workspace->a[i] );
         Inc_on_T3( system->atoms[i].x, dx, &(system->box) );
         rvec_Scale( system->atoms[i].x, exp_deps, system->atoms[i].x );
     }
@@ -593,7 +561,7 @@ void Velocity_Verlet_Isotropic_NPT( reax_system* system,
     fprintf(out_control->log, "nbrs-");
     fflush( out_control->log );
 
-    /* Compute_Charges( system, control, workspace, lists[FAR_NBRS], out_control );
+    /* Compute_Charges( system, control, workspace, out_control );
        fprintf(out_control->log,"qeq-"); fflush( out_control->log ); */
 
     Compute_Forces( system, control, data, workspace, lists,
@@ -609,15 +577,14 @@ void Velocity_Verlet_Isotropic_NPT( reax_system* system,
         inv_m = 1.0 / system->reaxprm.sbp[system->atoms[i].type].mass;
 
         rvec_ScaledSum( dv, 0.5 * dt, workspace->a[i],
-                        0.5 * dt * -F_CONV * inv_m, system->atoms[i].f );
+                -0.5 * dt * F_CONV * inv_m, system->atoms[i].f );
         rvec_Add( dv, system->atoms[i].v );
         rvec_Scale( workspace->v_const[i], exp_deps, dv );
 
-        P_int_const += ( -F_CONV *
-                         rvec_Dot( system->atoms[i].f, system->atoms[i].x ) );
+        P_int_const += ( -1.0 * F_CONV * rvec_Dot( system->atoms[i].f, system->atoms[i].x ) );
 
-        E_kin += (0.5 * system->reaxprm.sbp[system->atoms[i].type].mass *
-                  rvec_Dot( system->atoms[i].v, system->atoms[i].v ) );
+        E_kin += (0.5 * system->reaxprm.sbp[system->atoms[i].type].mass
+                * rvec_Dot( system->atoms[i].v, system->atoms[i].v ) );
     }
 
     // Compute initial p_int
@@ -716,15 +683,15 @@ void Velocity_Verlet_Berendsen_NVT( reax_system* system,
         atom = &(system->atoms[i]);
         inv_m = 1.0 / system->reaxprm.sbp[atom->type].mass;
         /* Compute x(t + dt) */
-        rvec_ScaledSum( dx, dt, atom->v, 0.5 * -F_CONV * inv_m * SQR(dt), atom->f );
+        rvec_ScaledSum( dx, dt, atom->v, -0.5 * F_CONV * inv_m * SQR(dt), atom->f );
 
         /* bNVT fix - Metin's suggestion */
         /* ORIGINAL CHANGE -- CHECK THE branch serial-bnvt for the fix */
         //rvec_Add( atom->x, dx );
-        Inc_on_T3( atom->x, dx, &( system->box ) );
+        Inc_on_T3( atom->x, dx, &system->box );
 
         /* Compute v(t + dt/2) */
-        rvec_ScaledAdd( atom->v, 0.5 * -F_CONV * inv_m * dt, atom->f );
+        rvec_ScaledAdd( atom->v, -0.5 * F_CONV * inv_m * dt, atom->f );
     }
 
 #if defined(DEBUG_FOCUS)
@@ -748,7 +715,7 @@ void Velocity_Verlet_Berendsen_NVT( reax_system* system,
         atom = &(system->atoms[i]);
         inv_m = 1.0 / system->reaxprm.sbp[atom->type].mass;
         /* Compute v(t + dt) */
-        rvec_ScaledAdd( atom->v, 0.5 * dt * -F_CONV * inv_m, atom->f );
+        rvec_ScaledAdd( atom->v, -0.5 * dt * F_CONV * inv_m, atom->f );
     }
 #if defined(DEBUG_FOCUS)
     fprintf(stderr, "step%d: verlet2 done\n", data->step);
diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h
index 26870dee..754e0c93 100644
--- a/sPuReMD/src/mytypes.h
+++ b/sPuReMD/src/mytypes.h
@@ -129,18 +129,19 @@
 #define LOOSE_ZONE     (0.75)
 
 
-/* config params */
+/* ensemble type */
 enum ensemble
 {
     NVE = 0,
-    NVT = 1,
-    NPT = 2,
+    bNVT = 1,
+    nhNVT = 2,
     sNPT = 3,
     iNPT = 4,
-    ensNR = 5,
-    bNVT = 6,
+    NPT = 5,
+    ens_N = 6,
 };
 
+/* interaction list type */
 enum interaction_list_offets
 {
     FAR_NBRS = 0,
@@ -154,6 +155,7 @@ enum interaction_list_offets
     LIST_N = 8,
 };
 
+/* interaction type */
 enum interaction_type
 {
     TYP_VOID = 0,
@@ -167,6 +169,7 @@ enum interaction_type
     TYP_N = 8,
 };
 
+/* error codes for simulation termination */
 enum errors
 {
     FILE_NOT_FOUND = -10,
@@ -190,13 +193,15 @@ enum molecular_analysis_type
     NUM_ANALYSIS = 3,
 };
 
-enum restart_format
+/* restart file format */
+enum restart_formats
 {
     WRITE_ASCII = 0,
     WRITE_BINARY = 1,
     RF_N = 2,
 };
 
+/* geometry file format */
 enum geo_formats
 {
     CUSTOM = 0,
@@ -243,6 +248,15 @@ enum pre_app
 };
 
 
+/* atom types as pertains to hydrogen bonding */
+enum hydrogen_bonding_atom_types
+{
+    NON_H_BONDING_ATOM = 0,
+    H_ATOM = 1,
+    H_BONDING_ATOM = 2,
+};
+
+
 typedef double real;
 typedef real rvec[3];
 typedef int ivec[3];
@@ -603,22 +617,52 @@ typedef struct
     int freq_diffusion_coef;
     int restrict_type;
 
+    /* method for computing atomic charges */
     unsigned int charge_method;
+    /* frequency (in terms of simulation time steps) at which to
+     * re-compute atomic charge distribution */
+    int charge_freq;
+    /* iterative linear solver type */
     unsigned int cm_solver_type;
+    /* system net charge */
     real cm_q_net;
+    /* max. iterations for linear solver */
     unsigned int cm_solver_max_iters;
+    /* max. iterations before restarting in specific solvers, e.g., GMRES(k) */
     unsigned int cm_solver_restart;
+    /* error tolerance of solution produced by charge distribution
+     * sparse iterative linear solver */
     real cm_solver_q_err;
+    /* ratio used in computing sparser charge matrix,
+     * between 0.0 and 1.0 */
     real cm_domain_sparsity;
+    /* TRUE if enabled, FALSE otherwise */
     unsigned int cm_domain_sparsify_enabled;
+    /* order of spline extrapolation used for computing initial guess
+     * to linear solver */
     unsigned int cm_init_guess_extrap1;
+    /* order of spline extrapolation used for computing initial guess
+     * to linear solver */
     unsigned int cm_init_guess_extrap2;
+    /* preconditioner type for linear solver */
     unsigned int cm_solver_pre_comp_type;
+    /* frequency (in terms of simulation time steps) at which to recompute
+     * incomplete factorizations */
     unsigned int cm_solver_pre_comp_refactor;
+    /* drop tolerance of incomplete factorization schemes (ILUT, ICHOLT, etc.)
+     * used for preconditioning the iterative linear solver used in charge distribution */
     real cm_solver_pre_comp_droptol;
+    /* num. of sweeps for computing preconditioner factors
+     * in fine-grained iterative methods (FG-ICHOL, FG-ILU) */
     unsigned int cm_solver_pre_comp_sweeps;
+    /* relative num. of non-zeros to charge matrix used to
+     * compute the sparse approximate inverse preconditioner,
+     * between 0.0 and 1.0 */
     real cm_solver_pre_comp_sai_thres;
+    /* preconditioner application type */
     unsigned int cm_solver_pre_app_type;
+    /* num. of iterations used to apply preconditioner via
+     * Jacobi relaxation scheme (truncated Neumann series) */
     unsigned int cm_solver_pre_app_jacobi_iters;
 
     int molec_anal;
@@ -671,24 +715,42 @@ typedef struct
 
 typedef struct
 {
+    /* start time of event */
     real start;
+    /* end time of event */
     real end;
+    /* total elapsed time of event */
     real elapsed;
-
+    /* total simulation time */
     real total;
+    /* neighbor list generation time */
     real nbrs;
+    /* force initialization time */
     real init_forces;
+    /* bonded force calculation time */
     real bonded;
+    /* non-bonded force calculation time */
     real nonb;
+    /* atomic charge distribution calculation time */
     real cm;
+    /**/
     real cm_sort_mat_rows;
+    /**/
     real cm_solver_pre_comp;
+    /**/
     real cm_solver_pre_app;
+    /* num. of steps in iterative linear solver for charge distribution */
     int cm_solver_iters;
+    /**/
     real cm_solver_spmv;
+    /**/
     real cm_solver_vector_ops;
+    /**/
     real cm_solver_orthog;
+    /**/
     real cm_solver_tri_solve;
+    /* num. of retries in main sim. loop */
+    int num_retries;
 } reax_timing;
 
 
@@ -953,17 +1015,30 @@ typedef struct
     rvec *dDeltap_self;
 
     /* charge method storage */
+    /* charge matrix */
     sparse_matrix *H;
+    /* charge matrix (full) */
     sparse_matrix *H_full;
+    /* sparser charge matrix */
     sparse_matrix *H_sp;
+    /* permuted charge matrix (graph coloring) */
     sparse_matrix *H_p;
+    /* sparsity pattern of charge matrix, used in
+     * computing a sparse approximate inverse preconditioner */
     sparse_matrix *H_spar_patt;
+    /* sparsity pattern of charge matrix (full), used in
+     * computing a sparse approximate inverse preconditioner */
     sparse_matrix *H_spar_patt_full;
+    /* sparse approximate inverse preconditioner */
     sparse_matrix *H_app_inv;
+    /* incomplete Cholesky or LU preconditioner */
     sparse_matrix *L;
+    /* incomplete Cholesky or LU preconditioner */
     sparse_matrix *U;
-    real *droptol;
+    /* Jacobi preconditioner */
     real *Hdia_inv;
+    /* row drop tolerences for incomplete Cholesky preconditioner */
+    real *droptol;
     real *b;
     real *b_s;
     real *b_t;
diff --git a/sPuReMD/src/three_body_interactions.c b/sPuReMD/src/three_body_interactions.c
index 5837e3ea..8e8fc825 100644
--- a/sPuReMD/src/three_body_interactions.c
+++ b/sPuReMD/src/three_body_interactions.c
@@ -477,7 +477,8 @@ void Three_Body_Interactions( reax_system *system, control_params *control,
                                     }
 
 
-                                    if ( control->ensemble == NVE || control->ensemble == NVT  || control->ensemble == bNVT)
+                                    if ( control->ensemble == NVE || control->ensemble == nhNVT
+                                            || control->ensemble == bNVT )
                                     {
                                         rvec_ScaledAdd( *f_i, CEval8, p_ijk->dcos_di );
                                         rvec_ScaledAdd( *f_j, CEval8, p_ijk->dcos_dj );
@@ -820,7 +821,8 @@ void Hydrogen_Bonds( reax_system *system, control_params *control,
 #endif
                             bo_ij->Cdbo += CEhb1;
 
-                            if ( control->ensemble == NVE || control->ensemble == NVT  || control->ensemble == bNVT)
+                            if ( control->ensemble == NVE || control->ensemble == nhNVT
+                                    || control->ensemble == bNVT )
                             {
                                 /* dcos terms */
                                 rvec_ScaledAdd( *f_i, +CEhb2, dcos_theta_di );
diff --git a/sPuReMD/src/two_body_interactions.c b/sPuReMD/src/two_body_interactions.c
index 9a04bb71..8fb702c3 100644
--- a/sPuReMD/src/two_body_interactions.c
+++ b/sPuReMD/src/two_body_interactions.c
@@ -220,11 +220,11 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control,
             {
                 if ( far_nbrs->select.far_nbr_list[pj].d <= control->r_cut )
                 {
-                    nbr_pj = &( far_nbrs->select.far_nbr_list[pj] );
+                    nbr_pj = &far_nbrs->select.far_nbr_list[pj];
                     j = nbr_pj->nbr;
                     r_ij = nbr_pj->d;
-                    twbp = &(system->reaxprm.tbp[ system->atoms[i].type ]
-                             [ system->atoms[j].type ]);
+                    twbp = &system->reaxprm.tbp[ system->atoms[i].type ]
+                             [ system->atoms[j].type ];
                     self_coef = (i == j) ? 0.5 : 1.0; // for supporting small boxes!
 
                     /* Calculate Taper and its derivative */
@@ -301,7 +301,8 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control,
                     CEclmb = self_coef * C_ele * system->atoms[i].q * system->atoms[j].q *
                              ( dTap -  Tap * r_ij / dr3gamij_1 ) / dr3gamij_3;
 
-                    if ( control->ensemble == NVE || control->ensemble == NVT || control->ensemble == bNVT )
+                    if ( control->ensemble == NVE || control->ensemble == nhNVT
+                            || control->ensemble == bNVT )
                     {
 #ifndef _OPENMP
                         rvec_ScaledAdd( system->atoms[i].f,
@@ -606,7 +607,8 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control,
                              t->CEclmb[r].a;
                     CEclmb *= self_coef * system->atoms[i].q * system->atoms[j].q;
 
-                    if ( control->ensemble == NVE || control->ensemble == NVT  || control->ensemble == bNVT)
+                    if ( control->ensemble == NVE || control->ensemble == nhNVT
+                            || control->ensemble == bNVT )
                     {
 #ifndef _OPENMP
                         rvec_ScaledAdd( system->atoms[i].f, -(CEvd + CEclmb), nbr_pj->dvec );
-- 
GitLab