diff --git a/PG-PuReMD/src/allocate.c b/PG-PuReMD/src/allocate.c
index 9a02f53853d6d3bc149f47df5625b7abb6c595ab..74e6b9394c949ee1e5d4cb54e74c480b5abe1993 100644
--- a/PG-PuReMD/src/allocate.c
+++ b/PG-PuReMD/src/allocate.c
@@ -206,10 +206,12 @@ static void Deallocate_Workspace_Part2( control_params * const control,
             sfree( workspace->d, "Deallocate_Workspace_Part2::workspace->d" );
             sfree( workspace->q, "Deallocate_Workspace_Part2::workspace->q" );
             sfree( workspace->p, "Deallocate_Workspace_Part2::workspace->p" );
+#if defined(DUAL_SOLVER)
             sfree( workspace->r2, "Deallocate_Workspace_Part2::workspace->r2" );
             sfree( workspace->d2, "Deallocate_Workspace_Part2::workspace->d2" );
             sfree( workspace->q2, "Deallocate_Workspace_Part2::workspace->q2" );
             sfree( workspace->p2, "Deallocate_Workspace_Part2::workspace->p2" );
+#endif
             break;
 
         case SDM_S:
@@ -217,10 +219,12 @@ static void Deallocate_Workspace_Part2( control_params * const control,
             sfree( workspace->d, "Deallocate_Workspace_Part2::workspace->d" );
             sfree( workspace->q, "Deallocate_Workspace_Part2::workspace->q" );
             sfree( workspace->p, "Deallocate_Workspace_Part2::workspace->p" );
+#if defined(DUAL_SOLVER)
             sfree( workspace->r2, "Deallocate_Workspace_Part2::workspace->r2" );
             sfree( workspace->d2, "Deallocate_Workspace_Part2::workspace->d2" );
             sfree( workspace->q2, "Deallocate_Workspace_Part2::workspace->q2" );
             sfree( workspace->p2, "Deallocate_Workspace_Part2::workspace->p2" );
+#endif
             break;
 
         case BiCGStab_S:
@@ -233,6 +237,17 @@ static void Deallocate_Workspace_Part2( control_params * const control,
             sfree( workspace->p, "Deallocate_Workspace_Part2::workspace->p" );
             sfree( workspace->r_hat, "Deallocate_Workspace_Part2::workspace->r_hat" );
             sfree( workspace->q_hat, "Deallocate_Workspace_Part2::workspace->q_hat" );
+#if defined(DUAL_SOLVER)
+            sfree( workspace->y2, "Deallocate_Workspace_Part2::workspace->y2" );
+            sfree( workspace->g2, "Deallocate_Workspace_Part2::workspace->g2" );
+            sfree( workspace->z2, "Deallocate_Workspace_Part2::workspace->z2" );
+            sfree( workspace->r2, "Deallocate_Workspace_Part2::workspace->r2" );
+            sfree( workspace->d2, "Deallocate_Workspace_Part2::workspace->d2" );
+            sfree( workspace->q2, "Deallocate_Workspace_Part2::workspace->q2" );
+            sfree( workspace->p2, "Deallocate_Workspace_Part2::workspace->p2" );
+            sfree( workspace->r_hat2, "Deallocate_Workspace_Part2::workspace->r_hat2" );
+            sfree( workspace->q_hat2, "Deallocate_Workspace_Part2::workspace->q_hat2" );
+#endif
             break;
 
         case PIPECG_S:
@@ -245,6 +260,7 @@ static void Deallocate_Workspace_Part2( control_params * const control,
             sfree( workspace->n, "Deallocate_Workspace_Part2::workspace->n" );
             sfree( workspace->u, "Deallocate_Workspace_Part2::workspace->u" );
             sfree( workspace->w, "Deallocate_Workspace_Part2::workspace->w" );
+#if defined(DUAL_SOLVER)
             sfree( workspace->z2, "Deallocate_Workspace_Part2::workspace->z2" );
             sfree( workspace->r2, "Deallocate_Workspace_Part2::workspace->r2" );
             sfree( workspace->d2, "Deallocate_Workspace_Part2::workspace->d2" );
@@ -254,6 +270,7 @@ static void Deallocate_Workspace_Part2( control_params * const control,
             sfree( workspace->n2, "Deallocate_Workspace_Part2::workspace->n2" );
             sfree( workspace->u2, "Deallocate_Workspace_Part2::workspace->u2" );
             sfree( workspace->w2, "Deallocate_Workspace_Part2::workspace->w2" );
+#endif
             break;
 
         case PIPECR_S:
@@ -266,6 +283,17 @@ static void Deallocate_Workspace_Part2( control_params * const control,
             sfree( workspace->n, "Deallocate_Workspace_Part2::workspace->n" );
             sfree( workspace->u, "Deallocate_Workspace_Part2::workspace->u" );
             sfree( workspace->w, "Deallocate_Workspace_Part2::workspace->w" );
+#if defined(DUAL_SOLVER)
+            sfree( workspace->z2, "Deallocate_Workspace_Part2::workspace->z2" );
+            sfree( workspace->r2, "Deallocate_Workspace_Part2::workspace->r2" );
+            sfree( workspace->d2, "Deallocate_Workspace_Part2::workspace->d2" );
+            sfree( workspace->q2, "Deallocate_Workspace_Part2::workspace->q2" );
+            sfree( workspace->p2, "Deallocate_Workspace_Part2::workspace->p2" );
+            sfree( workspace->m2, "Deallocate_Workspace_Part2::workspace->m2" );
+            sfree( workspace->n2, "Deallocate_Workspace_Part2::workspace->n2" );
+            sfree( workspace->u2, "Deallocate_Workspace_Part2::workspace->u2" );
+            sfree( workspace->w2, "Deallocate_Workspace_Part2::workspace->w2" );
+#endif
             break;
 
         default:
@@ -433,10 +461,12 @@ void Allocate_Workspace_Part2( reax_system * const system, control_params * cons
             workspace->d = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::d" );
             workspace->q = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::q" );
             workspace->p = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::p" );
+#if defined(DUAL_SOLVER)
             workspace->r2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::r2" );
             workspace->d2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::d2" );
             workspace->q2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::q2" );
             workspace->p2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::p2" );
+#endif
             break;
 
         case SDM_S:
@@ -444,10 +474,12 @@ void Allocate_Workspace_Part2( reax_system * const system, control_params * cons
             workspace->d = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::d" );
             workspace->q = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::q" );
             workspace->p = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::p" );
+#if defined(DUAL_SOLVER)
             workspace->r2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::r2" );
             workspace->d2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::d2" );
             workspace->q2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::q2" );
             workspace->p2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::p2" );
+#endif
             break;
 
         case BiCGStab_S:
@@ -460,6 +492,17 @@ void Allocate_Workspace_Part2( reax_system * const system, control_params * cons
             workspace->p = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::p" );
             workspace->r_hat = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::r_hat" );
             workspace->q_hat = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::q_hat" );
+#if defined(DUAL_SOLVER)
+            workspace->y2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::y2" );
+            workspace->g2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::g2" );
+            workspace->z2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::z2" );
+            workspace->r2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::r2" );
+            workspace->d2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::d2" );
+            workspace->q2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::q2" );
+            workspace->p2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::p2" );
+            workspace->r_hat2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::r_hat2" );
+            workspace->q_hat2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::q_hat2" );
+#endif
             break;
 
         case PIPECG_S:
@@ -472,6 +515,7 @@ void Allocate_Workspace_Part2( reax_system * const system, control_params * cons
             workspace->n = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::n" );
             workspace->u = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::u" );
             workspace->w = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::w" );
+#if defined(DUAL_SOLVER)
             workspace->z2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::z2" );
             workspace->r2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::r2" );
             workspace->d2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::d2" );
@@ -481,6 +525,7 @@ void Allocate_Workspace_Part2( reax_system * const system, control_params * cons
             workspace->n2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::n2" );
             workspace->u2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::u2" );
             workspace->w2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::w2" );
+#endif
             break;
 
         case PIPECR_S:
@@ -493,6 +538,17 @@ void Allocate_Workspace_Part2( reax_system * const system, control_params * cons
             workspace->n = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::n" );
             workspace->u = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::u" );
             workspace->w = scalloc( total_cap, sizeof(real), "Allocate_Workspace_Part2::w" );
+#if defined(DUAL_SOLVER)
+            workspace->z2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::z2" );
+            workspace->r2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::r2" );
+            workspace->d2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::d2" );
+            workspace->q2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::q2" );
+            workspace->p2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::p2" );
+            workspace->m2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::m2" );
+            workspace->n2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::n2" );
+            workspace->u2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::u2" );
+            workspace->w2 = scalloc( total_cap, sizeof(rvec2), "Allocate_Workspace_Part2::w2" );
+#endif
             break;
 
         default:
diff --git a/PG-PuReMD/src/basic_comm.c b/PG-PuReMD/src/basic_comm.c
index b1c321dd934125d4128fe995515618769ca58de6..8c094a5bad43671e1b28c9dcce184267d3672731 100644
--- a/PG-PuReMD/src/basic_comm.c
+++ b/PG-PuReMD/src/basic_comm.c
@@ -740,26 +740,6 @@ void Coll_FS( reax_system const * const system, mpi_datatypes * const mpi_data,
 }
 
 
-real Parallel_Norm( real const * const v, const int n, MPI_Comm comm )
-{
-    int i, ret;
-    real sum_l, norm_sqr;
-
-    sum_l = 0.0;
-
-    /* compute local part of vector 2-norm */
-    for ( i = 0; i < n; ++i )
-    {
-        sum_l += SQR( v[i] );
-    }
-
-    ret = MPI_Allreduce( &sum_l, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, comm );
-    Check_MPI_Error( ret, __FILE__, __LINE__ );
-
-    return SQRT( norm_sqr );
-}
-
-
 real Parallel_Dot( real const * const v1, real const * const v2,
         const int n, MPI_Comm comm )
 {
@@ -781,26 +761,6 @@ real Parallel_Dot( real const * const v1, real const * const v2,
 }
 
 
-real Parallel_Vector_Acc( real const * const v, const int n,
-        MPI_Comm comm )
-{
-    int i, ret;
-    real my_acc, res;
-
-    /* compute local part of vector element-wise sum */
-    my_acc = 0.0;
-    for ( i = 0; i < n; ++i )
-    {
-        my_acc += v[i];
-    }
-
-    ret = MPI_Allreduce( &my_acc, &res, 1, MPI_DOUBLE, MPI_SUM, comm );
-    Check_MPI_Error( ret, __FILE__, __LINE__ );
-
-    return res;
-}
-
-
 /*****************************************************************************/
 #if defined(TEST_FORCES)
 void Coll_ids_at_Master( reax_system *system, storage *workspace,
diff --git a/PG-PuReMD/src/basic_comm.h b/PG-PuReMD/src/basic_comm.h
index 392dddc5e6fdba865eb4a9914c8fa2df8b3459b6..d11c7cac416d28eaa569c74015b5adff2bb0d80d 100644
--- a/PG-PuReMD/src/basic_comm.h
+++ b/PG-PuReMD/src/basic_comm.h
@@ -50,12 +50,8 @@ void Coll( reax_system const * const, mpi_datatypes * const,
 void Coll_FS( reax_system const * const, mpi_datatypes * const,
         void * const , int, MPI_Datatype );
 
-real Parallel_Norm( const real * const, const int, MPI_Comm );
-
 real Parallel_Dot( const real * const, const real * const, const int, MPI_Comm );
 
-real Parallel_Vector_Acc( const real * const, const int, MPI_Comm );
-
 #if defined(TEST_FORCES)
 void Coll_ids_at_Master( reax_system*, storage*, mpi_datatypes* );
 
diff --git a/PG-PuReMD/src/charges.c b/PG-PuReMD/src/charges.c
index 7379596ffeaa83061704997a0136e746b75aff0d..a0b1c0252619195ec94c28fcd314671ca9b0c1c4 100644
--- a/PG-PuReMD/src/charges.c
+++ b/PG-PuReMD/src/charges.c
@@ -250,31 +250,36 @@ static void Compute_Preconditioner_QEq( reax_system const * const system,
         mpi_datatypes const * const mpi_data )
 {
     int i;
+#if defined(LOG_PERFORMANCE)
+    real time;
+#endif
 #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL)
     int ret;
-    real t_pc;
 #endif
 
     if ( control->cm_solver_pre_comp_type == JACOBI_PC )
     {
-        for ( i = 0; i < system->n; ++i )
-        {
-            workspace->Hdia_inv[i] = 1.0 / system->reax_param.sbp[ system->my_atoms[i].type ].eta;
-        }
+#if defined(LOG_PERFORMANCE)
+        time = Get_Time( );
+#endif
+
+        jacobi( &workspace->H, workspace->Hdia_inv );
     }
     else if ( control->cm_solver_pre_comp_type == SAI_PC )
     {
 #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL)
-        t_pc = sparse_approx_inverse( system, data, workspace, mpi_data,
+        time = sparse_approx_inverse( system, data, workspace, mpi_data,
                 &workspace->H, &workspace->H_spar_patt, &workspace->H_app_inv,
                 control->nprocs );
-
-        data->timing.cm_solver_pre_comp += t_pc;
 #else
         fprintf( stderr, "[ERROR] LAPACKE support disabled. Re-compile before enabling. Terminating...\n" );
         exit( INVALID_INPUT );
 #endif
     }
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_pre_comp );
+#endif
 }
 
 
@@ -399,11 +404,12 @@ static void QEq( reax_system const * const system, control_params const * const
         output_controls const * const out_control,
         mpi_datatypes * const mpi_data )
 {
-    int iters;
+    int iters, refactor;
 
     iters = 0;
+    refactor = is_refactoring_step( control, data );
 
-    if ( is_refactoring_step( control, data ) == TRUE )
+    if ( refactor == TRUE )
     {
         Setup_Preconditioner_QEq( system, control, data, workspace, mpi_data );
 
@@ -450,71 +456,65 @@ static void QEq( reax_system const * const system, control_params const * const
     case CG_S:
 #if defined(DUAL_SOLVER)
         iters = dual_CG( system, control, data, workspace, &workspace->H, workspace->b,
-                control->cm_solver_q_err, workspace->x, mpi_data );
+                control->cm_solver_q_err, workspace->x, mpi_data, refactor );
 #else
         iters = CG( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
 
         iters += CG( system, control, data, workspace, &workspace->H, workspace->b_t,
-                control->cm_solver_q_err, workspace->t, mpi_data );
+                control->cm_solver_q_err, workspace->t, mpi_data, FALSE );
 #endif
         break;
 
     case SDM_S:
 #if defined(DUAL_SOLVER)
-        fprintf( stderr, "[ERROR] Dual SDM solver for QEq not yet implemented. Terminating...\n" );
-        exit( INVALID_INPUT );
-//        iters = dual_SDM( system, control, data, workspace, &workspace->H, workspace->b,
-//                control->cm_solver_q_err, workspace->x, mpi_data );
+        iters = dual_SDM( system, control, data, workspace, &workspace->H, workspace->b,
+                control->cm_solver_q_err, workspace->x, mpi_data, refactor );
 #else
         iters = SDM( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
 
         iters += SDM( system, control, data, workspace, &workspace->H, workspace->b_t,
-                control->cm_solver_q_err, workspace->t, mpi_data );
+                control->cm_solver_q_err, workspace->t, mpi_data, FALSE );
 #endif
         break;
 
     case BiCGStab_S:
 #if defined(DUAL_SOLVER)
-        fprintf( stderr, "[ERROR] Dual BiCGStab solver for QEq not yet implemented. Terminating...\n" );
-        exit( INVALID_INPUT );
-//        iters = dual_BiCGStab( system, control, data, workspace, &workspace->H, workspace->b,
-//                control->cm_solver_q_err, workspace->x, mpi_data );
+        iters = dual_BiCGStab( system, control, data, workspace, &workspace->H, workspace->b,
+                control->cm_solver_q_err, workspace->x, mpi_data, refactor );
 #else
         iters = BiCGStab( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
 
         iters += BiCGStab( system, control, data, workspace, &workspace->H, workspace->b_t,
-                control->cm_solver_q_err, workspace->t, mpi_data );
+                control->cm_solver_q_err, workspace->t, mpi_data, FALSE );
 #endif
         break;
 
     case PIPECG_S:
 #if defined(DUAL_SOLVER)
         iters = dual_PIPECG( system, control, data, workspace, &workspace->H, workspace->b,
-                control->cm_solver_q_err, workspace->x, mpi_data );
+                control->cm_solver_q_err, workspace->x, mpi_data, refactor );
 #else
         iters = PIPECG( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
 
         iters += PIPECG( system, control, data, workspace, &workspace->H, workspace->b_t,
-                control->cm_solver_q_err, workspace->t, mpi_data );
+                control->cm_solver_q_err, workspace->t, mpi_data, FALSE );
 #endif
         break;
 
     case PIPECR_S:
 #if defined(DUAL_SOLVER)
-        fprintf( stderr, "[ERROR] Dual PIPECR solver for QEq not yet implemented. Terminating...\n" );
-        exit( INVALID_INPUT );
-//        iters = dual_PIPECR( system, control, data, workspace, &workspace->H, workspace->b,
-//                control->cm_solver_q_err, workspace->x, mpi_data );
+        iters = dual_PIPECR( system, control, data, workspace, &workspace->H, workspace->b,
+                control->cm_solver_q_err, workspace->x, mpi_data, refactor );
 #else
         iters = PIPECR( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
 
         iters += PIPECR( system, control, data, workspace, &workspace->H, workspace->b_t,
-                control->cm_solver_q_err, workspace->t, mpi_data );
+                control->cm_solver_q_err, workspace->t, mpi_data, FALSE );
 #endif
         break;
 
@@ -544,11 +544,12 @@ static void EE( reax_system const * const system, control_params const * const c
         output_controls const * const out_control,
         mpi_datatypes * const mpi_data )
 {
-    int iters;
+    int iters, refactor;
 
     iters = 0;
+    refactor = is_refactoring_step( control, data );
 
-    if ( is_refactoring_step( control, data ) == TRUE )
+    if ( refactor == TRUE )
     {
         Setup_Preconditioner_EE( system, control, data, workspace, mpi_data );
 
@@ -594,27 +595,27 @@ static void EE( reax_system const * const system, control_params const * const c
 
     case CG_S:
         iters = CG( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
         break;
 
     case SDM_S:
         iters = SDM( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
         break;
 
     case BiCGStab_S:
         iters = BiCGStab( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
         break;
 
     case PIPECG_S:
         iters = PIPECG( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
         break;
 
     case PIPECR_S:
         iters = PIPECR( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
         break;
 
     default:
@@ -643,11 +644,12 @@ static void ACKS2( reax_system const * const system, control_params const * cons
         output_controls const * const out_control,
         mpi_datatypes * const mpi_data )
 {
-    int iters;
+    int iters, refactor;
 
     iters = 0;
+    refactor = is_refactoring_step( control, data );
 
-    if ( is_refactoring_step( control, data ) == TRUE )
+    if ( refactor == TRUE )
     {
         Setup_Preconditioner_ACKS2( system, control, data, workspace, mpi_data );
 
@@ -693,27 +695,27 @@ static void ACKS2( reax_system const * const system, control_params const * cons
 
     case CG_S:
         iters = CG( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
         break;
 
     case SDM_S:
         iters = SDM( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
         break;
 
     case BiCGStab_S:
         iters = BiCGStab( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
         break;
 
     case PIPECG_S:
         iters = PIPECG( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
         break;
 
     case PIPECR_S:
         iters = PIPECR( system, control, data, workspace, &workspace->H, workspace->b_s,
-                control->cm_solver_q_err, workspace->s, mpi_data );
+                control->cm_solver_q_err, workspace->s, mpi_data, refactor );
         break;
 
     default:
diff --git a/PG-PuReMD/src/init_md.c b/PG-PuReMD/src/init_md.c
index 1fa0671107bbc426c692778b66c1f7e8aa3dbe70..fd7bd3d88e7fa0dc363e62bb0dc9cf830eb4f82f 100644
--- a/PG-PuReMD/src/init_md.c
+++ b/PG-PuReMD/src/init_md.c
@@ -881,10 +881,12 @@ static void Finalize_Workspace( reax_system * const system, control_params * con
             sfree( workspace->d, "Finalize_Workspace::workspace->d" );
             sfree( workspace->q, "Finalize_Workspace::workspace->q" );
             sfree( workspace->p, "Finalize_Workspace::workspace->p" );
+#if defined(DUAL_SOLVER)
             sfree( workspace->r2, "Finalize_Workspace::workspace->r2" );
             sfree( workspace->d2, "Finalize_Workspace::workspace->d2" );
             sfree( workspace->q2, "Finalize_Workspace::workspace->q2" );
             sfree( workspace->p2, "Finalize_Workspace::workspace->p2" );
+#endif
             break;
 
         case SDM_S:
@@ -892,10 +894,12 @@ static void Finalize_Workspace( reax_system * const system, control_params * con
             sfree( workspace->d, "Finalize_Workspace::workspace->d" );
             sfree( workspace->q, "Finalize_Workspace::workspace->q" );
             sfree( workspace->p, "Finalize_Workspace::workspace->p" );
+#if defined(DUAL_SOLVER)
             sfree( workspace->r2, "Finalize_Workspace::workspace->r2" );
             sfree( workspace->d2, "Finalize_Workspace::workspace->d2" );
             sfree( workspace->q2, "Finalize_Workspace::workspace->q2" );
             sfree( workspace->p2, "Finalize_Workspace::workspace->p2" );
+#endif
             break;
 
         case BiCGStab_S:
@@ -908,6 +912,17 @@ static void Finalize_Workspace( reax_system * const system, control_params * con
             sfree( workspace->p, "Finalize_Workspace::workspace->p" );
             sfree( workspace->r_hat, "Finalize_Workspace::workspace->r_hat" );
             sfree( workspace->q_hat, "Finalize_Workspace::workspace->q_hat" );
+#if defined(DUAL_SOLVER)
+            sfree( workspace->y2, "Finalize_Workspace::workspace->y2" );
+            sfree( workspace->g2, "Finalize_Workspace::workspace->g2" );
+            sfree( workspace->z2, "Finalize_Workspace::workspace->z2" );
+            sfree( workspace->r2, "Finalize_Workspace::workspace->r2" );
+            sfree( workspace->d2, "Finalize_Workspace::workspace->d2" );
+            sfree( workspace->q2, "Finalize_Workspace::workspace->q2" );
+            sfree( workspace->p2, "Finalize_Workspace::workspace->p2" );
+            sfree( workspace->r_hat2, "Finalize_Workspace::workspace->r_hat2" );
+            sfree( workspace->q_hat2, "Finalize_Workspace::workspace->q_hat2" );
+#endif
             break;
 
         case PIPECG_S:
@@ -920,6 +935,7 @@ static void Finalize_Workspace( reax_system * const system, control_params * con
             sfree( workspace->n, "Finalize_Workspace::workspace->n" );
             sfree( workspace->u, "Finalize_Workspace::workspace->u" );
             sfree( workspace->w, "Finalize_Workspace::workspace->w" );
+#if defined(DUAL_SOLVER)
             sfree( workspace->z2, "Finalize_Workspace::workspace->z2" );
             sfree( workspace->r2, "Finalize_Workspace::workspace->r2" );
             sfree( workspace->d2, "Finalize_Workspace::workspace->d2" );
@@ -929,6 +945,7 @@ static void Finalize_Workspace( reax_system * const system, control_params * con
             sfree( workspace->n2, "Finalize_Workspace::workspace->n2" );
             sfree( workspace->u2, "Finalize_Workspace::workspace->u2" );
             sfree( workspace->w2, "Finalize_Workspace::workspace->w2" );
+#endif
             break;
 
         case PIPECR_S:
@@ -941,6 +958,17 @@ static void Finalize_Workspace( reax_system * const system, control_params * con
             sfree( workspace->n, "Finalize_Workspace::workspace->n" );
             sfree( workspace->u, "Finalize_Workspace::workspace->u" );
             sfree( workspace->w, "Finalize_Workspace::workspace->w" );
+#if defined(DUAL_SOLVER)
+            sfree( workspace->z2, "Finalize_Workspace::workspace->z2" );
+            sfree( workspace->r2, "Finalize_Workspace::workspace->r2" );
+            sfree( workspace->d2, "Finalize_Workspace::workspace->d2" );
+            sfree( workspace->q2, "Finalize_Workspace::workspace->q2" );
+            sfree( workspace->p2, "Finalize_Workspace::workspace->p2" );
+            sfree( workspace->m2, "Finalize_Workspace::workspace->m2" );
+            sfree( workspace->n2, "Finalize_Workspace::workspace->n2" );
+            sfree( workspace->u2, "Finalize_Workspace::workspace->u2" );
+            sfree( workspace->w2, "Finalize_Workspace::workspace->w2" );
+#endif
             break;
 
         default:
diff --git a/PG-PuReMD/src/lin_alg.c b/PG-PuReMD/src/lin_alg.c
index ba5bef3b7e45e1ae4200381b0953c4ee50f25399..35b3ef00f4c80c54df81397bf3f61989b1fd83f4 100644
--- a/PG-PuReMD/src/lin_alg.c
+++ b/PG-PuReMD/src/lin_alg.c
@@ -175,22 +175,70 @@ static int find_bucket( double *list, int len, double a )
 }
 
 
-/* Jacobi preconditioner computation */
-//real jacobi( const sparse_matrix * const H, real * const Hdia_inv )
-void jacobi( const reax_system * const system, real * const Hdia_inv )
+/* Compute diagonal inverse (Jacobi) preconditioner
+ *
+ * H: matrix used to compute preconditioner, in CSR format
+ * Hdia_inv (output): inverse of each located diagonal entry of H; 1.0 where |entry| <= 1.0e-15
+ */
+void jacobi( sparse_matrix const * const H, real * const Hdia_inv )
+{
+    unsigned int i, pj;
+
+    if ( H->format == SYM_HALF_MATRIX ) /* first stored entry of row i is used as its diagonal */
+    {
+        for ( i = 0; i < H->n; ++i )
+        {
+            if ( FABS( H->val[H->start[i]] ) > 1.0e-15 )
+            {
+                Hdia_inv[i] = 1.0 / H->val[H->start[i]];
+            }
+            else
+            {
+                Hdia_inv[i] = 1.0;
+            }
+        }
+    }
+    else if ( H->format == SYM_FULL_MATRIX || H->format == FULL_MATRIX )
+    {
+        for ( i = 0; i < H->n; ++i )
+        {
+            for ( pj = H->start[i]; pj < H->start[i + 1]; ++pj )
+            {
+                if ( H->j[pj] == i ) /* diagonal entry: column index equals row index */
+                {
+                    if ( FABS( H->val[pj] ) > 1.0e-15 ) /* guard the same entry that is inverted below */
+                    {
+                        Hdia_inv[i] = 1.0 / H->val[pj];
+                    }
+                    else
+                    {
+                        Hdia_inv[i] = 1.0;
+                    }
+
+                    break; /* remaining entries of row i are not needed */
+                }
+            }
+        }
+    }
+}
+
+
+/* Apply diagonal inverse (Jacobi) preconditioner to both components of the rvec2 residuals
+ *
+ * Hdia_inv: diagonal inverse preconditioner (constructed using H)
+ * y: current residuals (rvec2 entries; components [0] and [1] are scaled independently)
+ * x: preconditioned residuals (output, rvec2 entries)
+ * N: dimensions of preconditioner and vectors (# rows in H)
+ */
+static void dual_jacobi_app( const real * const Hdia_inv, const rvec2 * const y,
+        rvec2 * const x, const int N )
 {
     unsigned int i;
 
-    for ( i = 0; i < system->n; ++i )
+    for ( i = 0; i < N; ++i )
     {
-//        if ( FABS( H->val[H->start[i + 1] - 1] ) > 1.0e-15 )
-//        {
-        Hdia_inv[i] = 1.0 / system->reax_param.sbp[ system->my_atoms[i].type ].eta;
-//        }
-//        else
-//        {
-//            Hdia_inv[i] = 1.0;
-//        }
+        x[i][0] = y[i][0] * Hdia_inv[i];
+        x[i][1] = y[i][1] * Hdia_inv[i];
     }
 }
 
@@ -221,7 +269,7 @@ static void jacobi_app( const real * const Hdia_inv, const real * const y,
  * b (output): two dense vectors
  * N: number of entries in both vectors in b (must be equal)
  */
-static void dual_Sparse_MatVec_local( sparse_matrix const * const A,
+static void Dual_Sparse_MatVec_local( sparse_matrix const * const A,
         rvec2 const * const x, rvec2 * const b, int N )
 {
     int i, j, k, si, num_rows;
@@ -334,6 +382,26 @@ static void dual_Sparse_MatVec_local( sparse_matrix const * const A,
 }
 
 
+/* Communications for sparse matrix-dense vector multiplication Ax = b
+ *
+ * system: passed through to Dist
+ * control: not used by this routine
+ * mpi_data: passed through to Dist
+ * x: dense vector
+ * buf_type: data structure type for x
+ * mpi_type: MPI_Datatype struct for communications
+ *
+ * returns: nothing (the routine is void)
+ */
+static void Sparse_MatVec_Comm_Part1( const reax_system * const system,
+        const control_params * const control, mpi_datatypes * const mpi_data,
+        void const * const x, int buf_type, MPI_Datatype mpi_type )
+{
+    /* exploit 3D domain decomposition of simulation space with 3-stage communication pattern */
+    Dist( system, mpi_data, x, buf_type, mpi_type );
+}
+
+
 /* Local arithmetic portion of sparse matrix-dense vector multiplication Ax = b
  *
  * A: sparse matrix, 1D partitioned row-wise
@@ -444,26 +512,6 @@ static void Sparse_MatVec_local( sparse_matrix const * const A,
 }
 
 
-/* Communications for sparse matrix-dense vector multiplication Ax = b
- *
- * system:
- * control: 
- * mpi_data:
- * x: dense vector
- * buf_type: data structure type for x
- * mpi_type: MPI_Datatype struct for communications
- *
- * returns: communication time
- */
-static void Sparse_MatVec_Comm_Part1( const reax_system * const system,
-        const control_params * const control, mpi_datatypes * const mpi_data,
-        void const * const x, int buf_type, MPI_Datatype mpi_type )
-{
-    /* exploit 3D domain decomposition of simulation space with 3-stage communication pattern */
-    Dist( system, mpi_data, x, buf_type, mpi_type );
-}
-
-
 /* Communications for sparse matrix-dense vector multiplication Ax = b
  *
  * system:
@@ -493,6 +541,90 @@ static void Sparse_MatVec_Comm_Part2( const reax_system * const system,
 }
 
 
+/* sparse matrix, dense vector multiplication AX = B for rvec2 operands (two values per entry)
+ *
+ * system, control: passed through to the communication routines
+ * data: timing fields are updated here when LOG_PERFORMANCE is defined
+ * mpi_data: passed through to the communication routines (also supplies mpi_rvec2)
+ * A: symmetric matrix, stored in CSR format
+ * x: dense vector
+ * n: number of entries in x
+ * b (output): dense vector */
+static void Dual_Sparse_MatVec( reax_system const * const system,
+        control_params const * const control, simulation_data * const data,
+        mpi_datatypes * const mpi_data, sparse_matrix const * const A,
+        rvec2 const * const x, int n, rvec2 * const b )
+{
+#if defined(LOG_PERFORMANCE)
+    real time;
+
+    time = Get_Time( );
+#endif
+
+    Sparse_MatVec_Comm_Part1( system, control, mpi_data, x,
+            RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+#endif
+
+    Dual_Sparse_MatVec_local( A, x, b, n ); /* local arithmetic portion, between the two communication steps */
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+#endif
+
+    Sparse_MatVec_Comm_Part2( system, control, mpi_data, A->format, b,
+            RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_comm ); /* second communication step is charged to the same comm timer */
+#endif
+}
+
+
+/* sparse matrix, dense vector multiplication Ax = b
+ *
+ * system, control: passed through to the communication routines
+ * data: timing fields are updated here when LOG_PERFORMANCE is defined
+ * mpi_data: passed through to the communication routines
+ * A: symmetric matrix, stored in CSR format
+ * x: dense vector
+ * n: number of entries in x
+ * b (output): dense vector */
+static void Sparse_MatVec( reax_system const * const system,
+        control_params const * const control, simulation_data * const data,
+        mpi_datatypes * const mpi_data, sparse_matrix const * const A,
+        real const * const x, int n, real * const b )
+{
+#if defined(LOG_PERFORMANCE)
+    real time;
+
+    time = Get_Time( );
+#endif
+
+    Sparse_MatVec_Comm_Part1( system, control, mpi_data, x,
+            REAL_PTR_TYPE, MPI_DOUBLE );
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+#endif
+
+    Sparse_MatVec_local( A, x, b, n ); /* local arithmetic portion, between the two communication steps */
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+#endif
+
+    Sparse_MatVec_Comm_Part2( system, control, mpi_data, A->format, b,
+            REAL_PTR_TYPE, MPI_DOUBLE );
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_comm ); /* second communication step is charged to the same comm timer */
+#endif
+}
+
+
 void setup_sparse_approx_inverse( reax_system const * const system,
         simulation_data * const data,
         storage * const workspace, mpi_datatypes * const mpi_data,
@@ -1833,25 +1965,35 @@ real sparse_approx_inverse( reax_system const * const system,
 #endif
 
 
-static void apply_preconditioner( const reax_system * const system,
-        const storage * const workspace, 
-        const control_params * const control,
-        mpi_datatypes * const  mpi_data,
-        const real * const y, real * const x,
-        const int fresh_pre, const int side )
+/* Apply left-sided preconditioning while solving M^{-1}AX = M^{-1}B
+ *
+ * system: data struct supplying the local vector length (system->n)
+ * workspace: data struct containing matrices and vectors, stored in CSR
+ * control: data struct containing parameters
+ * data: struct containing timing simulation data (including performance data)
+ * mpi_data: data struct forwarded to Dual_Sparse_MatVec for communications
+ * y: vector to which to apply preconditioning (specific to the iterative solver used)
+ * x (output): preconditioned vector
+ * fresh_pre: parameter indicating if this is a newly computed (fresh) preconditioner
+ * side: used in determining how to apply preconditioner if the preconditioner is
+ *  factorized as M = M_{1}M_{2} (e.g., incomplete LU, A \approx LU)
+ *
+ * Assumptions:
+ *   Matrices have non-zero diagonals
+ *   Each row of a matrix has at least one non-zero (i.e., no rows with all zeros) */
+static void dual_apply_preconditioner( reax_system const * const system,
+        storage const * const workspace, control_params const * const control,
+        simulation_data * const data, mpi_datatypes * const  mpi_data,
+        rvec2 const * const y, rvec2 * const x, int fresh_pre, int side )
 {
 //    int i, si;
-    real t_start, t_pa, t_comm;
-
-    t_pa = 0.0;
-    t_comm = 0.0;
 
     /* no preconditioning */
     if ( control->cm_solver_pre_comp_type == NONE_PC )
     {
         if ( x != y )
         {
-            Vector_Copy( x, y, system->n );
+            Vector_Copy_rvec2( x, y, system->n );
         }
     }
     else
@@ -1865,7 +2007,7 @@ static void apply_preconditioner( const reax_system * const system,
                         switch ( control->cm_solver_pre_comp_type )
                         {
                             case JACOBI_PC:
-                                jacobi_app( workspace->Hdia_inv, y, x, system->n );
+                                dual_jacobi_app( workspace->Hdia_inv, y, x, system->n );
                                 break;
 //                            case ICHOLT_PC:
 //                            case ILUT_PC:
@@ -1873,18 +2015,13 @@ static void apply_preconditioner( const reax_system * const system,
 //                                  tri_solve( workspace->L, y, x, workspace->L->n, LOWER );
 //                                  break;
                             case SAI_PC:
-                                Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                                        y, REAL_PTR_TYPE, MPI_DOUBLE );
-                                
-                                t_start = Get_Time( );
 #if defined(NEUTRAL_TERRITORY)
-                                Sparse_MatVec_local( &workspace->H_app_inv, y, x, H->NT );
+                                Dual_Sparse_MatVec( system, control, data, mpi_data, &workspace->H_app_inv,
+                                        y, H->NT, x );
 #else
-                                Sparse_MatVec_local( &workspace->H_app_inv, y, x, system->n );
+                                Dual_Sparse_MatVec( system, control, data, mpi_data, &workspace->H_app_inv,
+                                        y, system->n, x );
 #endif
-                                t_pa += Get_Time( ) - t_start;
-
-                                /* no comm part2 because x is only local portion */
                                 break;
                             default:
                                 fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
@@ -1896,7 +2033,7 @@ static void apply_preconditioner( const reax_system * const system,
                         switch ( control->cm_solver_pre_comp_type )
                         {
                             case JACOBI_PC:
-                                jacobi_app( workspace->Hdia_inv, y, x, system->n );
+                                dual_jacobi_app( workspace->Hdia_inv, y, x, system->n );
                                 break;
 //                            case ICHOLT_PC:
 //                            case ILUT_PC:
@@ -1905,18 +2042,13 @@ static void apply_preconditioner( const reax_system * const system,
 //                                          workspace->L, y, x, workspace->L->n, LOWER, fresh_pre );
 //                                  break;
                             case SAI_PC:
-                                Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                                        y, REAL_PTR_TYPE, MPI_DOUBLE );
-                                
-                                t_start = Get_Time( );
 #if defined(NEUTRAL_TERRITORY)
-                                Sparse_MatVec_local( &workspace->H_app_inv, y, x, H->NT );
+                                Dual_Sparse_MatVec( system, control, data, mpi_data, &workspace->H_app_inv,
+                                        y, H->NT, x );
 #else
-                                Sparse_MatVec_local( &workspace->H_app_inv, y, x, system->n );
+                                Dual_Sparse_MatVec( system, control, data, mpi_data, &workspace->H_app_inv,
+                                        y, system->n, x );
 #endif
-                                t_pa += Get_Time( ) - t_start;
-
-                                /* no comm part2 because x is only local portion */
                                 break;
                             default:
                                 fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
@@ -1998,7 +2130,7 @@ static void apply_preconditioner( const reax_system * const system,
                             case SAI_PC:
                                 if ( x != y )
                                 {
-                                    Vector_Copy( x, y, system->n );
+                                    Vector_Copy_rvec2( x, y, system->n );
                                 }
                                 break;
 //                            case ICHOLT_PC:
@@ -2019,7 +2151,7 @@ static void apply_preconditioner( const reax_system * const system,
                             case SAI_PC:
                                 if ( x != y )
                                 {
-                                    Vector_Copy( x, y, system->n );
+                                    Vector_Copy_rvec2( x, y, system->n );
                                 }
                                 break;
 //                            case ICHOLT_PC:
@@ -2096,200 +2228,410 @@ static void apply_preconditioner( const reax_system * const system,
 }
 
 
-/* Steepest Descent */
-int SDM( reax_system const * const system, control_params const * const control,
-        simulation_data * const data, storage * const workspace,
-        sparse_matrix * const H, real * const b, real tol,
-        real * const x, mpi_datatypes * const  mpi_data )
+/* Apply left-sided preconditioning while solving M^{-1}Ax = M^{-1}b
+ *
+ * system: data struct supplying the local vector length (system->n)
+ * workspace: data struct containing matrices and vectors, stored in CSR
+ * control: data struct containing parameters
+ * data: struct containing timing simulation data (including performance data)
+ * mpi_data: data struct forwarded to Sparse_MatVec for communications
+ * y: vector to which to apply preconditioning (specific to the iterative solver used)
+ * x (output): preconditioned vector
+ * fresh_pre: parameter indicating if this is a newly computed (fresh) preconditioner
+ * side: used in determining how to apply preconditioner if the preconditioner is
+ *  factorized as M = M_{1}M_{2} (e.g., incomplete LU, A \approx LU)
+ *
+ * Assumptions:
+ *   Matrices have non-zero diagonals
+ *   Each row of a matrix has at least one non-zero (i.e., no rows with all zeros) */
+static void apply_preconditioner( reax_system const * const system,
+        storage const * const workspace, control_params const * const control,
+        simulation_data * const data, mpi_datatypes * const  mpi_data,
+        real const * const y, real * const x, int fresh_pre, int side )
 {
-    int i, j, ret;
-    real tmp, alpha, bnorm, sig;
-    real redux[2];
-#if defined(LOG_PERFORMANCE)
-    real time;
-
-    time = Get_Time( );
-#endif
-
-    Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-            x, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-
-#if defined(NEUTRAL_TERRITORY)
-    Sparse_MatVec_local( H, x, workspace->q, H->NT );
-#else
-    Sparse_MatVec_local( H, x, workspace->q, system->N );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
-#endif
-
-    Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-            H->format, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-
-    Vector_Sum( workspace->r, 1.0,  b, -1.0, workspace->q, system->n );
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
-#endif
+//    int i, si;
 
-    /* pre-conditioning */
+    /* no preconditioning */
     if ( control->cm_solver_pre_comp_type == NONE_PC )
     {
-        Vector_Copy( workspace->d, workspace->r, system->n );
-    }
-    else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-    {
-        for ( j = 0; j < system->n; ++j )
+        if ( x != y )
         {
-            workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j];
+            Vector_Copy( x, y, system->n );
         }
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
     }
-    else if ( control->cm_solver_pre_comp_type == SAI_PC )
+    else
     {
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->r, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-        
+        switch ( side )
+        {
+            case LEFT:
+                switch ( control->cm_solver_pre_app_type )
+                {
+                    case TRI_SOLVE_PA:
+                        switch ( control->cm_solver_pre_comp_type )
+                        {
+                            case JACOBI_PC:
+                                jacobi_app( workspace->Hdia_inv, y, x, system->n );
+                                break;
+//                            case ICHOLT_PC:
+//                            case ILUT_PC:
+//                            case ILUTP_PC:
+//                                  tri_solve( workspace->L, y, x, workspace->L->n, LOWER );
+//                                  break;
+                            case SAI_PC:
 #if defined(NEUTRAL_TERRITORY)
-        Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->d, H->NT );
+                                Sparse_MatVec( system, control, data, mpi_data, &workspace->H_app_inv,
+                                        y, H->NT, x );
 #else
-        Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->d, system->n );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
+                                Sparse_MatVec( system, control, data, mpi_data, &workspace->H_app_inv,
+                                        y, system->n, x );
 #endif
-
-        /* no comm part2 because d is only local portion */
-    }
-
-    redux[0] = Dot_local( b, b, system->n );
-    redux[1] = Dot_local( workspace->r, workspace->d, system->n );
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+                                break;
+                            default:
+                                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                                exit( INVALID_INPUT );
+                                break;
+                        }
+                        break;
+                    case TRI_SOLVE_LEVEL_SCHED_PA:
+                        switch ( control->cm_solver_pre_comp_type )
+                        {
+                            case JACOBI_PC:
+                                jacobi_app( workspace->Hdia_inv, y, x, system->n );
+                                break;
+//                            case ICHOLT_PC:
+//                            case ILUT_PC:
+//                            case ILUTP_PC:
+//                                  tri_solve_level_sched( (static_storage *) workspace,
+//                                          workspace->L, y, x, workspace->L->n, LOWER, fresh_pre );
+//                                  break;
+                            case SAI_PC:
+#if defined(NEUTRAL_TERRITORY)
+                                Sparse_MatVec( system, control, data, mpi_data, &workspace->H_app_inv,
+                                        y, H->NT, x );
+#else
+                                Sparse_MatVec( system, control, data, mpi_data, &workspace->H_app_inv,
+                                        y, system->n, x );
 #endif
+                                break;
+                            default:
+                                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                                exit( INVALID_INPUT );
+                                break;
+                        }
+                        break;
+                    case TRI_SOLVE_GC_PA:
+                        switch ( control->cm_solver_pre_comp_type )
+                        {
+                            case JACOBI_PC:
+                            case SAI_PC:
+                                fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                                exit( INVALID_INPUT );
+                                break;
+//                            case ICHOLT_PC:
+//                            case ILUT_PC:
+//                            case ILUTP_PC:
+//                                  for ( i = 0; i < workspace->H->n; ++i )
+//                                  {
+//                                      workspace->y_p[i] = y[i];
+//                                  }
+//
+//                                  permute_vector( workspace, workspace->y_p, workspace->H->n, FALSE, LOWER );
+//                                  tri_solve_level_sched( (static_storage *) workspace,
+//                                  workspace->L, workspace->y_p, x, workspace->L->n, LOWER, fresh_pre );
+//                                  break;
+                            default:
+                                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                                exit( INVALID_INPUT );
+                                break;
+                        }
+                        break;
+                    case JACOBI_ITER_PA:
+                        switch ( control->cm_solver_pre_comp_type )
+                        {
+                            case JACOBI_PC:
+                            case SAI_PC:
+                                fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                                exit( INVALID_INPUT );
+                                break;
+//                            case ICHOLT_PC:
+//                            case ILUT_PC:
+//                            case ILUTP_PC:
+//                                // construct D^{-1}_L
+//                                if ( fresh_pre == TRUE )
+//                                {
+//                                    for ( i = 0; i < workspace->L->n; ++i )
+//                                    {
+//                                        si = workspace->L->start[i + 1] - 1;
+//                                        workspace->Dinv_L[i] = 1.0 / workspace->L->val[si];
+//                                    }
+//                                }
+//
+//                                jacobi_iter( workspace, workspace->L, workspace->Dinv_L,
+//                                        y, x, LOWER, control->cm_solver_pre_app_jacobi_iters );
+//                                break;
+                            default:
+                                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                                exit( INVALID_INPUT );
+                                break;
+                        }
+                        break;
+                    default:
+                        fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                        exit( INVALID_INPUT );
+                        break;
 
-    ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
-            MPI_SUM, MPI_COMM_WORLD );
-    Check_MPI_Error( ret, __FILE__, __LINE__ );
-    bnorm = SQRT( redux[0] );
-    sig = redux[1];
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
-#endif
+                }
+                break;
+
+            case RIGHT:
+                switch ( control->cm_solver_pre_app_type )
+                {
+                    case TRI_SOLVE_PA:
+                        switch ( control->cm_solver_pre_comp_type )
+                        {
+                            case JACOBI_PC:
+                            case SAI_PC:
+                                if ( x != y )
+                                {
+                                    Vector_Copy( x, y, system->n );
+                                }
+                                break;
+//                            case ICHOLT_PC:
+//                            case ILUT_PC:
+//                            case ILUTP_PC:
+//                                  tri_solve( workspace->U, y, x, workspace->U->n, UPPER );
+//                                  break;
+                            default:
+                                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                                exit( INVALID_INPUT );
+                                break;
+                        }
+                        break;
+                    case TRI_SOLVE_LEVEL_SCHED_PA:
+                        switch ( control->cm_solver_pre_comp_type )
+                        {
+                            case JACOBI_PC:
+                            case SAI_PC:
+                                if ( x != y )
+                                {
+                                    Vector_Copy( x, y, system->n );
+                                }
+                                break;
+//                            case ICHOLT_PC:
+//                            case ILUT_PC:
+//                            case ILUTP_PC:
+//                                  tri_solve_level_sched( (static_storage *) workspace,
+//                                          workspace->U, y, x, workspace->U->n, UPPER, fresh_pre );
+//                                  break;
+                            default:
+                                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                                exit( INVALID_INPUT );
+                                break;
+                        }
+                        break;
+                    case TRI_SOLVE_GC_PA:
+                        switch ( control->cm_solver_pre_comp_type )
+                        {
+                            case JACOBI_PC:
+                            case SAI_PC:
+                                fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                                exit( INVALID_INPUT );
+                                break;
+//                            case ICHOLT_PC:
+//                            case ILUT_PC:
+//                            case ILUTP_PC:
+//                                  tri_solve_level_sched( (static_storage *) workspace,
+//                                  workspace->U, y, x, workspace->U->n, UPPER, fresh_pre );
+//                                  permute_vector( workspace, x, workspace->H->n, TRUE, UPPER );
+//                                  break;
+                            default:
+                                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                                exit( INVALID_INPUT );
+                                break;
+                        }
+                        break;
+                    case JACOBI_ITER_PA:
+                        switch ( control->cm_solver_pre_comp_type )
+                        {
+                            case JACOBI_PC:
+                            case SAI_PC:
+                                fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                                exit( INVALID_INPUT );
+                                break;
+//                            case ICHOLT_PC:
+//                            case ILUT_PC:
+//                            case ILUTP_PC:
+//                                  if ( fresh_pre == TRUE )
+//                                  {
+//                                      for ( i = 0; i < workspace->U->n; ++i )
+//                                      {
+//                                          si = workspace->U->start[i];
+//                                          workspace->Dinv_U[i] = 1.0 / workspace->U->val[si];
+//                                      }
+//                                  }
+//
+//                                  jacobi_iter( workspace, workspace->U, workspace->Dinv_U,
+//                                          y, x, UPPER, control->cm_solver_pre_app_jacobi_iters );
+//                                  break;
+                            default:
+                                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                                exit( INVALID_INPUT );
+                                break;
+                        }
+                        break;
+                    default:
+                        fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                        exit( INVALID_INPUT );
+                        break;
+
+                }
+                break;
+        }
+    }
+}
 
-    for ( i = 0; i < control->cm_solver_max_iters && SQRT(sig) / bnorm > tol; ++i )
-    {
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->d, REAL_PTR_TYPE, MPI_DOUBLE );
 
+/* Steepest Descent 
+ * This function performs dual iteration for QEq (2 simultaneous solves)
+ * */
+int dual_SDM( reax_system const * const system, control_params const * const control,
+        simulation_data * const data, storage * const workspace,
+        sparse_matrix * const H, rvec2 * const b, real tol,
+        rvec2 * const x, mpi_datatypes * const  mpi_data, int fresh_pre )
+{
+    int i, j, ret;
+    rvec2 tmp, alpha, bnorm, sig;
+    real redux[4];
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    real time;
 #endif
 
 #if defined(NEUTRAL_TERRITORY)
-        Sparse_MatVec_local( H, workspace->d, workspace->q, H->NT );
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            H->NT, workspace->q2 );
 #else
-        Sparse_MatVec_local( H, workspace->d, workspace->q, system->N );
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            system->N, workspace->q2 );
 #endif
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+    time = Get_Time( );
 #endif
 
-        Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-                H->format, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE );
+    Vector_Sum_rvec2( workspace->r2, 1.0, 1.0, b,
+            -1.0, -1.0, workspace->q2, system->n );
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-        redux[0] = Dot_local( workspace->r, workspace->d, system->n );
-        redux[1] = Dot_local( workspace->d, workspace->q, system->n );
+    dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r2,
+            workspace->q2, fresh_pre, LEFT );
+    dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->q2,
+            workspace->d2, fresh_pre, RIGHT );
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+    time = Get_Time( );
 #endif
 
-        ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
-                MPI_SUM, MPI_COMM_WORLD );
-        Check_MPI_Error( ret, __FILE__, __LINE__ );
+    Dot_local_rvec2( b, b, system->n, &redux[0], &redux[1] );
+    Dot_local_rvec2( workspace->r2, workspace->d2, system->n, &redux[2], &redux[3] );
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
+    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-        sig = redux[0];
-        tmp = redux[1];
-        alpha = sig / tmp;
-        Vector_Add( x, alpha, workspace->d, system->n );
-        Vector_Add( workspace->r, -1.0 * alpha, workspace->q, system->n );
+    ret = MPI_Allreduce( MPI_IN_PLACE, redux, 4, MPI_DOUBLE,
+            MPI_SUM, MPI_COMM_WORLD );
+    Check_MPI_Error( ret, __FILE__, __LINE__ );
+    bnorm[0] = SQRT( redux[0] );
+    bnorm[1] = SQRT( redux[1] );
+    sig[0] = redux[2];
+    sig[1] = redux[3];
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+    Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
 #endif
 
-        /* pre-conditioning */
-        if ( control->cm_solver_pre_comp_type == NONE_PC )
+    for ( i = 0; i < control->cm_solver_max_iters; ++i )
+    {
+        if ( SQRT(sig[0]) / bnorm[0] <= tol || SQRT(sig[1]) / bnorm[1] <= tol )
         {
-            Vector_Copy( workspace->d, workspace->r, system->n );
+            break;
         }
-        else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-        {
-            for ( j = 0; j < system->n; ++j )
-            {
-                workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j];
-            }
+
+#if defined(NEUTRAL_TERRITORY)
+        Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->d2,
+                H->NT, workspace->q2 );
+#else
+        Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->d2,
+                system->N, workspace->q2 );
+#endif
 
 #if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
+        time = Get_Time( );
 #endif
-        }
-        else if ( control->cm_solver_pre_comp_type == SAI_PC )
-        {
-            Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                    workspace->r, REAL_PTR_TYPE, MPI_DOUBLE );
+
+        Dot_local_rvec2( workspace->r2, workspace->d2, system->n, &redux[0], &redux[1] );
+        Dot_local_rvec2( workspace->d2, workspace->q2, system->n, &redux[2], &redux[3] );
 
 #if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
-            
-#if defined(NEUTRAL_TERRITORY)
-            Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->d, H->NT );
-#else
-            Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->d, system->n );
+
+        ret = MPI_Allreduce( MPI_IN_PLACE, redux, 4, MPI_DOUBLE,
+                MPI_SUM, MPI_COMM_WORLD );
+        Check_MPI_Error( ret, __FILE__, __LINE__ );
+
+#if defined(LOG_PERFORMANCE)
+        Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
 #endif
 
+        sig[0] = redux[0];
+        sig[1] = redux[1];
+        tmp[0] = redux[2];
+        tmp[1] = redux[3];
+        alpha[0] = sig[0] / tmp[0];
+        alpha[1] = sig[1] / tmp[1];
+        Vector_Add_rvec2( x, alpha[0], alpha[1], workspace->d2, system->n );
+        Vector_Add_rvec2( workspace->r2,
+                -1.0 * alpha[0], -1.0 * alpha[1], workspace->q2, system->n );
+
 #if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-            /* no comm part2 because d is only local portion */
-        }
+        dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r2,
+                workspace->q2, FALSE, LEFT );
+        dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->q2,
+                workspace->d2, FALSE, RIGHT );
+    }
+
+    /* continue the solve that has not converged yet; same test as the loop above: SQRT(sig) / bnorm vs. tol */
+    if ( SQRT(sig[0]) / bnorm[0] > tol )
+    {
+        Vector_Copy_From_rvec2( workspace->s, workspace->x, 0, system->n );
+
+        i += SDM( system, control, data, workspace, /* iterations of the single solve are added to the count */
+                H, workspace->b_s, tol, workspace->s, mpi_data, FALSE );
+
+        Vector_Copy_To_rvec2( workspace->x, workspace->s, 0, system->n );
+    }
+    else if ( SQRT(sig[1]) / bnorm[1] > tol )
+    {
+        Vector_Copy_From_rvec2( workspace->t, workspace->x, 1, system->n );
+
+        i += SDM( system, control, data, workspace, /* iterations of the single solve are added to the count */
+                H, workspace->b_t, tol, workspace->t, mpi_data, FALSE );
+
+        Vector_Copy_To_rvec2( workspace->x, workspace->t, 1, system->n );
+    }
 
     if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE )
     {
         fprintf( stderr, "[WARNING] SDM convergence failed (%d iters)\n", i );
-        fprintf( stderr, "  [INFO] Rel. residual error: %f\n", SQRT(sig) / bnorm );
+        fprintf( stderr, "  [INFO] Rel. residual error (s solve): %f\n", SQRT(sig[0]) / bnorm[0] );
+        fprintf( stderr, "  [INFO] Rel. residual error (t solve): %f\n", SQRT(sig[1]) / bnorm[1] );
         return i;
     }
 
@@ -2297,114 +2639,168 @@ int SDM( reax_system const * const system, control_params const * const control,
 }
 
 
-/* Dual iteration of the Preconditioned Conjugate Gradient Method
- * for QEq (2 simaltaneous solves) */
-int dual_CG( reax_system const * const system, control_params const * const control,
-        simulation_data * const data,
-        storage * const workspace, sparse_matrix * const H, rvec2 * const b,
-        real tol, rvec2 * const x, mpi_datatypes * const  mpi_data )
+/* Steepest Descent */
+int SDM( reax_system const * const system, control_params const * const control,
+        simulation_data * const data, storage * const workspace,
+        sparse_matrix * const H, real * const b, real tol,
+        real * const x, mpi_datatypes * const  mpi_data, int fresh_pre )
 {
     int i, j, ret;
-    rvec2 tmp, alpha, beta, r_norm, b_norm, sig_old, sig_new;
-    real redux[6];
+    real tmp, alpha, bnorm, sig;
+    real redux[2];
 #if defined(LOG_PERFORMANCE)
     real time;
+#endif
 
+#if defined(NEUTRAL_TERRITORY)
+    Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            H->NT, workspace->q );
+#else
+    Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            system->N, workspace->q );
+#endif
+
+#if defined(LOG_PERFORMANCE)
     time = Get_Time( );
 #endif
 
-    Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-            x, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
+    Vector_Sum( workspace->r, 1.0,  b, -1.0, workspace->q, system->n );
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
+    apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r,
+            workspace->q, fresh_pre, LEFT );
+    apply_preconditioner( system, workspace, control, data, mpi_data, workspace->q,
+            workspace->d, fresh_pre, RIGHT );
+
+#if defined(LOG_PERFORMANCE)
+    time = Get_Time( );
+#endif
+
+    redux[0] = Dot_local( b, b, system->n );
+    redux[1] = Dot_local( workspace->r, workspace->d, system->n );
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+#endif
+
+    ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
+            MPI_SUM, MPI_COMM_WORLD );
+    Check_MPI_Error( ret, __FILE__, __LINE__ );
+    bnorm = SQRT( redux[0] );
+    sig = redux[1];
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
+#endif
+
+    for ( i = 0; i < control->cm_solver_max_iters && SQRT(sig) / bnorm > tol; ++i )
+    {
 #if defined(NEUTRAL_TERRITORY)
-    dual_Sparse_MatVec_local( H, x, workspace->q2, H->NT );
+        Sparse_MatVec( system, control, data, mpi_data, H, workspace->d,
+                H->NT, workspace->q );
 #else
-    dual_Sparse_MatVec_local( H, x, workspace->q2, system->N );
+        Sparse_MatVec( system, control, data, mpi_data, H, workspace->d,
+                system->N, workspace->q );
 #endif
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+        time = Get_Time( );
 #endif
 
-    Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-            H->format, workspace->q2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
+        redux[0] = Dot_local( workspace->r, workspace->d, system->n );
+        redux[1] = Dot_local( workspace->d, workspace->q, system->n );
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-    /* residual */
-    for ( j = 0; j < system->n; ++j )
-    {
-        workspace->r2[j][0] = b[j][0] - workspace->q2[j][0];
-        workspace->r2[j][1] = b[j][1] - workspace->q2[j][1];
-    }
+        ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
+                MPI_SUM, MPI_COMM_WORLD );
+        Check_MPI_Error( ret, __FILE__, __LINE__ );
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+        Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
 #endif
 
-    if ( control->cm_solver_pre_comp_type == NONE_PC )
-    {
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->d2[j][0] = workspace->r2[j][0];
-            workspace->d2[j][1] = workspace->r2[j][1];
-        }
-    }
-    else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-    {
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->d2[j][0] = workspace->r2[j][0] * workspace->Hdia_inv[j];
-            workspace->d2[j][1] = workspace->r2[j][1] * workspace->Hdia_inv[j];
-        }
+        sig = redux[0];
+        tmp = redux[1];
+        alpha = sig / tmp;
+        Vector_Add( x, alpha, workspace->d, system->n );
+        Vector_Add( workspace->r, -1.0 * alpha, workspace->q, system->n );
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
+
+        apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r,
+                workspace->q, FALSE, LEFT );
+        apply_preconditioner( system, workspace, control, data, mpi_data, workspace->q,
+                workspace->d, FALSE, RIGHT );
     }
-    else if ( control->cm_solver_pre_comp_type == SAI_PC )
+
+    if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE )
     {
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->r2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
+        fprintf( stderr, "[WARNING] SDM convergence failed (%d iters)\n", i );
+        fprintf( stderr, "  [INFO] Rel. residual error: %f\n", SQRT(sig) / bnorm );
+        return i;
+    }
+
+    return i;
+}
+
 
+/* Dual iteration of the Preconditioned Conjugate Gradient Method
+ * for QEq (2 simultaneous solves) */
+int dual_CG( reax_system const * const system, control_params const * const control,
+        simulation_data * const data,
+        storage * const workspace, sparse_matrix * const H, rvec2 * const b,
+        real tol, rvec2 * const x, mpi_datatypes * const  mpi_data, int fresh_pre )
+{
+    int i, j, ret;
+    rvec2 tmp, alpha, beta, r_norm, b_norm, sig_old, sig_new;
+    real redux[6];
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    real time;
 #endif
 
 #if defined(NEUTRAL_TERRITORY)
-        dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->r2, workspace->d2, H->NT );
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, x,
+            H->NT, workspace->q2 );
 #else
-        dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->r2, workspace->d2, system->n );
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, x,
+            system->N, workspace->q2 );
 #endif
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
+    time = Get_Time( );
 #endif
 
-        /* no comm part2 because d2 is only local portion */
-    }
+    Vector_Sum_rvec2( workspace->r2, 1.0, 1.0, b, -1.0, -1.0, workspace->q2, system->n );
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+#endif
+
+    dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r2,
+            workspace->q2, fresh_pre, LEFT );
+    dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->q2,
+            workspace->d2, fresh_pre, RIGHT );
+
+#if defined(LOG_PERFORMANCE)
+    time = Get_Time( );
+#endif
 
     for ( j = 0; j < 6; ++j )
     {
         redux[j] = 0.0;
     }
-    for ( j = 0; j < system->n; ++j )
-    {
-        redux[0] += workspace->r2[j][0] * workspace->d2[j][0];
-        redux[1] += workspace->r2[j][1] * workspace->d2[j][1];
-        
-        redux[2] += workspace->d2[j][0] * workspace->d2[j][0];
-        redux[3] += workspace->d2[j][1] * workspace->d2[j][1];
 
-        redux[4] += b[j][0] * b[j][0];
-        redux[5] += b[j][1] * b[j][1];
-    }
+    Dot_local_rvec2( workspace->r2, workspace->d2, system->n, &redux[0], &redux[1] );
+    Dot_local_rvec2( workspace->d2, workspace->d2, system->n, &redux[2], &redux[3] );
+    Dot_local_rvec2( b, b, system->n, &redux[4], &redux[5] );
 
 #if defined(LOG_PERFORMANCE)
     Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
@@ -2432,38 +2828,21 @@ int dual_CG( reax_system const * const system, control_params const * const cont
             break;
         }
 
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->d2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-
 #if defined(NEUTRAL_TERRITORY)
-        dual_Sparse_MatVec_local( H, workspace->d2, workspace->q2, H->NT );
+        Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->d2,
+                H->NT, workspace->q2 );
 #else
-        dual_Sparse_MatVec_local( H, workspace->d2, workspace->q2, system->N );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+        Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->d2,
+                system->N, workspace->q2 );
 #endif
 
-        Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-                H->format, workspace->q2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        time = Get_Time( );
 #endif
 
-        /* dot product: d.q */
         redux[0] = 0.0;
         redux[1] = 0.0;
-        for ( j = 0; j < system->n; ++j )
-        {
-            redux[0] += workspace->d2[j][0] * workspace->q2[j][0];
-            redux[1] += workspace->d2[j][1] * workspace->q2[j][1];
-        }
+        Dot_local_rvec2( workspace->d2, workspace->q2, system->n, &redux[0], &redux[1] );
 
 #if defined(LOG_PERFORMANCE)
         Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
@@ -2479,77 +2858,29 @@ int dual_CG( reax_system const * const system, control_params const * const cont
 
         alpha[0] = sig_new[0] / tmp[0];
         alpha[1] = sig_new[1] / tmp[1];
-        /* update x */
-        for ( j = 0; j < system->n; ++j )
-        {
-            x[j][0] += alpha[0] * workspace->d2[j][0];
-            x[j][1] += alpha[1] * workspace->d2[j][1];
-        }
-        /* update residual */
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->r2[j][0] -= alpha[0] * workspace->q2[j][0];
-            workspace->r2[j][1] -= alpha[1] * workspace->q2[j][1];
-        }
+        Vector_Add_rvec2( x, alpha[0], alpha[1], workspace->d2, system->n );
+        Vector_Add_rvec2( workspace->r2, -1.0 * alpha[0], -1.0 * alpha[1],
+                workspace->q2, system->n );
 
 #if defined(LOG_PERFORMANCE)
         Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-        if ( control->cm_solver_pre_comp_type == NONE_PC )
-        {
-            for ( j = 0; j < system->n; ++j )
-            {
-                workspace->p2[j][0] = workspace->r2[j][0];
-                workspace->p2[j][1] = workspace->r2[j][1];
-            }
-        }
-        else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-        {
-            for ( j = 0; j < system->n; ++j )
-            {
-                workspace->p2[j][0] = workspace->r2[j][0] * workspace->Hdia_inv[j];
-                workspace->p2[j][1] = workspace->r2[j][1] * workspace->Hdia_inv[j];
-            }
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-        }
-        else if ( control->cm_solver_pre_comp_type == SAI_PC )
-        {
-            Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                    workspace->r2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-
-#if defined(NEUTRAL_TERRITORY)
-            dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->r2, workspace->p2, H->NT );
-#else
-            dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->r2, workspace->p2, system->n );
-#endif
+        dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r2,
+                workspace->q2, FALSE, LEFT );
+        dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->q2,
+                workspace->p2, FALSE, RIGHT );
 
 #if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
+        time = Get_Time( );
 #endif
 
-            /* no comm part2 because p2 is only local portion */
-        }
-
         redux[0] = 0.0;
         redux[1] = 0.0;
         redux[2] = 0.0;
         redux[3] = 0.0;
-        /* dot products: r.p and p.p */
-        for ( j = 0; j < system->n; ++j )
-        {
-            redux[0] += workspace->r2[j][0] * workspace->p2[j][0];
-            redux[1] += workspace->r2[j][1] * workspace->p2[j][1];
-            redux[2] += workspace->p2[j][0] * workspace->p2[j][0];
-            redux[3] += workspace->p2[j][1] * workspace->p2[j][1];
-        }
+        Dot_local_rvec2( workspace->r2, workspace->p2, system->n, &redux[0], &redux[1] );
+        Dot_local_rvec2( workspace->p2, workspace->p2, system->n, &redux[2], &redux[3] );
 
 #if defined(LOG_PERFORMANCE)
         Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
@@ -2571,12 +2902,8 @@ int dual_CG( reax_system const * const system, control_params const * const cont
         r_norm[1] = SQRT( redux[3] );
         beta[0] = sig_new[0] / sig_old[0];
         beta[1] = sig_new[1] / sig_old[1];
-        /* d = p + beta * d */
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->d2[j][0] = workspace->p2[j][0] + beta[0] * workspace->d2[j][0];
-            workspace->d2[j][1] = workspace->p2[j][1] + beta[1] * workspace->d2[j][1];
-        }
+        Vector_Sum_rvec2( workspace->d2, 1.0, 1.0, workspace->p2, beta[0], beta[1],
+                workspace->d2, system->n );
 
 #if defined(LOG_PERFORMANCE)
         Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
@@ -2586,33 +2913,21 @@ int dual_CG( reax_system const * const system, control_params const * const cont
     /* continue to solve the system that has not converged yet */
     if ( r_norm[0] / b_norm[0] > tol )
     {
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->s[j] = workspace->x[j][0];
-        }
+        Vector_Copy_From_rvec2( workspace->s, workspace->x, 0, system->n );
 
         i += CG( system, control, data, workspace,
-                H, workspace->b_s, tol, workspace->s, mpi_data );
-
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->x[j][0] = workspace->s[j];
-        }
+                H, workspace->b_s, tol, workspace->s, mpi_data, FALSE );
+
+        Vector_Copy_To_rvec2( workspace->x, workspace->s, 0, system->n );
     }
     else if ( r_norm[1] / b_norm[1] > tol )
     {
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->t[j] = workspace->x[j][1];
-        }
+        Vector_Copy_From_rvec2( workspace->t, workspace->x, 1, system->n );
 
         i += CG( system, control, data, workspace,
-                H, workspace->b_t, tol, workspace->t, mpi_data );
+                H, workspace->b_t, tol, workspace->t, mpi_data, FALSE );
 
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->x[j][1] = workspace->t[j];
-        }
+        Vector_Copy_To_rvec2( workspace->x, workspace->t, 1, system->n );
     }
 
     if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE )
@@ -2630,7 +2945,7 @@ int dual_CG( reax_system const * const system, control_params const * const cont
 int CG( reax_system const * const system, control_params const * const control,
         simulation_data * const data,
         storage * const workspace, sparse_matrix * const H, real * const b,
-        real tol, real * const x, mpi_datatypes * const  mpi_data )
+        real tol, real * const x, mpi_datatypes * const  mpi_data, int fresh_pre )
 {
     int i, j, ret;
     real tmp, alpha, beta, r_norm, b_norm;
@@ -2638,178 +2953,361 @@ int CG( reax_system const * const system, control_params const * const control,
     real redux[3];
 #if defined(LOG_PERFORMANCE)
     real time;
+#endif
+
+#if defined(NEUTRAL_TERRITORY)
+    Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            H->NT, workspace->q );
+#else
+    Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            system->N, workspace->q );
+#endif
 
+#if defined(LOG_PERFORMANCE)
     time = Get_Time( );
 #endif
 
-    Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-            x, REAL_PTR_TYPE, MPI_DOUBLE );
+    Vector_Sum( workspace->r, 1.0, b, -1.0, workspace->q, system->n );
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+#endif
+
+    apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r,
+            workspace->q, fresh_pre, LEFT );
+    apply_preconditioner( system, workspace, control, data, mpi_data, workspace->q,
+            workspace->d, fresh_pre, RIGHT );
+
+#if defined(LOG_PERFORMANCE)
+    time = Get_Time( );
+#endif
+
+    redux[0] = Dot_local( workspace->r, workspace->d, system->n );
+    redux[1] = Dot_local( workspace->d, workspace->d, system->n );
+    redux[2] = Dot_local( b, b, system->n );
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+#endif
+
+    ret = MPI_Allreduce( MPI_IN_PLACE, redux, 3, MPI_DOUBLE,
+            MPI_SUM, MPI_COMM_WORLD );
+    Check_MPI_Error( ret, __FILE__, __LINE__ );
+
+    sig_new = redux[0];
+    r_norm = SQRT( redux[1] );
+    b_norm = SQRT( redux[2] );
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
 #endif
 
+    for ( i = 0; i < control->cm_solver_max_iters && r_norm / b_norm > tol; ++i )
+    {
 #if defined(NEUTRAL_TERRITORY)
-    Sparse_MatVec_local( H, x, workspace->q, H->NT );
+        Sparse_MatVec( system, control, data, mpi_data, H, workspace->d, 
+                H->NT, workspace->q );
 #else
-    Sparse_MatVec_local( H, x, workspace->q, system->N );
+        Sparse_MatVec( system, control, data, mpi_data, H, workspace->d, 
+                system->N, workspace->q );
 #endif
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+        time = Get_Time( );
 #endif
 
-    Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-            H->format, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE );
+        tmp = Parallel_Dot( workspace->d, workspace->q, system->n, MPI_COMM_WORLD );
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
 #endif
 
-    Vector_Sum( workspace->r, 1.0,  b, -1.0, workspace->q, system->n );
+        alpha = sig_new / tmp;
+        Vector_Add( x, alpha, workspace->d, system->n );
+        Vector_Add( workspace->r, -1.0 * alpha, workspace->q, system->n );
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-    /* pre-conditioning */
-    if ( control->cm_solver_pre_comp_type == NONE_PC )
-    {
-        Vector_Copy( workspace->d, workspace->r, system->n );
-    }
-    else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-    {
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j];
-        }
+        apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r,
+                workspace->q, FALSE, LEFT );
+        apply_preconditioner( system, workspace, control, data, mpi_data, workspace->q,
+                workspace->p, FALSE, RIGHT );
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
+        time = Get_Time( );
 #endif
-    }
-    else if ( control->cm_solver_pre_comp_type == SAI_PC )
-    {
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->r, REAL_PTR_TYPE, MPI_DOUBLE );
+
+        redux[0] = Dot_local( workspace->r, workspace->p, system->n );
+        redux[1] = Dot_local( workspace->p, workspace->p, system->n );
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
-        
-#if defined(NEUTRAL_TERRITORY)
-        Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->d, H->NT );
-#else
-        Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->d, system->n );
+
+        ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
+                MPI_SUM, MPI_COMM_WORLD );
+        Check_MPI_Error( ret, __FILE__, __LINE__ );
+
+#if defined(LOG_PERFORMANCE)
+        Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
 #endif
 
+        sig_old = sig_new;
+        sig_new = redux[0];
+        r_norm = SQRT( redux[1] );
+        beta = sig_new / sig_old;
+        Vector_Sum( workspace->d, 1.0, workspace->p, beta, workspace->d, system->n );
+
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
+    }
 
-        /* no comm part2 because d is only local portion */
+    if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE )
+    {
+        fprintf( stderr, "[WARNING] CG convergence failed (%d iters)\n", i );
+        fprintf( stderr, "  [INFO] Rel. residual error: %e\n", r_norm / b_norm );
+        return i;
     }
 
-    redux[0] = Dot_local( workspace->r, workspace->d, system->n );
-    redux[1] = Dot_local( workspace->d, workspace->d, system->n );
-    redux[2] = Dot_local( b, b, system->n );
+    return i;
+}
+
+
+/* Bi-conjugate gradient stabilized method with left preconditioning for
+ * solving nonsymmetric linear systems.
+ * This function performs dual iteration for QEq (2 simultaneous solves)
+ *
+ * system: struct describing the simulated system (n, N, and my_rank are read here)
+ * workspace: struct containing storage for workspace for the linear solver
+ * control: struct containing parameters governing the simulation and numeric methods
+ * data: struct containing simulation data (e.g., atom info)
+ * H: sparse, symmetric matrix, lower half stored in CSR format
+ * b: right-hand side of the linear system
+ * tol: tolerance compared against the relative residual for determining convergence
+ * x: initial guess
+ * mpi_data: struct of MPI-related data, passed through to the mat-vec and preconditioner calls
+ *
+ * Reference: Netlib (in MATLAB)
+ *  http://www.netlib.org/templates/matlab/bicgstab.m
+ * */
+int dual_BiCGStab( reax_system const * const system, control_params const * const control,
+        simulation_data * const data,
+        storage * const workspace, sparse_matrix * const H, rvec2 * const b,
+        real tol, rvec2 * const x, mpi_datatypes * const  mpi_data, int fresh_pre )
+{
+    int i, j, ret;
+    rvec2 tmp, alpha, beta, omega, sigma, rho, rho_old, r_norm, b_norm;
+    real time, redux[4];
+
+#if defined(NEUTRAL_TERRITORY)
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            H->NT, workspace->d2 );
+#else
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            system->N, workspace->d2 );
+#endif
+
+#if defined(LOG_PERFORMANCE)
+    time = Get_Time( );
+#endif
+
+    Vector_Sum_rvec2( workspace->r2, 1.0, 1.0, b, -1.0, -1.0, workspace->d2, system->n );
+    Dot_local_rvec2( b, b, system->n, &redux[0], &redux[1] );
+    Dot_local_rvec2( workspace->r2, workspace->r2, system->n, &redux[2], &redux[3] );
 
 #if defined(LOG_PERFORMANCE)
     Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-    ret = MPI_Allreduce( MPI_IN_PLACE, redux, 3, MPI_DOUBLE,
+    ret = MPI_Allreduce( MPI_IN_PLACE, redux, 4, MPI_DOUBLE,
             MPI_SUM, MPI_COMM_WORLD );
     Check_MPI_Error( ret, __FILE__, __LINE__ );
 
-    sig_new = redux[0];
-    r_norm = SQRT( redux[1] );
-    b_norm = SQRT( redux[2] );
-
 #if defined(LOG_PERFORMANCE)
     Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
 #endif
 
-    for ( i = 0; i < control->cm_solver_max_iters && r_norm / b_norm > tol; ++i )
+    b_norm[0] = SQRT( redux[0] );
+    b_norm[1] = SQRT( redux[1] );
+    r_norm[0] = SQRT( redux[2] );
+    r_norm[1] = SQRT( redux[3] );
+    if ( b_norm[0] == 0.0 )
     {
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->d, REAL_PTR_TYPE, MPI_DOUBLE );
+        b_norm[0] = 1.0;
+    }
+    if ( b_norm[1] == 0.0 )
+    {
+        b_norm[1] = 1.0;
+    }
+    Vector_Copy_rvec2( workspace->r_hat2, workspace->r2, system->n );
+    omega[0] = 1.0;
+    omega[1] = 1.0;
+    rho[0] = 1.0;
+    rho[1] = 1.0;
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+#endif
+
+    for ( i = 0; i < control->cm_solver_max_iters; ++i )
+    {
+        if ( r_norm[0] / b_norm[0] <= tol || r_norm[1] / b_norm[1] <= tol )
+        {
+            break;
+        }
+
+        Dot_local_rvec2( workspace->r_hat2, workspace->r2, system->n,
+                &redux[0], &redux[1] );
+
+#if defined(LOG_PERFORMANCE)
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+#endif
+
+        ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
+                MPI_SUM, MPI_COMM_WORLD );
+        Check_MPI_Error( ret, __FILE__, __LINE__ );
+
+#if defined(LOG_PERFORMANCE)
+        Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
+#endif
+
+        rho[0] = redux[0];
+        rho[1] = redux[1];
+        if ( rho[0] == 0.0 || rho[1] == 0.0 )
+        {
+            break;
+        }
+        if ( i > 0 )
+        {
+            beta[0] = (rho[0] / rho_old[0]) * (alpha[0] / omega[0]);
+            beta[1] = (rho[1] / rho_old[1]) * (alpha[1] / omega[1]);
+            Vector_Sum_rvec2( workspace->q2, 1.0, 1.0, workspace->p2,
+                    -1.0 * omega[0], -1.0 * omega[1], workspace->z2, system->n );
+            Vector_Sum_rvec2( workspace->p2, 1.0, 1.0, workspace->r2,
+                    beta[0], beta[1], workspace->q2, system->n );
+        }
+        else
+        {
+            Vector_Copy_rvec2( workspace->p2, workspace->r2, system->n );
+        }
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
+        dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->p2,
+                workspace->y2, i == 0 ? fresh_pre : FALSE, LEFT );
+        dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->y2,
+                workspace->d2, i == 0 ? fresh_pre : FALSE, RIGHT );
+
 #if defined(NEUTRAL_TERRITORY)
-        Sparse_MatVec_local( H, workspace->d, workspace->q, H->NT );
+        Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->d2,
+                H->NT, workspace->z2 );
 #else
-        Sparse_MatVec_local( H, workspace->d, workspace->q, system->N );
+        Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->d2,
+                system->N, workspace->z2 );
 #endif
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+        time = Get_Time( );
 #endif
 
-        Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-                H->format, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE );
+        Dot_local_rvec2( workspace->r_hat2, workspace->z2, system->n,
+                &redux[0], &redux[1] );
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-        tmp = Parallel_Dot( workspace->d, workspace->q, system->n, MPI_COMM_WORLD );
+        ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
+                MPI_SUM, MPI_COMM_WORLD );
+        Check_MPI_Error( ret, __FILE__, __LINE__ );
 
 #if defined(LOG_PERFORMANCE)
         Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
 #endif
 
-        alpha = sig_new / tmp;
-        Vector_Add( x, alpha, workspace->d, system->n );
-        Vector_Add( workspace->r, -1.0 * alpha, workspace->q, system->n );
+        tmp[0] = redux[0];
+        tmp[1] = redux[1];
+        alpha[0] = rho[0] / tmp[0];
+        alpha[1] = rho[1] / tmp[1];
+        Vector_Sum_rvec2( workspace->q2, 1.0, 1.0, workspace->r2,
+                -1.0 * alpha[0], -1.0 * alpha[1], workspace->z2, system->n );
+        Dot_local_rvec2( workspace->q2, workspace->q2, system->n,
+                &redux[0], &redux[1] );
 
 #if defined(LOG_PERFORMANCE)
         Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-        /* pre-conditioning */
-        if ( control->cm_solver_pre_comp_type == NONE_PC )
-        {
-            Vector_Copy( workspace->p, workspace->r, system->n );
-        }
-        else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-        {
-            for ( j = 0; j < system->n; ++j )
-            {
-                workspace->p[j] = workspace->r[j] * workspace->Hdia_inv[j];
-            }
+        ret = MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE,
+                MPI_SUM, MPI_COMM_WORLD );
+        Check_MPI_Error( ret, __FILE__, __LINE__ );
 
 #if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
+        Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
 #endif
-        }
-        else if ( control->cm_solver_pre_comp_type == SAI_PC )
+
+        tmp[0] = redux[0];
+        tmp[1] = redux[1];
+        /* early convergence check */
+        if ( tmp[0] < tol || tmp[1] < tol )
         {
-            Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                    workspace->r, REAL_PTR_TYPE, MPI_DOUBLE );
+            Vector_Add_rvec2( x, alpha[0], alpha[1], workspace->d2, system->n );
+            break;
+        }
 
 #if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
+        dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->q2,
+                workspace->y2, FALSE, LEFT );
+        dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->y2,
+                workspace->q_hat2, FALSE, RIGHT );
+
 #if defined(NEUTRAL_TERRITORY)
-            Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->p, H->NT );
+        Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->q_hat2,
+                H->NT, workspace->y2 );
 #else
-            Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->p, system->n );
+        Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->q_hat2,
+                system->N, workspace->y2 );
 #endif
 
 #if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
+        time = Get_Time( );
 #endif
 
-            /* no comm part2 because p is only local portion */
-        }
+        Dot_local_rvec2( workspace->y2, workspace->q2, system->n, &redux[0], &redux[1] );
+        Dot_local_rvec2( workspace->y2, workspace->y2, system->n, &redux[2], &redux[3] );
 
-        redux[0] = Dot_local( workspace->r, workspace->p, system->n );
-        redux[1] = Dot_local( workspace->p, workspace->p, system->n );
+#if defined(LOG_PERFORMANCE)
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+#endif
+
+        ret = MPI_Allreduce( MPI_IN_PLACE, redux, 4, MPI_DOUBLE,
+                MPI_SUM, MPI_COMM_WORLD );
+        Check_MPI_Error( ret, __FILE__, __LINE__ );
+
+#if defined(LOG_PERFORMANCE)
+        Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
+#endif
+
+        sigma[0] = redux[0];
+        sigma[1] = redux[1];
+        tmp[0] = redux[2];
+        tmp[1] = redux[3];
+        omega[0] = sigma[0] / tmp[0];
+        omega[1] = sigma[1] / tmp[1];
+        Vector_Sum_rvec2( workspace->g2, alpha[0], alpha[1], workspace->d2,
+                omega[0], omega[1], workspace->q_hat2, system->n );
+        Vector_Add_rvec2( x, 1.0, 1.0, workspace->g2, system->n );
+        Vector_Sum_rvec2( workspace->r2, 1.0, 1.0, workspace->q2,
+                -1.0 * omega[0], -1.0 * omega[1], workspace->y2, system->n );
+        Dot_local_rvec2( workspace->r2, workspace->r2, system->n, &redux[0], &redux[1] );
 
 #if defined(LOG_PERFORMANCE)
         Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
@@ -2823,22 +3321,58 @@ int CG( reax_system const * const system, control_params const * const control,
         Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
 #endif
 
-        sig_old = sig_new;
-        sig_new = redux[0];
-        r_norm = SQRT( redux[1] );
-        beta = sig_new / sig_old;
-        Vector_Sum( workspace->d, 1.0, workspace->p, beta, workspace->d, system->n );
+        r_norm[0] = SQRT( redux[0] );
+        r_norm[1] = SQRT( redux[1] );
+        if ( omega[0] == 0.0 || omega[1] == 0.0 )
+        {
+            break;
+        }
+        rho_old[0] = rho[0];
+        rho_old[1] = rho[1];
 
 #if defined(LOG_PERFORMANCE)
         Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
     }
 
-    if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE )
+    if ( (omega[0] == 0.0 || omega[1] == 0.0) && system->my_rank == MASTER_NODE )
     {
-        fprintf( stderr, "[WARNING] CG convergence failed (%d iters)\n", i );
-        fprintf( stderr, "  [INFO] Rel. residual error: %e\n", r_norm / b_norm );
-        return i;
+        fprintf( stderr, "[WARNING] BiCGStab numeric breakdown (%d iters)\n", i );
+        fprintf( stderr, "  [INFO] omega = (%e, %e)\n", omega[0], omega[1] ); /* omega is a 2-component array: print each entry */
+    }
+    else if ( (rho[0] == 0.0 || rho[1] == 0.0) && system->my_rank == MASTER_NODE )
+    {
+        fprintf( stderr, "[WARNING] BiCGStab numeric breakdown (%d iters)\n", i );
+        fprintf( stderr, "  [INFO] rho = (%e, %e)\n", rho[0], rho[1] ); /* rho is a 2-component array: print each entry */
+    }
+
+    /* continue to solve the system that has not converged yet */
+    if ( r_norm[0] / b_norm[0] > tol )
+    {
+        Vector_Copy_From_rvec2( workspace->s, workspace->x, 0, system->n );
+
+        i += BiCGStab( system, control, data, workspace,
+                H, workspace->b_s, tol, workspace->s, mpi_data, FALSE );
+
+        Vector_Copy_To_rvec2( workspace->x, workspace->s, 0, system->n );
+    }
+    else if ( r_norm[1] / b_norm[1] > tol )
+    {
+        Vector_Copy_From_rvec2( workspace->t, workspace->x, 1, system->n );
+
+        i += BiCGStab( system, control, data, workspace,
+                H, workspace->b_t, tol, workspace->t, mpi_data, FALSE );
+
+        Vector_Copy_To_rvec2( workspace->x, workspace->t, 1, system->n );
+    }
+
+
+    if ( i >= control->cm_solver_max_iters
+            && system->my_rank == MASTER_NODE )
+    {
+        fprintf( stderr, "[WARNING] BiCGStab convergence failed (%d iters)\n", i );
+        fprintf( stderr, "  [INFO] Rel. residual error (s solve): %e\n", r_norm[0] / b_norm[0] );
+        fprintf( stderr, "  [INFO] Rel. residual error (t solve): %e\n", r_norm[1] / b_norm[1] );
     }
 
     return i;
@@ -2864,38 +3398,22 @@ int CG( reax_system const * const system, control_params const * const control,
 int BiCGStab( reax_system const * const system, control_params const * const control,
         simulation_data * const data,
         storage * const workspace, sparse_matrix * const H, real * const b,
-        real tol, real * const x, mpi_datatypes * const  mpi_data )
+        real tol, real * const x, mpi_datatypes * const  mpi_data, int fresh_pre )
 {
     int i, j, ret;
     real tmp, alpha, beta, omega, sigma, rho, rho_old, r_norm, b_norm;
     real time, redux[2];
 
-#if defined(LOG_PERFORMANCE)
-    time = Get_Time( );
-#endif
-
-    Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-            x, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-
 #if defined(NEUTRAL_TERRITORY)
-    Sparse_MatVec_local( H, x, workspace->d, H->NT );
+    Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            H->NT, workspace->d );
 #else
-    Sparse_MatVec_local( H, x, workspace->d, system->N );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+    Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            system->N, workspace->d );
 #endif
 
-    Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-            H->format, workspace->d, REAL_PTR_TYPE, MPI_DOUBLE );
-
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    time = Get_Time( );
 #endif
 
     Vector_Sum( workspace->r, 1.0,  b, -1.0, workspace->d, system->n );
@@ -2964,66 +3482,21 @@ int BiCGStab( reax_system const * const system, control_params const * const con
         Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-        /* pre-conditioning */
-        if ( control->cm_solver_pre_comp_type == NONE_PC )
-        {
-            Vector_Copy( workspace->d, workspace->p, system->n );
-        }
-        else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-        {
-            for ( j = 0; j < system->n; ++j )
-            {
-                workspace->d[j] = workspace->p[j] * workspace->Hdia_inv[j];
-            }
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-        }
-        else if ( control->cm_solver_pre_comp_type == SAI_PC )
-        {
-            Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                    workspace->p, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-
-#if defined(NEUTRAL_TERRITORY)
-            Sparse_MatVec_local( &workspace->H_app_inv, workspace->p, workspace->d, H->NT );
-#else
-            Sparse_MatVec_local( &workspace->H_app_inv, workspace->p, workspace->d, system->n );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-
-            /* no comm part2 because d is only local portion */
-        }
-
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->d, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
+        apply_preconditioner( system, workspace, control, data, mpi_data, workspace->p,
+                workspace->y, i == 0 ? fresh_pre : FALSE, LEFT );
+        apply_preconditioner( system, workspace, control, data, mpi_data, workspace->y,
+                workspace->d, i == 0 ? fresh_pre : FALSE, RIGHT );
 
 #if defined(NEUTRAL_TERRITORY)
-        Sparse_MatVec_local( H, workspace->d, workspace->z, H->NT );
+        Sparse_MatVec( system, control, data, mpi_data, H, workspace->d,
+                H->NT, workspace->z );
 #else
-        Sparse_MatVec_local( H, workspace->d, workspace->z, system->N );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+        Sparse_MatVec( system, control, data, mpi_data, H, workspace->d,
+                system->N, workspace->z );
 #endif
 
-        Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-                H->format, workspace->z, REAL_PTR_TYPE, MPI_DOUBLE );
-
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        time = Get_Time( );
 #endif
 
         redux[0] = Dot_local( workspace->r_hat, workspace->z, system->n );
@@ -3069,66 +3542,21 @@ int BiCGStab( reax_system const * const system, control_params const * const con
         Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-        /* pre-conditioning */
-        if ( control->cm_solver_pre_comp_type == NONE_PC )
-        {
-            Vector_Copy( workspace->q_hat, workspace->q, system->n );
-        }
-        else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-        {
-            for ( j = 0; j < system->n; ++j )
-            {
-                workspace->q_hat[j] = workspace->q[j] * workspace->Hdia_inv[j];
-            }
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-        }
-        else if ( control->cm_solver_pre_comp_type == SAI_PC )
-        {
-            Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                    workspace->q, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-
-#if defined(NEUTRAL_TERRITORY)
-            Sparse_MatVec_local( &workspace->H_app_inv, workspace->q, workspace->q_hat, H->NT );
-#else
-            Sparse_MatVec_local( &workspace->H_app_inv, workspace->q, workspace->q_hat, system->n );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-
-            /* no comm part2 because q_hat is only local portion */
-        }
-
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->q_hat, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
+        apply_preconditioner( system, workspace, control, data, mpi_data, workspace->q,
+                workspace->y, FALSE, LEFT );
+        apply_preconditioner( system, workspace, control, data, mpi_data, workspace->y,
+                workspace->q_hat, FALSE, RIGHT );
 
 #if defined(NEUTRAL_TERRITORY)
-        Sparse_MatVec_local( H, workspace->q_hat, workspace->y, H->NT );
+        Sparse_MatVec( system, control, data, mpi_data, H, workspace->q_hat,
+                H->NT, workspace->y );
 #else
-        Sparse_MatVec_local( H, workspace->q_hat, workspace->y, system->N );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+        Sparse_MatVec( system, control, data, mpi_data, H, workspace->q_hat,
+                system->N, workspace->y );
 #endif
 
-        Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-                H->format, workspace->y, REAL_PTR_TYPE, MPI_DOUBLE );
-
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        time = Get_Time( );
 #endif
 
         redux[0] = Dot_local( workspace->y, workspace->q, system->n );
@@ -3212,7 +3640,7 @@ int BiCGStab( reax_system const * const system, control_params const * const con
 int dual_PIPECG( reax_system const * const system, control_params const * const control,
         simulation_data * const data,
         storage * const workspace, sparse_matrix * const H, rvec2 * const b,
-        real tol, rvec2 * const x, mpi_datatypes * const  mpi_data )
+        real tol, rvec2 * const x, mpi_datatypes * const  mpi_data, int fresh_pre )
 {
     int i, j, ret;
     rvec2 alpha, beta, delta, gamma_old, gamma_new, r_norm, b_norm;
@@ -3220,135 +3648,51 @@ int dual_PIPECG( reax_system const * const system, control_params const * const
     MPI_Request req;
 #if defined(LOG_PERFORMANCE)
     real time;
-
-    time = Get_Time( );
-#endif
-
-    Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-            x, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
 #endif
 
 #if defined(NEUTRAL_TERRITORY)
-    dual_Sparse_MatVec_local( H, x, workspace->u2, H->NT );
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, x,
+            H->NT, workspace->u2 );
 #else
-    dual_Sparse_MatVec_local( H, x, workspace->u2, system->N );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, x,
+            system->N, workspace->u2 );
 #endif
 
-    Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-            H->format, workspace->u2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    time = Get_Time( );
 #endif
 
-    //Vector_Sum( workspace->r , 1.0,  b, -1.0, workspace->u, system->n );
-    for ( j = 0; j < system->n; ++j )
-    {
-        workspace->r2[j][0] = b[j][0] - workspace->u2[j][0];
-        workspace->r2[j][1] = b[j][1] - workspace->u2[j][1];
-    }
+    Vector_Sum_rvec2( workspace->r2, 1.0, 1.0, b, -1.0, -1.0, workspace->u2, system->n );
 
 #if defined(LOG_PERFORMANCE)
     Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-    /* pre-conditioning */
-    if ( control->cm_solver_pre_comp_type == NONE_PC )
-    {
-        //Vector_Copy( workspace->u, workspace->r, system->n );
-        for ( j = 0; j < system->n ; ++j )
-        {
-            workspace->u2[j][0] = workspace->r2[j][0];
-            workspace->u2[j][1] = workspace->r2[j][1];
-        }
-    }
-    else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-    {
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->u2[j][0] = workspace->r2[j][0] * workspace->Hdia_inv[j];
-            workspace->u2[j][1] = workspace->r2[j][1] * workspace->Hdia_inv[j];
-        }
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-    }
-    else if ( control->cm_solver_pre_comp_type == SAI_PC )
-    {
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->r2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-        
-#if defined(NEUTRAL_TERRITORY)
-        dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->r2, workspace->u2, H->NT );
-#else
-        dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->r2, workspace->u2, system->n );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-
-        /* no comm part2 because u2 is only local portion */
-    }
-
-    Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-            workspace->u2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
+    dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r2,
+            workspace->m2, fresh_pre, LEFT );
+    dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->m2,
+            workspace->u2, fresh_pre, RIGHT );
 
 #if defined(NEUTRAL_TERRITORY)
-    dual_Sparse_MatVec_local( H, workspace->u2, workspace->w2, H->NT );
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->u2,
+            H->NT, workspace->w2 );
 #else
-    dual_Sparse_MatVec_local( H, workspace->u2, workspace->w2, system->N );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->u2,
+            system->N, workspace->w2 );
 #endif
 
-    Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-            H->format, workspace->w2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    time = Get_Time( );
 #endif
 
-    //redux[0] = Dot_local( workspace->w, workspace->u, system->n );
-    //redux[1] = Dot_local( workspace->r, workspace->u, system->n );
-    //redux[2] = Dot_local( workspace->u, workspace->u, system->n );
-    //redux[3] = Dot_local( b, b, system->n );
     for ( j = 0; j < 8; ++j )
     {
         redux[j] = 0.0;
     }
-    for( j = 0; j < system->n; ++j )
-    {
-        redux[0] += workspace->w2[j][0] * workspace->u2[j][0];
-        redux[1] += workspace->w2[j][1] * workspace->u2[j][1];
-
-        redux[2] += workspace->r2[j][0] * workspace->u2[j][0];
-        redux[3] += workspace->r2[j][1] * workspace->u2[j][1];
-
-        redux[4] += workspace->u2[j][0] * workspace->u2[j][0];
-        redux[5] += workspace->u2[j][1] * workspace->u2[j][1];
-
-        redux[6] += b[j][0] * b[j][0];
-        redux[7] += b[j][1] * b[j][1];
-    }
+    Dot_local_rvec2( workspace->w2, workspace->u2, system->n, &redux[0], &redux[1] );
+    Dot_local_rvec2( workspace->r2, workspace->u2, system->n, &redux[2], &redux[3] );
+    Dot_local_rvec2( workspace->u2, workspace->u2, system->n, &redux[4], &redux[5] );
+    Dot_local_rvec2( b, b, system->n, &redux[6], &redux[7] );
 
 #if defined(LOG_PERFORMANCE)
     Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
@@ -3358,72 +3702,21 @@ int dual_PIPECG( reax_system const * const system, control_params const * const
             MPI_COMM_WORLD, &req );
     Check_MPI_Error( ret, __FILE__, __LINE__ );
 
-    /* pre-conditioning */
-    if ( control->cm_solver_pre_comp_type == NONE_PC )
-    {
-        //Vector_Copy( workspace->m, workspace->w, system->n );
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->m2[j][0] = workspace->w2[j][0];
-            workspace->m2[j][1] = workspace->w2[j][1];
-        }
-    }
-    else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-    {
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->m2[j][0] = workspace->w2[j][0] * workspace->Hdia_inv[j];
-            workspace->m2[j][1] = workspace->w2[j][1] * workspace->Hdia_inv[j];
-        }
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-    }
-    else if ( control->cm_solver_pre_comp_type == SAI_PC )
-    {
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->w2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-        
-#if defined(NEUTRAL_TERRITORY)
-        dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->w2, workspace->m2, H->NT );
-#else
-        dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->w2, workspace->m2, system->n );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-
-        /* no comm part2 because m2 is only local portion */
-    }
-
-    Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-            workspace->m2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
+    dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->w2,
+            workspace->n2, fresh_pre, LEFT );
+    dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->n2,
+            workspace->m2, fresh_pre, RIGHT );
 
 #if defined(NEUTRAL_TERRITORY)
-    dual_Sparse_MatVec_local( H, workspace->m2, workspace->n2, H->NT );
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->m2,
+            H->NT, workspace->n2 );
 #else
-    dual_Sparse_MatVec_local( H, workspace->m2, workspace->n2, system->N );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->m2,
+            system->N, workspace->n2 );
 #endif
 
-    Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-            H->format, workspace->n2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    time = Get_Time( );
 #endif
 
     ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
@@ -3462,57 +3755,30 @@ int dual_PIPECG( reax_system const * const system, control_params const * const
             alpha[1] = gamma_new[1] / delta[1];
         }
 
-        //Vector_Sum( workspace->z, 1.0, workspace->n, beta, workspace->z, system->n );
-        //Vector_Sum( workspace->q, 1.0, workspace->m, beta, workspace->q, system->n );
-        //Vector_Sum( workspace->p, 1.0, workspace->u, beta, workspace->p, system->n );
-        //Vector_Sum( workspace->d, 1.0, workspace->w, beta, workspace->d, system->n );
-        //Vector_Sum( x, 1.0, x, alpha, workspace->p, system->n );
-        //Vector_Sum( workspace->u, 1.0, workspace->u, -alpha, workspace->q, system->n );
-        //Vector_Sum( workspace->w, 1.0, workspace->w, -alpha, workspace->z, system->n );
-        //Vector_Sum( workspace->r, 1.0, workspace->r, -alpha, workspace->d, system->n );
-        //redux[0] = Dot_local( workspace->w, workspace->u, system->n );
-        //redux[1] = Dot_local( workspace->r, workspace->u, system->n );
-        //redux[2] = Dot_local( workspace->u, workspace->u, system->n );
+        Vector_Sum_rvec2( workspace->z2, 1.0, 1.0, workspace->n2,
+                beta[0], beta[1], workspace->z2, system->n );
+        Vector_Sum_rvec2( workspace->q2, 1.0, 1.0, workspace->m2,
+                beta[0], beta[1], workspace->q2, system->n );
+        Vector_Sum_rvec2( workspace->p2, 1.0, 1.0, workspace->u2,
+                beta[0], beta[1], workspace->p2, system->n );
+        Vector_Sum_rvec2( workspace->d2, 1.0, 1.0, workspace->w2,
+                beta[0], beta[1], workspace->d2, system->n );
+        Vector_Sum_rvec2( x, 1.0, 1.0, x,
+                alpha[0], alpha[1], workspace->p2, system->n );
+        Vector_Sum_rvec2( workspace->u2, 1.0, 1.0, workspace->u2,
+                -1.0 * alpha[0], -1.0 * alpha[1], workspace->q2, system->n );
+        Vector_Sum_rvec2( workspace->w2, 1.0, 1.0, workspace->w2,
+                -1.0 * alpha[0], -1.0 * alpha[1], workspace->z2, system->n );
+        Vector_Sum_rvec2( workspace->r2, 1.0, 1.0, workspace->r2,
+                -1.0 * alpha[0], -1.0 * alpha[1], workspace->d2, system->n );
+
         for ( j = 0; j < 6; ++j )
         {
             redux[j] = 0.0;
         }
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->z2[j][0] = workspace->n2[j][0] + beta[0] * workspace->z2[j][0];
-            workspace->z2[j][1] = workspace->n2[j][1] + beta[1] * workspace->z2[j][1];
-
-            workspace->q2[j][0] = workspace->m2[j][0] + beta[0] * workspace->q2[j][0];
-            workspace->q2[j][1] = workspace->m2[j][1] + beta[1] * workspace->q2[j][1];
-
-            workspace->p2[j][0] = workspace->u2[j][0] + beta[0] * workspace->p2[j][0];
-            workspace->p2[j][1] = workspace->u2[j][1] + beta[1] * workspace->p2[j][1];
-
-            workspace->d2[j][0] = workspace->w2[j][0] + beta[0] * workspace->d2[j][0];
-            workspace->d2[j][1] = workspace->w2[j][1] + beta[1] * workspace->d2[j][1];
-
-            x[j][0] += alpha[0] * workspace->p2[j][0];
-            x[j][1] += alpha[1] * workspace->p2[j][1];
-
-            workspace->u2[j][0] -= alpha[0] * workspace->q2[j][0];
-            workspace->u2[j][1] -= alpha[1] * workspace->q2[j][1];
-
-            workspace->w2[j][0] -= alpha[0] * workspace->z2[j][0];
-            workspace->w2[j][1] -= alpha[1] * workspace->z2[j][1];
-
-            workspace->r2[j][0] -= alpha[0] * workspace->d2[j][0];
-            workspace->r2[j][1] -= alpha[1] * workspace->d2[j][1];
-
-            redux[0] += workspace->w2[j][0] * workspace->u2[j][0];
-            redux[1] += workspace->w2[j][1] * workspace->u2[j][1];
-            
-            redux[2] += workspace->r2[j][0] * workspace->u2[j][0];
-            redux[3] += workspace->r2[j][1] * workspace->u2[j][1];
-            
-            redux[4] += workspace->u2[j][0] * workspace->u2[j][0];
-            redux[5] += workspace->u2[j][1] * workspace->u2[j][1];
-
-        }
+        Dot_local_rvec2( workspace->w2, workspace->u2, system->n, &redux[0], &redux[1] );
+        Dot_local_rvec2( workspace->r2, workspace->u2, system->n, &redux[2], &redux[3] );
+        Dot_local_rvec2( workspace->u2, workspace->u2, system->n, &redux[4], &redux[5] );
 
 #if defined(LOG_PERFORMANCE)
         Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
@@ -3522,72 +3788,21 @@ int dual_PIPECG( reax_system const * const system, control_params const * const
                 MPI_COMM_WORLD, &req );
         Check_MPI_Error( ret, __FILE__, __LINE__ );
 
-        /* pre-conditioning */
-        if ( control->cm_solver_pre_comp_type == NONE_PC )
-        {
-            //Vector_Copy( workspace->m, workspace->w, system->n );
-            for ( j = 0; j < system->n; ++j )
-            {
-                workspace->m2[j][0] = workspace->w2[j][0];
-                workspace->m2[j][1] = workspace->w2[j][1];
-            }
-        }
-        else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-        {
-            for ( j = 0; j < system->n; ++j )
-            {
-                workspace->m2[j][0] = workspace->w2[j][0] * workspace->Hdia_inv[j];
-                workspace->m2[j][1] = workspace->w2[j][1] * workspace->Hdia_inv[j];
-            }
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-        }
-        else if ( control->cm_solver_pre_comp_type == SAI_PC )
-        {
-            Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                    workspace->w2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-            
-#if defined(NEUTRAL_TERRITORY)
-            dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->w2, workspace->m2, H->NT );
-#else
-            dual_Sparse_MatVec_local( &workspace->H_app_inv, workspace->w2, workspace->m2, system->n );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-
-            /* no comm part2 because m2 is only local portion */
-        }
-
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->m2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
+        dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->w2,
+                workspace->n2, fresh_pre, LEFT );
+        dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->n2,
+                workspace->m2, fresh_pre, RIGHT );
 
 #if defined(NEUTRAL_TERRITORY)
-        dual_Sparse_MatVec_local( H, workspace->m2, workspace->n2, H->NT );
+        Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->m2,
+                H->NT, workspace->n2 );
 #else
-        dual_Sparse_MatVec_local( H, workspace->m2, workspace->n2, system->N );
+        Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->m2,
+                system->N, workspace->n2 );
 #endif
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
-#endif
-
-        Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-                H->format, workspace->n2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        time = Get_Time( );
 #endif
 
         gamma_old[0] = gamma_new[0];
@@ -3610,33 +3825,21 @@ int dual_PIPECG( reax_system const * const system, control_params const * const
     /* continue to solve the system that has not converged yet */
     if ( r_norm[0] / b_norm[0] > tol )
     {
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->s[j] = workspace->x[j][0];
-        }
+        Vector_Copy_From_rvec2( workspace->s, workspace->x, 0, system->n );
 
         i += PIPECG( system, control, data, workspace,
-                H, workspace->b_s, tol, workspace->s, mpi_data );
+                H, workspace->b_s, tol, workspace->s, mpi_data, FALSE );
 
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->x[j][0] = workspace->s[j];
-        }
+        Vector_Copy_To_rvec2( workspace->x, workspace->s, 0, system->n );
     }
     else if ( r_norm[1] / b_norm[1] > tol )
     {
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->t[j] = workspace->x[j][1];
-        }
+        Vector_Copy_From_rvec2( workspace->t, workspace->x, 1, system->n );
 
         i += PIPECG( system, control, data, workspace,
-                H, workspace->b_t, tol, workspace->t, mpi_data );
+                H, workspace->b_t, tol, workspace->t, mpi_data, FALSE );
 
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->x[j][1] = workspace->t[j];
-        }
+        Vector_Copy_To_rvec2( workspace->x, workspace->t, 1, system->n );
     }
 
     if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE )
@@ -3661,7 +3864,7 @@ int dual_PIPECG( reax_system const * const system, control_params const * const
 int PIPECG( reax_system const * const system, control_params const * const control,
         simulation_data * const data,
         storage * const workspace, sparse_matrix * const H, real * const b,
-        real tol, real * const x, mpi_datatypes * const  mpi_data )
+        real tol, real * const x, mpi_datatypes * const  mpi_data, int fresh_pre )
 {
     int i, j, ret;
     real alpha, beta, delta, gamma_old, gamma_new, r_norm, b_norm;
@@ -3669,100 +3872,41 @@ int PIPECG( reax_system const * const system, control_params const * const contr
     MPI_Request req;
 #if defined(LOG_PERFORMANCE)
     real time;
-
-    time = Get_Time( );
-#endif
-
-    Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-            x, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
 #endif
 
 #if defined(NEUTRAL_TERRITORY)
-    Sparse_MatVec_local( H, x, workspace->u, H->NT );
+    Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            H->NT, workspace->u );
 #else
-    Sparse_MatVec_local( H, x, workspace->u, system->N );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+    Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            system->N, workspace->u );
 #endif
 
-    Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-            H->format, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE );
-
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    time = Get_Time( );
 #endif
 
     Vector_Sum( workspace->r, 1.0, b, -1.0, workspace->u, system->n );
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
-#endif
-
-    /* pre-conditioning */
-    if ( control->cm_solver_pre_comp_type == NONE_PC )
-    {
-        Vector_Copy( workspace->u, workspace->r, system->n );
-    }
-    else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-    {
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->u[j] = workspace->r[j] * workspace->Hdia_inv[j];
-        }
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-    }
-    else if ( control->cm_solver_pre_comp_type == SAI_PC )
-    {
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->r, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-        
-#if defined(NEUTRAL_TERRITORY)
-        Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->u, H->NT );
-#else
-        Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->u, system->n );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-
-        /* no comm part2 because u is only local portion */
-    }
-
-    Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-            workspace->u, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
+    apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r,
+            workspace->m, fresh_pre, LEFT );
+    apply_preconditioner( system, workspace, control, data, mpi_data, workspace->m,
+            workspace->u, fresh_pre, RIGHT );
+
 #if defined(NEUTRAL_TERRITORY)
-    Sparse_MatVec_local( H, workspace->u, workspace->w, H->NT );
+    Sparse_MatVec( system, control, data, mpi_data, H, workspace->u,
+            H->NT, workspace->w );
 #else
-    Sparse_MatVec_local( H, workspace->u, workspace->w, system->N );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+    Sparse_MatVec( system, control, data, mpi_data, H, workspace->u,
+            system->N, workspace->w );
 #endif
 
-    Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-            H->format, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE );
-
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    time = Get_Time( );
 #endif
 
     redux[0] = Dot_local( workspace->w, workspace->u, system->n );
@@ -3778,66 +3922,21 @@ int PIPECG( reax_system const * const system, control_params const * const contr
             MPI_COMM_WORLD, &req );
     Check_MPI_Error( ret, __FILE__, __LINE__ );
 
-    /* pre-conditioning */
-    if ( control->cm_solver_pre_comp_type == NONE_PC )
-    {
-        Vector_Copy( workspace->m, workspace->w, system->n );
-    }
-    else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-    {
-        for ( j = 0; j < system->n; ++j )
-        {
-            workspace->m[j] = workspace->w[j] * workspace->Hdia_inv[j];
-        }
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-    }
-    else if ( control->cm_solver_pre_comp_type == SAI_PC )
-    {
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->w, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-        
-#if defined(NEUTRAL_TERRITORY)
-        Sparse_MatVec_local( &workspace->H_app_inv, workspace->w, workspace->m, H->NT );
-#else
-        Sparse_MatVec_local( &workspace->H_app_inv, workspace->w, workspace->m, system->n );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-
-        /* no comm part2 because m is only local portion */
-    }
-
-    Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-            workspace->m, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
+    apply_preconditioner( system, workspace, control, data, mpi_data, workspace->w,
+            workspace->n, FALSE, LEFT );
+    apply_preconditioner( system, workspace, control, data, mpi_data, workspace->n,
+            workspace->m, FALSE, RIGHT );
 
 #if defined(NEUTRAL_TERRITORY)
-    Sparse_MatVec_local( H, workspace->m, workspace->n, H->NT );
+    Sparse_MatVec( system, control, data, mpi_data, H, workspace->m,
+            H->NT, workspace->n );
 #else
-    Sparse_MatVec_local( H, workspace->m, workspace->n, system->N );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+    Sparse_MatVec( system, control, data, mpi_data, H, workspace->m,
+            system->N, workspace->n );
 #endif
 
-    Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-            H->format, workspace->n, REAL_PTR_TYPE, MPI_DOUBLE );
-
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    time = Get_Time( );
 #endif
 
     ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
@@ -3884,66 +3983,21 @@ int PIPECG( reax_system const * const system, control_params const * const contr
                 MPI_COMM_WORLD, &req );
         Check_MPI_Error( ret, __FILE__, __LINE__ );
 
-        /* pre-conditioning */
-        if ( control->cm_solver_pre_comp_type == NONE_PC )
-        {
-            Vector_Copy( workspace->m, workspace->w, system->n );
-        }
-        else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-        {
-            for ( j = 0; j < system->n; ++j )
-            {
-                workspace->m[j] = workspace->w[j] * workspace->Hdia_inv[j];
-            }
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-        }
-        else if ( control->cm_solver_pre_comp_type == SAI_PC )
-        {
-            Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                    workspace->w, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-            
-#if defined(NEUTRAL_TERRITORY)
-            Sparse_MatVec_local( &workspace->H_app_inv, workspace->w, workspace->m, H->NT );
-#else
-            Sparse_MatVec_local( &workspace->H_app_inv, workspace->w, workspace->m, system->n );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-
-            /* no comm part2 because m is only local portion */
-        }
-
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->m, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
+        apply_preconditioner( system, workspace, control, data, mpi_data, workspace->w,
+                workspace->n, FALSE, LEFT );
+        apply_preconditioner( system, workspace, control, data, mpi_data, workspace->n,
+                workspace->m, FALSE, RIGHT );
 
 #if defined(NEUTRAL_TERRITORY)
-        Sparse_MatVec_local( H, workspace->m, workspace->n, H->NT );
+        Sparse_MatVec( system, control, data, mpi_data, H, workspace->m,
+                H->NT, workspace->n );
 #else
-        Sparse_MatVec_local( H, workspace->m, workspace->n, system->N );
-#endif
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+        Sparse_MatVec( system, control, data, mpi_data, H, workspace->m,
+                system->N, workspace->n );
 #endif
 
-        Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-                H->format, workspace->n, REAL_PTR_TYPE, MPI_DOUBLE );
-
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        time = Get_Time( );
 #endif
 
         gamma_old = gamma_new;
@@ -3969,178 +4023,297 @@ int PIPECG( reax_system const * const system, control_params const * const contr
 }
 
 
-/* Pipelined Preconditioned Conjugate Residual Method
+/* Pipelined Preconditioned Conjugate Residual Method.
+ * This function performs dual iteration for QEq (2 simultaneous solves)
  *
  * References:
  * 1) Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm,
  *  P. Ghysels and W. Vanroose, Parallel Computing, 2014.
  *  */
-int PIPECR( reax_system const * const system, control_params const * const control,
+int dual_PIPECR( reax_system const * const system, control_params const * const control,
         simulation_data * const data,
-        storage * const workspace, sparse_matrix * const H, real * const b,
-        real tol, real * const x, mpi_datatypes * const  mpi_data )
+        storage * const workspace, sparse_matrix * const H, rvec2 * const b,
+        real tol, rvec2 * const x, mpi_datatypes * const  mpi_data, int fresh_pre )
 {
     int i, j, ret;
-    real alpha, beta, delta, gamma_old, gamma_new, r_norm, b_norm;
-    real redux[3];
+    rvec2 alpha, beta, delta, gamma_old, gamma_new, r_norm, b_norm;
+    real redux[6];
     MPI_Request req;
 #if defined(LOG_PERFORMANCE)
     real time;
+#endif
+
+#if defined(NEUTRAL_TERRITORY)
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            H->NT, workspace->u2 );
+#else
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            system->N, workspace->u2 );
+#endif
 
+#if defined(LOG_PERFORMANCE)
     time = Get_Time( );
 #endif
 
-    Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-            x, REAL_PTR_TYPE, MPI_DOUBLE );
+    Vector_Sum_rvec2( workspace->r2, 1.0, 1.0, b, -1.0, -1.0, workspace->u2, system->n );
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-#if defined(NEUTRAL_TERRITORY)
-    Sparse_MatVec_local( H, x, workspace->u, H->NT );
-#else
-    Sparse_MatVec_local( H, x, workspace->u, system->N );
+    dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r2,
+            workspace->n2, fresh_pre, LEFT );
+    dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->n2,
+            workspace->u2, fresh_pre, RIGHT );
+
+#if defined(LOG_PERFORMANCE)
+    time = Get_Time( );
 #endif
 
+    Dot_local_rvec2( b, b, system->n, &redux[0], &redux[1] );
+    Dot_local_rvec2( workspace->u2, workspace->u2, system->n, &redux[2], &redux[3] );
+
+    ret = MPI_Iallreduce( MPI_IN_PLACE, redux, 4, MPI_DOUBLE, MPI_SUM,
+            MPI_COMM_WORLD, &req );
+    Check_MPI_Error( ret, __FILE__, __LINE__ );
+
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-    Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-            H->format, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE );
+#if defined(NEUTRAL_TERRITORY)
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->u2,
+            H->NT, workspace->w2 );
+#else
+    Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->u2,
+            system->N, workspace->w2 );
+#endif
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    time = Get_Time( );
 #endif
 
-    Vector_Sum( workspace->r, 1.0, b, -1.0, workspace->u, system->n );
+    ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
+    Check_MPI_Error( ret, __FILE__, __LINE__ );
+    b_norm[0] = SQRT( redux[0] );
+    b_norm[1] = SQRT( redux[1] );
+    r_norm[0] = SQRT( redux[2] );
+    r_norm[1] = SQRT( redux[3] );
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+    Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
 #endif
 
-    /* pre-conditioning */
-    if ( control->cm_solver_pre_comp_type == NONE_PC )
-    {
-        Vector_Copy( workspace->u, workspace->r, system->n );
-    }
-    else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
+    for ( i = 0; i < control->cm_solver_max_iters; ++i )
     {
-        for ( j = 0; j < system->n; ++j )
+        if ( r_norm[0] / b_norm[0] <= tol || r_norm[1] / b_norm[1] <= tol )
         {
-            workspace->u[j] = workspace->r[j] * workspace->Hdia_inv[j];
+            break;
         }
 
+        dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->w2,
+                workspace->n2, fresh_pre, LEFT );
+        dual_apply_preconditioner( system, workspace, control, data, mpi_data, workspace->n2,
+                workspace->m2, fresh_pre, RIGHT );
+
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
+        time = Get_Time( );
 #endif
-    }
-    else if ( control->cm_solver_pre_comp_type == SAI_PC )
-    {
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->r, REAL_PTR_TYPE, MPI_DOUBLE );
+
+        Dot_local_rvec2( workspace->w2, workspace->u2, system->n, &redux[0], &redux[1] );
+        Dot_local_rvec2( workspace->m2, workspace->w2, system->n, &redux[2], &redux[3] );
+        Dot_local_rvec2( workspace->u2, workspace->u2, system->n, &redux[4], &redux[5] );
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
-        
+
+        ret = MPI_Iallreduce( MPI_IN_PLACE, redux, 6, MPI_DOUBLE, MPI_SUM,
+                MPI_COMM_WORLD, &req );
+        Check_MPI_Error( ret, __FILE__, __LINE__ );
+
 #if defined(NEUTRAL_TERRITORY)
-        Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->u, H->NT );
+        Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->m2,
+                H->NT, workspace->n2 );
 #else
-        Sparse_MatVec_local( &workspace->H_app_inv, workspace->r, workspace->u, system->n );
+        Dual_Sparse_MatVec( system, control, data, mpi_data, H, workspace->m2,
+                system->N, workspace->n2 );
 #endif
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
+        time = Get_Time( );
 #endif
 
-        /* no comm part2 because u is only local portion */
-    }
+        ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
+        Check_MPI_Error( ret, __FILE__, __LINE__ );
+        gamma_new[0] = redux[0];
+        gamma_new[1] = redux[1];
+        delta[0] = redux[2];
+        delta[1] = redux[3];
+        r_norm[0] = SQRT( redux[4] );
+        r_norm[1] = SQRT( redux[5] );
 
-    redux[0] = Dot_local( b, b, system->n );
-    redux[1] = Dot_local( workspace->u, workspace->u, system->n );
+#if defined(LOG_PERFORMANCE)
+        Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
+#endif
 
-    ret = MPI_Iallreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE, MPI_SUM,
-            MPI_COMM_WORLD, &req );
-    Check_MPI_Error( ret, __FILE__, __LINE__ );
+        if ( i > 0 )
+        {
+            beta[0] = gamma_new[0] / gamma_old[0];
+            beta[1] = gamma_new[1] / gamma_old[1];
+            alpha[0] = gamma_new[0] / (delta[0] - beta[0] / alpha[0] * gamma_new[0]);
+            alpha[1] = gamma_new[1] / (delta[1] - beta[1] / alpha[1] * gamma_new[1]);
+        }
+        else
+        {
+            beta[0] = 0.0;
+            beta[1] = 0.0;
+            alpha[0] = gamma_new[0] / delta[0];
+            alpha[1] = gamma_new[1] / delta[1];
+        }
+
+        Vector_Sum_rvec2( workspace->z2, 1.0, 1.0, workspace->n2,
+                beta[0], beta[1], workspace->z2, system->n );
+        Vector_Sum_rvec2( workspace->q2, 1.0, 1.0, workspace->m2,
+                beta[0], beta[1], workspace->q2, system->n );
+        Vector_Sum_rvec2( workspace->p2, 1.0, 1.0, workspace->u2,
+                beta[0], beta[1], workspace->p2, system->n );
+        Vector_Sum_rvec2( workspace->d2, 1.0, 1.0, workspace->w2,
+                beta[0], beta[1], workspace->d2, system->n );
+        Vector_Sum_rvec2( x, 1.0, 1.0, x, alpha[0], alpha[1], workspace->p2, system->n );
+        Vector_Sum_rvec2( workspace->u2, 1.0, 1.0, workspace->u2,
+                -1.0 * alpha[0], -1.0 * alpha[1], workspace->q2, system->n );
+        Vector_Sum_rvec2( workspace->w2, 1.0, 1.0, workspace->w2,
+                -1.0 * alpha[0], -1.0 * alpha[1], workspace->z2, system->n );
+        Vector_Sum_rvec2( workspace->r2, 1.0, 1.0, workspace->r2,
+                -1.0 * alpha[0], -1.0 * alpha[1], workspace->d2, system->n );
+
+        gamma_old[0] = gamma_new[0];
+        gamma_old[1] = gamma_new[1];
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
+        Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
+    }
+
+    /* continue to solve the system that has not converged yet */
+    if ( r_norm[0] / b_norm[0] > tol )
+    {
+        Vector_Copy_From_rvec2( workspace->s, workspace->x, 0, system->n );
+
+        i += PIPECR( system, control, data, workspace,
+                H, workspace->b_s, tol, workspace->s, mpi_data, FALSE );
+
+        Vector_Copy_To_rvec2( workspace->x, workspace->s, 0, system->n );
+    }
+    else if ( r_norm[1] / b_norm[1] > tol )
+    {
+        Vector_Copy_From_rvec2( workspace->t, workspace->x, 1, system->n );
+
+        i += PIPECR( system, control, data, workspace,
+                H, workspace->b_t, tol, workspace->t, mpi_data, FALSE );
+
+        Vector_Copy_To_rvec2( workspace->x, workspace->t, 1, system->n );
+    }
+
+    if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE )
+    {
+        fprintf( stderr, "[WARNING] PIPECR convergence failed!\n" );
+        return i;
+    }
+
+    return i;
+}
 
-    Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-            workspace->u, REAL_PTR_TYPE, MPI_DOUBLE );
 
+/* Pipelined Preconditioned Conjugate Residual Method
+ *
+ * References:
+ * 1) Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm,
+ *  P. Ghysels and W. Vanroose, Parallel Computing, 2014.
+ *  */
+int PIPECR( reax_system const * const system, control_params const * const control,
+        simulation_data * const data,
+        storage * const workspace, sparse_matrix * const H, real * const b,
+        real tol, real * const x, mpi_datatypes * const  mpi_data, int fresh_pre )
+{
+    int i, j, ret;
+    real alpha, beta, delta, gamma_old, gamma_new, r_norm, b_norm;
+    real redux[3];
+    MPI_Request req;
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    real time;
 #endif
 
 #if defined(NEUTRAL_TERRITORY)
-    Sparse_MatVec_local( H, workspace->u, workspace->w, H->NT );
+    Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            H->NT, workspace->u );
 #else
-    Sparse_MatVec_local( H, workspace->u, workspace->w, system->N );
+    Sparse_MatVec( system, control, data, mpi_data, H, x, 
+            system->N, workspace->u );
 #endif
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
+    time = Get_Time( );
 #endif
 
-    Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-            H->format, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE );
+    Vector_Sum( workspace->r, 1.0, b, -1.0, workspace->u, system->n );
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
 
-    ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
-    Check_MPI_Error( ret, __FILE__, __LINE__ );
-    b_norm = SQRT( redux[0] );
-    r_norm = SQRT( redux[1] );
+    apply_preconditioner( system, workspace, control, data, mpi_data, workspace->r,
+            workspace->n, fresh_pre, LEFT );
+    apply_preconditioner( system, workspace, control, data, mpi_data, workspace->n,
+            workspace->u, fresh_pre, RIGHT );
 
 #if defined(LOG_PERFORMANCE)
-    Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
+    time = Get_Time( );
 #endif
 
-    for ( i = 0; i < control->cm_solver_max_iters && r_norm / b_norm > tol; ++i )
-    {
-        /* pre-conditioning */
-        if ( control->cm_solver_pre_comp_type == NONE_PC )
-        {
-            Vector_Copy( workspace->m, workspace->w, system->n );
-        }
-        else if ( control->cm_solver_pre_comp_type == JACOBI_PC )
-        {
-            for ( j = 0; j < system->n; ++j )
-            {
-                workspace->m[j] = workspace->w[j] * workspace->Hdia_inv[j];
-            }
+    redux[0] = Dot_local( b, b, system->n );
+    redux[1] = Dot_local( workspace->u, workspace->u, system->n );
 
-#if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
-#endif
-        }
-        else if ( control->cm_solver_pre_comp_type == SAI_PC )
-        {
-            Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                    workspace->w, REAL_PTR_TYPE, MPI_DOUBLE );
+    ret = MPI_Iallreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE, MPI_SUM,
+            MPI_COMM_WORLD, &req );
+    Check_MPI_Error( ret, __FILE__, __LINE__ );
 
 #if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+    Update_Timing_Info( &time, &data->timing.cm_solver_vector_ops );
 #endif
-            
+
 #if defined(NEUTRAL_TERRITORY)
-            Sparse_MatVec_local( &workspace->H_app_inv, workspace->w, workspace->m, H->NT );
+    Sparse_MatVec( system, control, data, mpi_data, H, workspace->u, 
+            H->NT, workspace->w );
 #else
-            Sparse_MatVec_local( &workspace->H_app_inv, workspace->w, workspace->m, system->n );
+    Sparse_MatVec( system, control, data, mpi_data, H, workspace->u, 
+            system->N, workspace->w );
 #endif
 
 #if defined(LOG_PERFORMANCE)
-            Update_Timing_Info( &time, &data->timing.cm_solver_pre_app );
+    time = Get_Time( );
 #endif
 
-            /* no comm part2 because m is only local portion */
-        }
+    ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
+    Check_MPI_Error( ret, __FILE__, __LINE__ );
+    b_norm = SQRT( redux[0] );
+    r_norm = SQRT( redux[1] );
+
+#if defined(LOG_PERFORMANCE)
+    Update_Timing_Info( &time, &data->timing.cm_solver_allreduce );
+#endif
+
+    for ( i = 0; i < control->cm_solver_max_iters && r_norm / b_norm > tol; ++i )
+    {
+        apply_preconditioner( system, workspace, control, data, mpi_data, workspace->w,
+                workspace->n, fresh_pre, LEFT );
+        apply_preconditioner( system, workspace, control, data, mpi_data, workspace->n,
+                workspace->m, fresh_pre, RIGHT );
+
+#if defined(LOG_PERFORMANCE)
+        time = Get_Time( );
+#endif
 
         redux[0] = Dot_local( workspace->w, workspace->u, system->n );
         redux[1] = Dot_local( workspace->m, workspace->w, system->n );
@@ -4154,28 +4327,16 @@ int PIPECR( reax_system const * const system, control_params const * const contr
                 MPI_COMM_WORLD, &req );
         Check_MPI_Error( ret, __FILE__, __LINE__ );
 
-        Sparse_MatVec_Comm_Part1( system, control, mpi_data,
-                workspace->m, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
-#endif
-
 #if defined(NEUTRAL_TERRITORY)
-        Sparse_MatVec_local( H, workspace->m, workspace->n, H->NT );
+        Sparse_MatVec( system, control, data, mpi_data, H, workspace->m, 
+                H->NT, workspace->n );
 #else
-        Sparse_MatVec_local( H, workspace->m, workspace->n, system->N );
+        Sparse_MatVec( system, control, data, mpi_data, H, workspace->m, 
+                system->N, workspace->n );
 #endif
 
 #if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
-#endif
-
-        Sparse_MatVec_Comm_Part2( system, control, mpi_data,
-                H->format, workspace->n, REAL_PTR_TYPE, MPI_DOUBLE );
-
-#if defined(LOG_PERFORMANCE)
-        Update_Timing_Info( &time, &data->timing.cm_solver_comm );
+        time = Get_Time( );
 #endif
 
         ret = MPI_Wait( &req, MPI_STATUS_IGNORE );
diff --git a/PG-PuReMD/src/lin_alg.h b/PG-PuReMD/src/lin_alg.h
index 6221aff8305e90d27ef686f40c577c43cf6e7c94..242a01ada847d0690bb47beba9e4563be9c72923 100644
--- a/PG-PuReMD/src/lin_alg.h
+++ b/PG-PuReMD/src/lin_alg.h
@@ -32,6 +32,8 @@ extern "C" {
 
 void Sort_Matrix_Rows( sparse_matrix * const );
 
+void jacobi( sparse_matrix const * const, real * const );
+
 void setup_sparse_approx_inverse( reax_system const * const,
         simulation_data * const,
         storage * const, mpi_datatypes * const, 
@@ -42,40 +44,55 @@ real sparse_approx_inverse( reax_system const * const,
         storage * const, mpi_datatypes * const, 
         sparse_matrix * const, sparse_matrix * const, sparse_matrix * const, int );
 
+int dual_SDM( reax_system const * const, control_params const * const,
+        simulation_data * const,
+        storage * const, sparse_matrix * const, rvec2 * const,
+        real, rvec2 * const, mpi_datatypes * const, int );
+
 int SDM( reax_system const * const, control_params const * const,
         simulation_data * const,
         storage * const, sparse_matrix * const, real * const,
-        real, real * const, mpi_datatypes * const );
+        real, real * const, mpi_datatypes * const, int );
 
 int dual_CG( reax_system const * const, control_params const * const,
         simulation_data * const,
         storage * const, sparse_matrix * const,
-        rvec2 * const, real, rvec2 * const, mpi_datatypes * const );
+        rvec2 * const, real, rvec2 * const, mpi_datatypes * const, int );
 
 int CG( reax_system const * const, control_params const * const,
         simulation_data * const,
         storage * const, sparse_matrix * const, real * const,
-        real, real * const, mpi_datatypes * const );
+        real, real * const, mpi_datatypes * const, int );
+
+int dual_BiCGStab( reax_system const * const, control_params const * const,
+        simulation_data * const,
+        storage * const, sparse_matrix * const, rvec2 * const,
+        real, rvec2 * const, mpi_datatypes * const, int );
 
 int BiCGStab( reax_system const * const, control_params const * const,
         simulation_data * const,
         storage * const, sparse_matrix * const, real * const,
-        real, real * const, mpi_datatypes * const );
+        real, real * const, mpi_datatypes * const, int );
 
 int dual_PIPECG( reax_system const * const, control_params const * const,
         simulation_data * const,
         storage * const, sparse_matrix * const,
-        rvec2 * const, real, rvec2 * const, mpi_datatypes * const );
+        rvec2 * const, real, rvec2 * const, mpi_datatypes * const, int );
 
 int PIPECG( reax_system const * const, control_params const * const,
         simulation_data * const,
         storage * const, sparse_matrix * const, real * const,
-        real, real * const, mpi_datatypes * const );
+        real, real * const, mpi_datatypes * const, int );
+
+int dual_PIPECR( reax_system const * const, control_params const * const,
+        simulation_data * const,
+        storage * const, sparse_matrix * const, rvec2 * const,
+        real, rvec2 * const, mpi_datatypes * const, int );
 
 int PIPECR( reax_system const * const, control_params const * const,
         simulation_data * const,
         storage * const, sparse_matrix * const, real * const,
-        real, real * const, mpi_datatypes * const );
+        real, real * const, mpi_datatypes * const, int );
 
 
 #ifdef __cplusplus
diff --git a/PG-PuReMD/src/vector.h b/PG-PuReMD/src/vector.h
index 32b6f45ac14cb4d98e7c86cd4dfdd53024f89e21..37a7422ddb193f42648a25fbe517a31837a52a6d 100644
--- a/PG-PuReMD/src/vector.h
+++ b/PG-PuReMD/src/vector.h
@@ -33,6 +33,14 @@ extern "C"  {
 #endif
 
 #if defined(LAMMPS_REAX) || defined(PURE_REAX)
+
+/* check if all entries of a dense vector are sufficiently close to zero
+ *
+ * inputs:
+ *  v: dense vector
+ *  k: number of entries in v
+ * output: TRUE if all entries are sufficiently close to zero, FALSE otherwise
+ */
 CUDA_HOST_DEVICE static inline int Vector_isZero( real const * const v, int k )
 {
     int i, ret;
@@ -54,6 +62,13 @@ CUDA_HOST_DEVICE static inline int Vector_isZero( real const * const v, int k )
 }
 
 
+/* sets all entries of a dense vector to zero
+ *
+ * inputs:
+ *  v: dense vector
+ *  k: number of entries in v
+ * output: v with entries set to zero
+ */
 CUDA_HOST_DEVICE static inline void Vector_MakeZero( real * const v, int k )
 {
     int i;
@@ -67,6 +82,14 @@ CUDA_HOST_DEVICE static inline void Vector_MakeZero( real * const v, int k )
 }
 
 
+/* copy the entries from one vector to another
+ *
+ * inputs:
+ *  v: dense vector to copy
+ *  k: number of entries in v
+ * output:
+ *  dest: vector copied into
+ */
 CUDA_HOST_DEVICE static inline void Vector_Copy( real * const dest, real const * const v, int k )
 {
     int i;
@@ -80,6 +103,67 @@ CUDA_HOST_DEVICE static inline void Vector_Copy( real * const dest, real const *
 }
 
 
+/* copy the entries from one rvec2 vector to another
+ *
+ * inputs:
+ *  v: dense vector of rvec2 entries to copy (both components are copied)
+ *  k: number of entries in v
+ * output:
+ *  dest: vector copied into
+ */
+CUDA_HOST_DEVICE static inline void Vector_Copy_rvec2( rvec2 * const dest, rvec2 const * const v, int k )
+{
+    int i;
+
+    assert( k >= 0 );
+
+    for ( i = 0; i < k; ++i )
+    {
+        dest[i][0] = v[i][0];
+        dest[i][1] = v[i][1];
+    }
+}
+
+
+CUDA_HOST_DEVICE static inline void Vector_Copy_From_rvec2( real * const dst, rvec2 const * const src,
+        int index, int k )
+{   /* extract component `index` of the first k rvec2 entries of src into the scalar vector dst */
+    int i;
+
+    assert( k >= 0 );
+    assert( index >= 0 && index <= 1 ); /* only components 0 and 1 are accepted */
+
+    for ( i = 0; i < k; ++i )
+    {
+        dst[i] = src[i][index];
+    }
+}
+
+
+CUDA_HOST_DEVICE static inline void Vector_Copy_To_rvec2( rvec2 * const dst, real const * const src,
+        int index, int k )
+{   /* store the first k entries of the scalar vector src into component `index` of dst */
+    int i;
+
+    assert( k >= 0 );
+    assert( index >= 0 && index <= 1 ); /* only components 0 and 1 are accepted */
+
+    for ( i = 0; i < k; ++i )
+    {
+        dst[i][index] = src[i]; /* the other component of dst[i] is left unchanged */
+    }
+}
+
+
+/* scales the entries of a dense vector by a constant
+ *
+ * inputs:
+ *  c: scaling constant
+ *  v: dense vector whose entries to scale
+ *  k: number of entries in v
+ * output:
+ *  dest: with entries scaled
+ */
 CUDA_HOST_DEVICE static inline void Vector_Scale( real * const dest, real c,
         real const * const v, int k )
 {
@@ -94,6 +178,16 @@ CUDA_HOST_DEVICE static inline void Vector_Scale( real * const dest, real c,
 }
 
 
+/* compute the scaled sum of two dense vectors and store
+ * the result in a third vector (SAXPY operation in BLAS)
+ *
+ * inputs:
+ *  c, d: scaling constants
+ *  v, y: dense vectors whose entries to scale
+ *  k: number of entries in the vectors
+ * output:
+ *  dest: vector containing the scaled sum
+ */
 CUDA_HOST_DEVICE static inline void Vector_Sum( real * const dest, real c,
         real const * const v, real d, real const * const y, int k )
 {
@@ -108,6 +202,41 @@ CUDA_HOST_DEVICE static inline void Vector_Sum( real * const dest, real c,
 }
 
 
+/* compute the scaled sum of two dense rvec2 vectors and store it in a third vector
+ *
+ * inputs:
+ *  c0, c1: scaling constants for components 0 and 1 of v, respectively
+ *  d0, d1: scaling constants for components 0 and 1 of y, respectively
+ *  v, y: dense vectors whose entries to scale
+ *  k: number of entries in the vectors
+ * output:
+ *  dest: vector containing the scaled sum
+ */
+CUDA_HOST_DEVICE static inline void Vector_Sum_rvec2( rvec2 * const dest, real c0, real c1,
+        rvec2 const * const v, real d0, real d1, rvec2 const * const y, int k )
+{
+    int i;
+
+    assert( k >= 0 );
+
+    for ( i = 0; i < k; ++i )
+    {
+        dest[i][0] = c0 * v[i][0] + d0 * y[i][0];
+        dest[i][1] = c1 * v[i][1] + d1 * y[i][1];
+    }
+}
+
+
+/* add a scaled dense vector to another vector
+ * and store in-place
+ *
+ * inputs:
+ *  c: scaling constant
+ *  v: dense vector whose entries to scale
+ *  k: number of entries in the vectors
+ * output:
+ *  dest: vector to accumulate with the scaled sum
+ */
 CUDA_HOST_DEVICE static inline void Vector_Add( real * const dest, real c,
         real const * const v, int k )
 {
@@ -122,31 +251,48 @@ CUDA_HOST_DEVICE static inline void Vector_Add( real * const dest, real c,
 }
 
 
-CUDA_HOST_DEVICE static inline real Dot( real const * const v1,
-        real const * const v2, int k )
+/* add a component-wise scaled dense rvec2 vector to another vector
+ * and store in-place
+ *
+ * inputs:
+ *  c0, c1: scaling constants for components 0 and 1 of v, respectively
+ *  v: dense vector whose entries to scale
+ *  k: number of entries in the vectors
+ * output:
+ *  dest: vector to accumulate with the scaled entries of v
+ */
+CUDA_HOST_DEVICE static inline void Vector_Add_rvec2( rvec2 * const dest, real c0, real c1,
+        rvec2 const * const v, int k )
 {
     int i;
-    real ret;
 
     assert( k >= 0 );
 
-    ret = 0.0;
-
     for ( i = 0; i < k; ++i )
     {
-        ret += v1[i] * v2[i];
+        dest[i][0] += c0 * v[i][0];
+        dest[i][1] += c1 * v[i][1];
     }
-
-    return ret;
 }
 
 
+/* compute the local portion of the inner product of two dense vectors
+ *
+ * inputs:
+ *  v1, v2: dense vectors
+ *  k: number of entries in the vectors
+ * output:
+ *  inner product of the two vectors, returned by value
+ *  (callers combine the per-process results, e.g., via an MPI reduction)
+ */
 CUDA_HOST_DEVICE static inline real Dot_local( real const * const v1,
         real const * const v2, int k )
 {
     int i;
     real sum;
 
+    assert( k >= 0 );
+
     sum = 0.0;
 
     for ( i = 0; i < k; ++i )
@@ -158,21 +304,30 @@ CUDA_HOST_DEVICE static inline real Dot_local( real const * const v1,
 }
 
 
-CUDA_HOST_DEVICE static inline real Norm( real const * const v, int k )
+/* compute the local portions of the inner products of two pairs of
+ * dense vectors stored component-wise
+ *
+ * inputs:
+ *  v1, v2: dense vectors of pairs
+ *  k: number of entries in the vectors
+ * output:
+ *  sum1, sum2: inner products of components 0 and 1, respectively
+ */
+CUDA_HOST_DEVICE static inline void Dot_local_rvec2( rvec2 const * const v1,
+        rvec2 const * const v2, int k, real * const sum1, real * const sum2 )
 {
     int i;
-    real ret;
 
     assert( k >= 0 );
 
-    ret = 0.0;
+    *sum1 = 0.0;
+    *sum2 = 0.0;
 
     for ( i = 0; i < k; ++i )
     {
-        ret +=  SQR( v[i] );
+        *sum1 += v1[i][0] * v2[i][0];
+        *sum2 += v1[i][1] * v2[i][1];
     }
-
-    return SQRT( ret );
 }
 
 
diff --git a/sPuReMD/src/allocate.c b/sPuReMD/src/allocate.c
index 92c1d80dba39f85a214db63c377b210d741f2843..4f980bc8c07a26672966c18420cfbfd2eb3f77cd 100644
--- a/sPuReMD/src/allocate.c
+++ b/sPuReMD/src/allocate.c
@@ -36,24 +36,19 @@ void PreAllocate_Space( reax_system * const system,
     {
         system->prealloc_allocated = TRUE;
 
-        system->atoms = scalloc( n, sizeof(reax_atom),
-                "PreAllocate_Space::system->atoms" );
-        workspace->orig_id = scalloc( n, sizeof(int),
-                "PreAllocate_Space::workspace->orid_id" );
+        system->atoms = scalloc( n, sizeof(reax_atom), __FILE__, __LINE__ );
+        workspace->orig_id = scalloc( n, sizeof(int), __FILE__, __LINE__ );
 
         /* bond restriction info */
         if ( control->restrict_bonds )
         {
-            workspace->restricted = scalloc( n, sizeof(int),
-                    "PreAllocate_Space::workspace->restricted_atoms" );
-
-            workspace->restricted_list = scalloc( n, sizeof(int*),
-                    "PreAllocate_Space::workspace->restricted_list" );
+            workspace->restricted = scalloc( n, sizeof(int), __FILE__, __LINE__ );
+            workspace->restricted_list = scalloc( n, sizeof(int*), __FILE__, __LINE__ );
 
             for ( i = 0; i < n; ++i )
             {
                 workspace->restricted_list[i] = scalloc( MAX_RESTRICT, sizeof(int),
-                        "PreAllocate_Space::workspace->restricted_list[i]" );
+                        __FILE__, __LINE__ );
             }
         }
 
@@ -62,48 +57,40 @@ void PreAllocate_Space( reax_system * const system,
                 || control->geo_format == BINARY_RESTART )
         {
             workspace->map_serials = scalloc( MAX_ATOM_ID, sizeof(int),
-                    "Read_BGF::workspace->map_serials" );
+                    __FILE__, __LINE__ );
         }
     }
     else
     {
-        sfree( system->atoms, "PreAllocate_Space::system->atoms" );
-        sfree( workspace->orig_id, "PreAllocate_Space::workspace->orid_id" );
+        sfree( system->atoms, __FILE__, __LINE__ );
+        sfree( workspace->orig_id, __FILE__, __LINE__ );
 
         /* bond restriction info */
         if ( control->restrict_bonds )
         {
-            sfree( workspace->restricted,
-                    "PreAllocate_Space::workspace->restricted_atoms" );
+            sfree( workspace->restricted, __FILE__, __LINE__ );
 
             for ( i = 0; i < n; ++i )
             {
-                sfree( workspace->restricted_list[i],
-                        "PreAllocate_Space::workspace->restricted_list[i]" );
+                sfree( workspace->restricted_list[i], __FILE__, __LINE__ );
             }
 
-            sfree( workspace->restricted_list,
-                    "PreAllocate_Space::workspace->restricted_list" );
+            sfree( workspace->restricted_list, __FILE__, __LINE__ );
         }
 
-        system->atoms = scalloc( n, sizeof(reax_atom),
-                "PreAllocate_Space::system->atoms" );
-        workspace->orig_id = scalloc( n, sizeof(int),
-                "PreAllocate_Space::workspace->orid_id" );
+        system->atoms = scalloc( n, sizeof(reax_atom), __FILE__, __LINE__ );
+        workspace->orig_id = scalloc( n, sizeof(int), __FILE__, __LINE__ );
 
         /* bond restriction info */
         if ( control->restrict_bonds )
         {
-            workspace->restricted = scalloc( n, sizeof(int),
-                    "PreAllocate_Space::workspace->restricted_atoms" );
-
-            workspace->restricted_list = scalloc( n, sizeof(int*),
-                    "PreAllocate_Space::workspace->restricted_list" );
+            workspace->restricted = scalloc( n, sizeof(int), __FILE__, __LINE__ );
+            workspace->restricted_list = scalloc( n, sizeof(int*), __FILE__, __LINE__ );
 
             for ( i = 0; i < n; ++i )
             {
                 workspace->restricted_list[i] = scalloc( MAX_RESTRICT, sizeof(int),
-                        "PreAllocate_Space::workspace->restricted_list[i]" );
+                        __FILE__, __LINE__ );
             }
         }
     }
@@ -136,9 +123,9 @@ void Allocate_Matrix( sparse_matrix * const H, int n, int n_max, int m )
     H->n_max = n_max;
     H->m = m;
 
-    H->start = smalloc( sizeof(unsigned int) * (n_max + 1), "Allocate_Matrix::H->start" );
-    H->j = smalloc( sizeof(unsigned int) * m, "Allocate_Matrix::H->j" );
-    H->val = smalloc( sizeof(real) * m, "Allocate_Matrix::H->val" );
+    H->start = smalloc( sizeof(unsigned int) * (n_max + 1), __FILE__, __LINE__ );
+    H->j = smalloc( sizeof(unsigned int) * m, __FILE__, __LINE__ );
+    H->val = smalloc( sizeof(real) * m, __FILE__, __LINE__ );
 }
 
 
@@ -150,9 +137,9 @@ void Deallocate_Matrix( sparse_matrix * const H )
 {
     H->allocated = FALSE;
 
-    sfree( H->start, "Deallocate_Matrix::H->start" );
-    sfree( H->j, "Deallocate_Matrix::H->j" );
-    sfree( H->val, "Deallocate_Matrix::H->val" );
+    sfree( H->start, __FILE__, __LINE__ );
+    sfree( H->j, __FILE__, __LINE__ );
+    sfree( H->val, __FILE__, __LINE__ );
 }
 
 
@@ -196,8 +183,7 @@ static void Reallocate_Initialize_HBond_List( int n, int num_h, int num_h_max,
 {
     int i, num_hbonds, *hb_top;
 
-    hb_top = scalloc( n, sizeof(int),
-            "Reallocate_Initialize_HBond_List::hb_top" );
+    hb_top = scalloc( n, sizeof(int), __FILE__, __LINE__ );
     num_hbonds = 0;
 
     for ( i = 0; i < n; ++i )
@@ -218,7 +204,7 @@ static void Reallocate_Initialize_HBond_List( int n, int num_h, int num_h_max,
 
     Initialize_HBond_List( n, h_index, hb_top, hbond_list );
 
-    sfree( hb_top, "Reallocate_Initialize_HBond_List::hb_top" );
+    sfree( hb_top, __FILE__, __LINE__ );
 }
 
 
@@ -249,8 +235,7 @@ static void Reallocate_Initialize_Bond_List( int n, int n_max,
     int i;
     int *bond_top;
 
-    bond_top = (int *) scalloc( n, sizeof(int),
-            "Reallocate_Initialize_Bond_List::hb_top" );
+    bond_top = scalloc( n, sizeof(int), __FILE__, __LINE__ );
     *num_bonds = 0;
     *est_3body = 0;
 
@@ -270,7 +255,7 @@ static void Reallocate_Initialize_Bond_List( int n, int n_max,
 
     Initialize_Bond_List( bond_top, bond_list );
 
-    sfree( bond_top, "Reallocate_Initialize_Bond_List::bond_top" );
+    sfree( bond_top, __FILE__, __LINE__ );
 }
 
 
@@ -357,9 +342,9 @@ void Reallocate( reax_system * const system, control_params const * const contro
             {
                 for ( k = 0; k < g->ncell_max[2]; k++ )
                 {
-                    sfree( g->atoms[i][j][k], "Reallocate::g->atoms[i][j][k]" );
+                    sfree( g->atoms[i][j][k], __FILE__, __LINE__ );
                     g->atoms[i][j][k] = scalloc( workspace->realloc.gcell_atoms, sizeof(int),
-                                "Reallocate::g->atoms[i][j][k]" );
+                                __FILE__, __LINE__ );
                 }
             }
         }
diff --git a/sPuReMD/src/analyze.c b/sPuReMD/src/analyze.c
index 33e5e1cdf0644add0906051fd8de36fa5002b7bd..e75da430ac5a29e48ccab8d85e3fd5341660e009 100644
--- a/sPuReMD/src/analyze.c
+++ b/sPuReMD/src/analyze.c
@@ -1069,28 +1069,28 @@ void Analysis( reax_system *system, control_params *control,
 
     steps = data->step - data->prev_steps;
 
-    if ( steps == 1 )
+    /****** Molecular Analysis ******/
+    if ( control->molec_anal
+            && steps % control->freq_molec_anal == 0 )
     {
-        if ( lists[OLD_BONDS]->allocated == FALSE )
+        if ( steps == 1 )
         {
-            Make_List( lists[BONDS]->n, lists[BONDS]->n_max, lists[BONDS]->total_intrs,
-                    TYP_BOND, lists[OLD_BONDS] );
-        }
+            if ( lists[OLD_BONDS]->allocated == FALSE )
+            {
+                Make_List( lists[BONDS]->n, lists[BONDS]->n_max, lists[BONDS]->total_intrs,
+                        TYP_BOND, lists[OLD_BONDS] );
+            }
 
-        if ( control->molec_anal == REACTIONS )
-        {
-            Copy_Bond_List( system, control, lists );
-        }
-        if ( control->diffusion_coef )
-        {
-            Copy_Positions( system, workspace );
+            if ( control->molec_anal == REACTIONS )
+            {
+                Copy_Bond_List( system, control, lists );
+            }
+            if ( control->diffusion_coef )
+            {
+                Copy_Positions( system, workspace );
+            }
         }
-    }
 
-    /****** Molecular Analysis ******/
-    if ( control->molec_anal
-            && steps % control->freq_molec_anal == 0 )
-    {
         if ( control->molec_anal == FRAGMENTS )
         {
             /* discover molecules */
diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c
index 998357a81743362acc038fceddef5dcfb7f33b11..e3771f813dee00681fe33a14d69ad6411f61bd32 100644
--- a/sPuReMD/src/charges.c
+++ b/sPuReMD/src/charges.c
@@ -107,13 +107,13 @@ int is_refactoring_step( control_params * const control,
 #if defined(HAVE_TENSORFLOW)
 static void TF_Tensor_Deallocator( void* data, size_t length, void* arg )
 {
-//    sfree( data, "TF_Tensor_Deallocator::data" );
+//    sfree( data, __FILE__, __LINE__ );
 }
 
 
 static void TF_free( void* data, size_t length )
 {
-        sfree( data, "TF_free::data" );
+        sfree( data, __FILE__, __LINE__ );
 }
 
 
@@ -226,8 +226,8 @@ static void Predict_Charges_TF_LSTM( const reax_system * const system,
 //    batch_size = 3;
     win_size = control->cm_init_guess_win_size;
     batch_size = system->N_cm;
-    obs_flat = smalloc( sizeof(float) * batch_size * win_size, "Predict_Charges_TF_LSTM:obs_flat" );
-    obs_norm = smalloc( sizeof(float) * batch_size, "Predict_Charges_TF_LSTM:obs_norm" );
+    obs_flat = smalloc( sizeof(float) * batch_size * win_size, __FILE__, __LINE__ );
+    obs_norm = smalloc( sizeof(float) * batch_size, __FILE__, __LINE__ );
 
     /* load the frozen model from file in GraphDef format
      *
@@ -327,8 +327,8 @@ static void Predict_Charges_TF_LSTM( const reax_system * const system,
         workspace->s[0][i] = predictions[i] + obs_norm[i];
     }
 
-    sfree( obs_norm, "Predict_Charges_TF_LSTM:obs_norm" );
-    sfree( obs_flat, "Predict_Charges_TF_LSTM:obs_flat" );
+    sfree( obs_norm, __FILE__, __LINE__ );
+    sfree( obs_flat, __FILE__, __LINE__ );
     TF_DeleteTensor( input_tensor[0] );
     TF_DeleteTensor( output_tensor[0] );
     TF_DeleteSession( s.session, status );
@@ -930,13 +930,12 @@ static void Setup_Preconditioner_QEq( const reax_system * const system,
             if ( workspace->Hdia_inv == NULL )
             {
                 workspace->Hdia_inv = scalloc( Hptr->n_max, sizeof( real ),
-                        "Setup_Preconditioner_QEq::workspace->Hdia_inv" );
+                        __FILE__, __LINE__ );
             }
             else if ( realloc == TRUE )
             {
                 workspace->Hdia_inv = srealloc( workspace->Hdia_inv,
-                        sizeof( real ) * Hptr->n_max,
-                        "Setup_Preconditioner_QEq::workspace->Hdia_inv" );
+                        sizeof( real ) * Hptr->n_max, __FILE__, __LINE__ );
             }
             break;
 
@@ -950,7 +949,8 @@ static void Setup_Preconditioner_QEq( const reax_system * const system,
                 Allocate_Matrix( &workspace->L, Hptr->n, Hptr->n_max, fillin );
                 Allocate_Matrix( &workspace->U, Hptr->n, Hptr->n_max, fillin );
             }
-            else if ( workspace->L.m < fillin || realloc == TRUE )
+            else if ( workspace->L.m < fillin || workspace->L.n_max < system->N_cm_max
+                    || realloc == TRUE )
             {
                 Deallocate_Matrix( &workspace->L );
                 Deallocate_Matrix( &workspace->U );
@@ -971,7 +971,8 @@ static void Setup_Preconditioner_QEq( const reax_system * const system,
                 Allocate_Matrix( &workspace->L, Hptr->n, Hptr->n_max, Hptr->m );
                 Allocate_Matrix( &workspace->U, Hptr->n, Hptr->n_max, Hptr->m );
             }
-            else if ( workspace->L.m < Hptr->m || realloc == TRUE )
+            else if ( workspace->L.m < Hptr->m || workspace->L.n_max < system->N_cm_max
+                    || realloc == TRUE )
             {
                 Deallocate_Matrix( &workspace->L );
                 Deallocate_Matrix( &workspace->U );
@@ -992,7 +993,8 @@ static void Setup_Preconditioner_QEq( const reax_system * const system,
                 Allocate_Matrix( &workspace->L, Hptr->n, Hptr->n_max, Hptr->m );
                 Allocate_Matrix( &workspace->U, Hptr->n, Hptr->n_max, Hptr->m );
             }
-            else if ( workspace->L.m < Hptr->m || realloc == TRUE )
+            else if ( workspace->L.m < Hptr->m || workspace->L.n_max < system->N_cm_max
+                    || realloc == TRUE )
             {
                 Deallocate_Matrix( &workspace->L );
                 Deallocate_Matrix( &workspace->U );
@@ -1056,13 +1058,12 @@ static void Setup_Preconditioner_EE( const reax_system * const system,
             if ( workspace->Hdia_inv == NULL )
             {
                 workspace->Hdia_inv = scalloc( Hptr->n_max, sizeof( real ),
-                        "Setup_Preconditioner_EE::workspace->Hdiv_inv" );
+                        __FILE__, __LINE__ );
             }
             else if ( realloc == TRUE )
             {
                 workspace->Hdia_inv = srealloc( workspace->Hdia_inv,
-                        sizeof( real ) * Hptr->n_max,
-                        "Setup_Preconditioner_EE::workspace->Hdiv_inv" );
+                        sizeof( real ) * Hptr->n_max, __FILE__, __LINE__ );
             }
             break;
 
@@ -1088,7 +1089,8 @@ static void Setup_Preconditioner_EE( const reax_system * const system,
                 Allocate_Matrix( &workspace->L, Hptr->n, Hptr->n_max, Hptr->m );
                 Allocate_Matrix( &workspace->U, Hptr->n, Hptr->n_max, Hptr->m );
             }
-            else if ( workspace->L.m < Hptr->m || realloc == TRUE )
+            else if ( workspace->L.m < Hptr->m || workspace->L.n_max < system->N_cm_max
+                    || realloc == TRUE )
             {
                 Deallocate_Matrix( &workspace->L );
                 Deallocate_Matrix( &workspace->U );
@@ -1115,7 +1117,8 @@ static void Setup_Preconditioner_EE( const reax_system * const system,
                 Allocate_Matrix( &workspace->L, Hptr->n, Hptr->n_max, Hptr->m );
                 Allocate_Matrix( &workspace->U, Hptr->n, Hptr->n_max, Hptr->m );
             }
-            else if ( workspace->L.m < Hptr->m || realloc == TRUE )
+            else if ( workspace->L.m < Hptr->m || workspace->L.n_max < system->N_cm_max
+                    || realloc == TRUE )
             {
                 Deallocate_Matrix( &workspace->L );
                 Deallocate_Matrix( &workspace->U );
@@ -1179,13 +1182,12 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system,
             if ( workspace->Hdia_inv == NULL )
             {
                 workspace->Hdia_inv = scalloc( Hptr->n_max, sizeof( real ),
-                        "Setup_Preconditioner_ACKS2::workspace->Hdiv_inv" );
+                        __FILE__, __LINE__ );
             }
             else if ( realloc == TRUE )
             {
                 workspace->Hdia_inv = srealloc( workspace->Hdia_inv,
-                        sizeof( real ) * Hptr->n_max,
-                        "Setup_Preconditioner_ACKS2::workspace->Hdiv_inv" );
+                        sizeof( real ) * Hptr->n_max, __FILE__, __LINE__ );
             }
             break;
 
@@ -1213,7 +1215,8 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system,
                 Allocate_Matrix( &workspace->L, Hptr->n, Hptr->n_max, Hptr->m );
                 Allocate_Matrix( &workspace->U, Hptr->n, Hptr->n_max, Hptr->m );
             }
-            else if ( workspace->L.m < Hptr->m || realloc == TRUE )
+            else if ( workspace->L.m < Hptr->m || workspace->L.n_max < system->N_cm_max
+                    || realloc == TRUE )
             {
                 Deallocate_Matrix( &workspace->L );
                 Deallocate_Matrix( &workspace->U );
@@ -1242,7 +1245,8 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system,
                 Allocate_Matrix( &workspace->L, Hptr->n, Hptr->n_max, Hptr->m );
                 Allocate_Matrix( &workspace->U, Hptr->n, Hptr->n_max, Hptr->m );
             }
-            else if ( workspace->L.m < Hptr->m || realloc == TRUE )
+            else if ( workspace->L.m < Hptr->m || workspace->L.n_max < system->N_cm_max
+                    || realloc == TRUE )
             {
                 Deallocate_Matrix( &workspace->L );
                 Deallocate_Matrix( &workspace->U );
@@ -1508,19 +1512,10 @@ static void EE( reax_system * const system, control_params * const control,
     }
 
 #if defined(QMMM)
-    for ( int i = 0; i < system->N_qm; ++i )
-    {
-        workspace->mask_qmmm[i] = system->atoms[i].qmmm_mask;
-    }
     for ( int i = system->N_qm; i < system->N; ++i )
     {
         workspace->s[0][i] = system->atoms[i].q_init;
-        workspace->mask_qmmm[i] = system->atoms[i].qmmm_mask;
     }
-    workspace->mask_qmmm[system->N_cm - 1] = 1;
-
-    /* Mask the b vector as well */
-    Vector_Mask_qmmm( workspace->b_s, workspace->mask_qmmm, system->N_cm );
 #endif
 
     switch ( control->cm_solver_type )
@@ -1627,33 +1622,19 @@ static void ACKS2( reax_system * const system, control_params * const control,
     if ( data->step % 10 == 0 )
     {
         snprintf( fname, SIZE, "s_%d_%s.out", data->step, control->sim_name );
-        fp = sfopen( fname, "w" );
+        fp = sfopen( fname, "w", __FILE__, __LINE__ );
         Vector_Print( fp, NULL, workspace->s[0], system->N_cm );
-        sfclose( fp, "ACKS2::fp" );
+        sfclose( fp, __FILE__, __LINE__ );
     }
 #undef SIZE
 #endif
 
 #if defined(QMMM)
     /* TODO: further testing needed for QM/MM mode with ACKS2 */
-    for ( int i = 0; i < system->N_qm; ++i )
-    {
-        workspace->mask_qmmm[i] = system->atoms[i].qmmm_mask;
-    }
     for ( int i = system->N_qm; i < system->N; ++i )
     {
         workspace->s[0][i] = system->atoms[i].q_init;
-        workspace->mask_qmmm[i] = system->atoms[i].qmmm_mask;
-    }
-    for ( int i = system->N; i < 2 * system->N; ++i )
-    {
-        workspace->mask_qmmm[i] = system->atoms[i - system->N].qmmm_mask;
     }
-    workspace->mask_qmmm[2 * system->N] = 1;
-    workspace->mask_qmmm[2 * system->N + 1] = 1;
-
-    /* Mask the b vector as well */
-    Vector_Mask_qmmm( workspace->b_s, workspace->mask_qmmm, system->N_cm );
 #endif
 
     switch ( control->cm_solver_type )
@@ -1712,14 +1693,14 @@ void Compute_Charges( reax_system * const system, control_params * const control
 //        Print_Sparse_Matrix_Binary( workspace->H, fname );
 
         snprintf( fname, SIZE, "b_s_%d_%s.out", data->step, control->sim_name );
-        fp = sfopen( fname, "w" );
+        fp = sfopen( fname, "w", __FILE__, __LINE__ );
         Vector_Print( fp, NULL, workspace->b_s, system->N_cm );
-        sfclose( fp, "Compute_Charges::fp" );
+        sfclose( fp, __FILE__, __LINE__ );
 
 //        snprintf( fname, SIZE, "b_t_%d_%s.out", data->step, control->sim_name );
-//        fp = sfopen( fname, "w" );
+//        fp = sfopen( fname, "w", __FILE__, __LINE__ );
 //        Vector_Print( fp, NULL, workspace->b_t, system->N_cm );
-//        sfclose( fp, "Compute_Charges::fp" );
+//        sfclose( fp, __FILE__, __LINE__ );
     }
 #undef SIZE
 #endif
diff --git a/sPuReMD/src/control.c b/sPuReMD/src/control.c
index c46775a4261b8739a62a0a49c8139db44371b125..8803e10bac2f2604f23103512814c4bc90426d09 100644
--- a/sPuReMD/src/control.c
+++ b/sPuReMD/src/control.c
@@ -562,17 +562,17 @@ void Read_Control_File( const char * const control_file, reax_system * const sys
     int c, i, ret;
     FILE *fp;
 
-    fp = sfopen( control_file, "r" );
+    fp = sfopen( control_file, "r", __FILE__, __LINE__ );
 
     assert( fp != NULL );
 
     if ( fp != NULL )
     {
-        s = smalloc( sizeof(char) * MAX_LINE, "Read_Control_File::s" );
-        tmp = smalloc( sizeof(char*) * MAX_TOKENS, "Read_Control_File::tmp" );
+        s = smalloc( sizeof(char) * MAX_LINE, __FILE__, __LINE__ );
+        tmp = smalloc( sizeof(char*) * MAX_TOKENS, __FILE__, __LINE__ );
         for ( i = 0; i < MAX_TOKENS; i++ )
         {
-            tmp[i] = smalloc( sizeof(char) * MAX_LINE, "Read_Control_File::tmp[i]" );
+            tmp[i] = smalloc( sizeof(char) * MAX_LINE, __FILE__, __LINE__ );
         }
 
         /* read control parameters file */
@@ -601,13 +601,13 @@ void Read_Control_File( const char * const control_file, reax_system * const sys
 
         for ( i = 0; i < MAX_TOKENS; i++ )
         {
-            sfree( tmp[i], "Read_Control_File::tmp[i]" );
+            sfree( tmp[i], __FILE__, __LINE__ );
         }
-        sfree( tmp, "Read_Control_File::tmp" );
-        sfree( s, "Read_Control_File::s" );
+        sfree( tmp, __FILE__, __LINE__ );
+        sfree( s, __FILE__, __LINE__ );
     }
 
-    sfclose( fp, "Read_Control_File::fp" );
+    sfclose( fp, __FILE__, __LINE__ );
 }
 
 
diff --git a/sPuReMD/src/ffield.c b/sPuReMD/src/ffield.c
index 39d78b2026a56c217ca3ab32dc55e8bd93f09f89..b611538d4bc9e366da75d35d2e139f0fbae32151 100644
--- a/sPuReMD/src/ffield.c
+++ b/sPuReMD/src/ffield.c
@@ -36,17 +36,17 @@ void Read_Force_Field( const char * const ffield_file,
     real val;
     FILE *fp;
 
-    fp = sfopen( ffield_file, "r" );
+    fp = sfopen( ffield_file, "r", __FILE__, __LINE__ );
 
     assert( fp != NULL );
 
     if ( fp != NULL )
     {
-        s = smalloc( sizeof(char) * MAX_LINE, "Read_Force_Field::s" );
-        tmp = smalloc( sizeof(char*) * MAX_TOKENS, "Read_Force_Field::tmp" );
+        s = smalloc( sizeof(char) * MAX_LINE, __FILE__, __LINE__ );
+        tmp = smalloc( sizeof(char*) * MAX_TOKENS, __FILE__, __LINE__ );
         for ( i = 0; i < MAX_TOKENS; i++ )
         {
-            tmp[i] = smalloc( sizeof(char) * MAX_TOKEN_LEN, "Read_Force_Field::tmp[i]" );
+            tmp[i] = smalloc( sizeof(char) * MAX_TOKEN_LEN, __FILE__, __LINE__ );
         }
 
         /* reading first header comment */
@@ -66,15 +66,13 @@ void Read_Force_Field( const char * const ffield_file,
 
         if ( system->ffield_params_allocated == FALSE )
         {
-            reax->gp.l = (real*) smalloc( sizeof(real) * n,
-                   "Read_Force_Field::reax->gp-l" );
+            reax->gp.l = (real*) smalloc( sizeof(real) * n, __FILE__, __LINE__ );
 
             reax->gp.max_n_global = n;
         }
         else if ( reax->gp.max_n_global < n )
         {
-            reax->gp.l = (real*) srealloc( reax->gp.l, sizeof(real) * n,
-                   "Read_Force_Field::reax->gp-l" );
+            reax->gp.l = srealloc( reax->gp.l, sizeof(real) * n, __FILE__, __LINE__ );
 
             reax->gp.max_n_global = n;
         }
@@ -106,41 +104,28 @@ void Read_Force_Field( const char * const ffield_file,
             system->ffield_params_allocated = TRUE;
 
             /* Allocating structures in reax_interaction */
-            reax->sbp = (single_body_parameters*) scalloc( n, sizeof(single_body_parameters),
-                    "Read_Force_Field::reax->sbp" );
-            reax->tbp = (two_body_parameters**) scalloc( n, sizeof(two_body_parameters*),
-                    "Read_Force_Field::reax->tbp" );
-            reax->thbp = (three_body_header***) scalloc( n, sizeof(three_body_header**),
-                    "Read_Force_Field::reax->thbp" );
-            reax->hbp = (hbond_parameters***) scalloc( n, sizeof(hbond_parameters**),
-                    "Read_Force_Field::reax->hbp" );
-            reax->fbp = (four_body_header****) scalloc( n, sizeof(four_body_header***),
-                    "Read_Force_Field::reax->fbp" );
+            reax->sbp = scalloc( n, sizeof(single_body_parameters), __FILE__, __LINE__ );
+            reax->tbp = scalloc( n, sizeof(two_body_parameters*), __FILE__, __LINE__ );
+            reax->thbp = scalloc( n, sizeof(three_body_header**), __FILE__, __LINE__ );
+            reax->hbp = scalloc( n, sizeof(hbond_parameters**), __FILE__, __LINE__ );
+            reax->fbp = scalloc( n, sizeof(four_body_header***), __FILE__, __LINE__ );
 
             for ( i = 0; i < n; i++ )
             {
-                reax->tbp[i] = (two_body_parameters*) scalloc( n, sizeof(two_body_parameters),
-                        "Read_Force_Field::reax->tbp[i]" );
-                reax->thbp[i] = (three_body_header**) scalloc( n, sizeof(three_body_header*),
-                        "Read_Force_Field::reax->thbp[i]" );
-                reax->hbp[i] = (hbond_parameters**) scalloc( n, sizeof(hbond_parameters*),
-                        "Read_Force_Field::reax->hbp[i]" );
-                reax->fbp[i] = (four_body_header***) scalloc( n, sizeof(four_body_header**),
-                        "Read_Force_Field::reax->fbp[i]" );
+                reax->tbp[i] = scalloc( n, sizeof(two_body_parameters), __FILE__, __LINE__ );
+                reax->thbp[i] = scalloc( n, sizeof(three_body_header*), __FILE__, __LINE__ );
+                reax->hbp[i] = scalloc( n, sizeof(hbond_parameters*), __FILE__, __LINE__ );
+                reax->fbp[i] = scalloc( n, sizeof(four_body_header**), __FILE__, __LINE__ );
 
                 for ( j = 0; j < n; j++ )
                 {
-                    reax->thbp[i][j] = (three_body_header*) scalloc( n, sizeof(three_body_header),
-                            "Read_Force_Field::reax->thbp[i][j]" );
-                    reax->hbp[i][j] = (hbond_parameters*) scalloc( n, sizeof(hbond_parameters),
-                            "Read_Force_Field::reax->hbp[i][j]" );
-                    reax->fbp[i][j] = (four_body_header**) scalloc( n, sizeof(four_body_header*),
-                            "Read_Force_Field::reax->fbp[i][j]" );
+                    reax->thbp[i][j] = scalloc( n, sizeof(three_body_header), __FILE__, __LINE__ );
+                    reax->hbp[i][j] = scalloc( n, sizeof(hbond_parameters), __FILE__, __LINE__ );
+                    reax->fbp[i][j] = scalloc( n, sizeof(four_body_header*), __FILE__, __LINE__ );
 
                     for ( k = 0; k < n; k++ )
                     {
-                        reax->fbp[i][j][k] = (four_body_header*) scalloc( n, sizeof(four_body_header),
-                                "Read_Force_Field::reax->fbp[i][j][k]" );
+                        reax->fbp[i][j][k] = scalloc( n, sizeof(four_body_header), __FILE__, __LINE__ );
                     }
                 }
             }
@@ -152,66 +137,53 @@ void Read_Force_Field( const char * const ffield_file,
             for ( i = 0; i < reax->max_num_atom_types; i++ )
                 for ( j = 0; j < reax->max_num_atom_types; j++ )
                     for ( k = 0; k < reax->max_num_atom_types; k++ )
-                        sfree( reax->fbp[i][j][k], "Finalize_System::reax->fbp[i][j][k]" );
+                        sfree( reax->fbp[i][j][k], __FILE__, __LINE__ );
 
             for ( i = 0; i < reax->max_num_atom_types; i++ )
                 for ( j = 0; j < reax->max_num_atom_types; j++ )
                 {
-                    sfree( reax->thbp[i][j], "Finalize_System::reax->thbp[i][j]" );
-                    sfree( reax->hbp[i][j], "Finalize_System::reax->hbp[i][j]" );
-                    sfree( reax->fbp[i][j], "Finalize_System::reax->fbp[i][j]" );
+                    sfree( reax->thbp[i][j], __FILE__, __LINE__ );
+                    sfree( reax->hbp[i][j], __FILE__, __LINE__ );
+                    sfree( reax->fbp[i][j], __FILE__, __LINE__ );
                 }
 
             for ( i = 0; i < reax->max_num_atom_types; i++ )
             {
-                sfree( reax->tbp[i], "Finalize_System::reax->tbp[i]" );
-                sfree( reax->thbp[i], "Finalize_System::reax->thbp[i]" );
-                sfree( reax->hbp[i], "Finalize_System::reax->hbp[i]" );
-                sfree( reax->fbp[i], "Finalize_System::reax->fbp[i]" );
+                sfree( reax->tbp[i], __FILE__, __LINE__ );
+                sfree( reax->thbp[i], __FILE__, __LINE__ );
+                sfree( reax->hbp[i], __FILE__, __LINE__ );
+                sfree( reax->fbp[i], __FILE__, __LINE__ );
             }
 
-            sfree( reax->sbp, "Finalize_System::reax->sbp" );
-            sfree( reax->tbp, "Finalize_System::reax->tbp" );
-            sfree( reax->thbp, "Finalize_System::reax->thbp" );
-            sfree( reax->hbp, "Finalize_System::reax->hbp" );
-            sfree( reax->fbp, "Finalize_System::reax->fbp" );
+            sfree( reax->sbp, __FILE__, __LINE__ );
+            sfree( reax->tbp, __FILE__, __LINE__ );
+            sfree( reax->thbp, __FILE__, __LINE__ );
+            sfree( reax->hbp, __FILE__, __LINE__ );
+            sfree( reax->fbp, __FILE__, __LINE__ );
 
             /* Allocating structures in reax_interaction */
-            reax->sbp = (single_body_parameters*) scalloc( n, sizeof(single_body_parameters),
-                    "Read_Force_Field::reax->sbp" );
-            reax->tbp = (two_body_parameters**) scalloc( n, sizeof(two_body_parameters*),
-                    "Read_Force_Field::reax->tbp" );
-            reax->thbp = (three_body_header***) scalloc( n, sizeof(three_body_header**),
-                    "Read_Force_Field::reax->thbp" );
-            reax->hbp = (hbond_parameters***) scalloc( n, sizeof(hbond_parameters**),
-                    "Read_Force_Field::reax->hbp" );
-            reax->fbp = (four_body_header****) scalloc( n, sizeof(four_body_header***),
-                    "Read_Force_Field::reax->fbp" );
+            reax->sbp = scalloc( n, sizeof(single_body_parameters), __FILE__, __LINE__ );
+            reax->tbp = scalloc( n, sizeof(two_body_parameters*), __FILE__, __LINE__ );
+            reax->thbp = scalloc( n, sizeof(three_body_header**), __FILE__, __LINE__ );
+            reax->hbp = scalloc( n, sizeof(hbond_parameters**), __FILE__, __LINE__ );
+            reax->fbp = scalloc( n, sizeof(four_body_header***), __FILE__, __LINE__ );
 
             for ( i = 0; i < n; i++ )
             {
-                reax->tbp[i] = (two_body_parameters*) scalloc( n, sizeof(two_body_parameters),
-                        "Read_Force_Field::reax->tbp[i]" );
-                reax->thbp[i] = (three_body_header**) scalloc( n, sizeof(three_body_header*),
-                        "Read_Force_Field::reax->thbp[i]" );
-                reax->hbp[i] = (hbond_parameters**) scalloc( n, sizeof(hbond_parameters*),
-                        "Read_Force_Field::reax->hbp[i]" );
-                reax->fbp[i] = (four_body_header***) scalloc( n, sizeof(four_body_header**),
-                        "Read_Force_Field::reax->fbp[i]" );
+                reax->tbp[i] = scalloc( n, sizeof(two_body_parameters), __FILE__, __LINE__ );
+                reax->thbp[i] = scalloc( n, sizeof(three_body_header*), __FILE__, __LINE__ );
+                reax->hbp[i] = scalloc( n, sizeof(hbond_parameters*), __FILE__, __LINE__ );
+                reax->fbp[i] = scalloc( n, sizeof(four_body_header**), __FILE__, __LINE__ );
 
                 for ( j = 0; j < n; j++ )
                 {
-                    reax->thbp[i][j] = (three_body_header*) scalloc( n, sizeof(three_body_header),
-                            "Read_Force_Field::reax->thbp[i][j]" );
-                    reax->hbp[i][j] = (hbond_parameters*) scalloc( n, sizeof(hbond_parameters),
-                            "Read_Force_Field::reax->hbp[i][j]" );
-                    reax->fbp[i][j] = (four_body_header**) scalloc( n, sizeof(four_body_header*),
-                            "Read_Force_Field::reax->fbp[i][j]" );
+                    reax->thbp[i][j] = scalloc( n, sizeof(three_body_header), __FILE__, __LINE__ );
+                    reax->hbp[i][j] = scalloc( n, sizeof(hbond_parameters), __FILE__, __LINE__ );
+                    reax->fbp[i][j] = scalloc( n, sizeof(four_body_header*), __FILE__, __LINE__ );
 
                     for ( k = 0; k < n; k++ )
                     {
-                        reax->fbp[i][j][k] = (four_body_header*) scalloc( n, sizeof(four_body_header),
-                                "Read_Force_Field::reax->fbp[i][j][k]" );
+                        reax->fbp[i][j][k] = scalloc( n, sizeof(four_body_header), __FILE__, __LINE__ );
                     }
                 }
             }
@@ -219,23 +191,19 @@ void Read_Force_Field( const char * const ffield_file,
             reax->max_num_atom_types = n;
         }
 
-        tor_flag  = (char****) smalloc( n * sizeof(char***),
-                "Read_Force_Field::tor_flag" );
+        tor_flag = smalloc( n * sizeof(char***), __FILE__, __LINE__ );
 
         for ( i = 0; i < n; i++ )
         {
-            tor_flag[i] = (char***) smalloc( n * sizeof(char**),
-                    "Read_Force_Field::tor_flag[i]" );
+            tor_flag[i] = smalloc( n * sizeof(char**), __FILE__, __LINE__ );
 
             for ( j = 0; j < n; j++ )
             {
-                tor_flag[i][j]  = (char**) smalloc( n * sizeof(char*),
-                        "Read_Force_Field::tor_flag[i][j]" );
+                tor_flag[i][j] = smalloc( n * sizeof(char*), __FILE__, __LINE__ );
 
                 for ( k = 0; k < n; k++ )
                 {
-                    tor_flag[i][j][k]  = (char*) smalloc( n * sizeof(char),
-                            "Read_Force_Field::tor_flag[i][j][k]" );
+                    tor_flag[i][j][k] = smalloc( n * sizeof(char), __FILE__, __LINE__ );
                 }
             }
         }
@@ -847,10 +815,10 @@ void Read_Force_Field( const char * const ffield_file,
         /* deallocate helper storage */
         for ( i = 0; i < MAX_TOKENS; i++ )
         {
-            sfree( tmp[i], "Read_Force_Field::tmp[i]" );
+            sfree( tmp[i], __FILE__, __LINE__ );
         }
-        sfree( tmp, "Read_Force_Field::tmp" );
-        sfree( s, "Read_Force_Field::s" );
+        sfree( tmp, __FILE__, __LINE__ );
+        sfree( s, __FILE__, __LINE__ );
 
         /* deallocate tor_flag */
         for ( i = 0; i < reax->num_atom_types; i++ )
@@ -859,17 +827,17 @@ void Read_Force_Field( const char * const ffield_file,
             {
                 for ( k = 0; k < reax->num_atom_types; k++ )
                 {
-                    sfree( tor_flag[i][j][k], "Read_Force_Field::tor_flag[i][j][k]" );
+                    sfree( tor_flag[i][j][k], __FILE__, __LINE__ );
                 }
 
-                sfree( tor_flag[i][j], "Read_Force_Field::tor_flag[i][j]" );
+                sfree( tor_flag[i][j], __FILE__, __LINE__ );
             }
 
-            sfree( tor_flag[i], "Read_Force_Field::tor_flag[i]" );
+            sfree( tor_flag[i], __FILE__, __LINE__ );
         }
 
-        sfree( tor_flag, "Read_Force_Field::tor_flag" );
+        sfree( tor_flag, __FILE__, __LINE__ );
     }
 
-    sfclose( fp, "Read_Force_Field::fp" );
+    sfclose( fp, __FILE__, __LINE__ );
 }
diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c
index 4b42eaa0bf677acd230ee6e337445aee275ed382..dd59f91291436f0aaa1b553a387a1c409b79d774 100644
--- a/sPuReMD/src/forces.c
+++ b/sPuReMD/src/forces.c
@@ -424,8 +424,8 @@ static inline real Init_Charge_Matrix_Entry_Tab( reax_system *system,
 }
 
 
-static inline real Init_Charge_Matrix_Entry( reax_system *system,
-        control_params *control, static_storage *workspace,
+static inline real Init_Charge_Matrix_Entry( reax_system const * const system,
+        control_params const * const control, static_storage const * const workspace,
         int i, int j, real r_ij, MATRIX_ENTRY_POSITION pos )
 {
     real Tap, dr3gamij_1, dr3gamij_3, ret;
@@ -479,10 +479,10 @@ static inline real Init_Charge_Matrix_Entry( reax_system *system,
 }
 
 
-static void Init_Charge_Matrix_Remaining_Entries( reax_system *system,
-        control_params *control, reax_list *far_nbr_list,
-        sparse_matrix * H, sparse_matrix * H_sp,
-        int * Htop, int * H_sp_top )
+static void Init_Charge_Matrix_Remaining_Entries( reax_system const * const system,
+        control_params const * const control, reax_list const * const far_nbr_list,
+        sparse_matrix * const H, sparse_matrix * const H_sp,
+        int * const Htop, int * const H_sp_top )
 {
     int i, j, pj, target, val_flag;
     real d, xcut, bond_softness, * X_diag;
@@ -495,46 +495,27 @@ static void Init_Charge_Matrix_Remaining_Entries( reax_system *system,
         case EE_CM:
             if ( system->num_molec_charge_constraints == 0 )
             {
-                H->start[system->N_cm - 1] = *Htop;
-                H_sp->start[system->N_cm - 1] = *H_sp_top;
+                H->start[system->N] = *Htop;
+                H_sp->start[system->N] = *H_sp_top;
 
-                for ( i = 0; i < system->N_cm - 1; ++i )
+                for ( i = 0; i < system->N; ++i )
                 {
-#if defined(QMMM)
-                    /* total charge constraint on QM atoms */
-                    if ( system->atoms[i].qmmm_mask == TRUE )
-                    {
-                        H->j[*Htop] = i;
-                        H->val[*Htop] = 1.0;
-
-                        H_sp->j[*H_sp_top] = i;
-                        H_sp->val[*H_sp_top] = 1.0;
-                    }
-                    else
-                    {
-                        H->j[*Htop] = i;
-                        H->val[*Htop] = 0.0; 
-
-                        H_sp->j[*H_sp_top] = i;
-                        H_sp->val[*H_sp_top] = 0.0;
-                    }
-#else
+                    /* total charge constraint on atoms */
                     H->j[*Htop] = i;
                     H->val[*Htop] = 1.0;
 
                     H_sp->j[*H_sp_top] = i;
                     H_sp->val[*H_sp_top] = 1.0;
-#endif
 
                     *Htop = *Htop + 1;
                     *H_sp_top = *H_sp_top + 1;
                 }
 
-                H->j[*Htop] = system->N_cm - 1;
+                H->j[*Htop] = system->N;
                 H->val[*Htop] = 0.0;
                 *Htop = *Htop + 1;
 
-                H_sp->j[*H_sp_top] = system->N_cm - 1;
+                H_sp->j[*H_sp_top] = system->N;
                 H_sp->val[*H_sp_top] = 0.0;
                 *H_sp_top = *H_sp_top + 1;
             }
@@ -548,31 +529,12 @@ static void Init_Charge_Matrix_Remaining_Entries( reax_system *system,
                     for ( j = system->molec_charge_constraint_ranges[2 * i];
                             j <= system->molec_charge_constraint_ranges[2 * i + 1]; ++j )
                     {
-#if defined(QMMM)
-                        /* molecule charge constraint on QM atoms */
-                        if ( system->atoms[i].qmmm_mask == TRUE )
-                        {
-                            H->j[*Htop] = j - 1;
-                            H->val[*Htop] = 1.0;
-
-                            H_sp->j[*H_sp_top] = j - 1;
-                            H_sp->val[*H_sp_top] = 1.0;
-                        }
-                        else
-                        {
-                            H->j[*Htop] = j - 1;
-                            H->val[*Htop] = 0.0; 
-
-                            H_sp->j[*H_sp_top] = j - 1;
-                            H_sp->val[*H_sp_top] = 0.0;
-                        }
-#else
+                        /* molecule charge constraint on atoms */
                         H->j[*Htop] = j - 1;
                         H->val[*Htop] = 1.0;
 
                         H_sp->j[*H_sp_top] = j - 1;
                         H_sp->val[*H_sp_top] = 1.0;
-#endif
 
                         *Htop = *Htop + 1;
                         *H_sp_top = *H_sp_top + 1;
@@ -591,8 +553,7 @@ static void Init_Charge_Matrix_Remaining_Entries( reax_system *system,
             break;
 
         case ACKS2_CM:
-            X_diag = smalloc( sizeof(real) * system->N,
-                    "Init_Charge_Matrix_Remaining_Entries::X_diag" );
+            X_diag = smalloc( sizeof(real) * system->N, __FILE__, __LINE__ );
 
             for ( i = 0; i < system->N; ++i )
             {
@@ -760,7 +721,7 @@ static void Init_Charge_Matrix_Remaining_Entries( reax_system *system,
             H_sp->val[*H_sp_top] = 0.0;
             *H_sp_top = *H_sp_top + 1;
 
-            sfree( X_diag, "Init_Charge_Matrix_Remaining_Entries::X_diag" );
+            sfree( X_diag, __FILE__, __LINE__ );
             break;
 
         default:
@@ -769,131 +730,92 @@ static void Init_Charge_Matrix_Remaining_Entries( reax_system *system,
 }
 
 
-/* Generate bond list (full format), hydrogen bond list (full format),
- * and charge matrix (half symmetric format)
- * from the far neighbors list (with distance updates, if necessary)  */
-static void Init_Forces( reax_system *system, control_params *control,
-        simulation_data *data, static_storage *workspace,
-        reax_list **lists, output_controls *out_control )
+/* Compute the distances and displacement vectors for entries
+ * in the far neighbors list if it is NOT a re-neighboring step */
+static void Init_Distance( reax_system const * const system,
+        control_params const * const control, reax_list ** const lists )
+{
+    int i, j, pj;
+    int start_i, end_i;
+    reax_list *far_nbr_list;
+
+    far_nbr_list = lists[FAR_NBRS];
+
+    for ( i = 0; i < system->N; ++i )
+    {
+        start_i = Start_Index( i, far_nbr_list );
+        end_i = End_Index( i, far_nbr_list );
+
+        /* update distance and displacement vector between atoms i and j (i-j)
+         * for the j atom entry in the far nbr list */
+        for ( pj = start_i; pj < end_i; ++pj )
+        {
+            j = far_nbr_list->far_nbr_list[pj].nbr;
+
+            far_nbr_list->far_nbr_list[pj].d = control->compute_atom_distance(
+                    &system->box, system->atoms[i].x, system->atoms[j].x,
+                    system->atoms[i].rel_map, system->atoms[j].rel_map,
+                    far_nbr_list->far_nbr_list[pj].rel_box,
+                    far_nbr_list->far_nbr_list[pj].dvec );
+        }
+    }
+}
+
+
+/* Compute the charge matrix entries and store the matrix in half format
+ * using the far neighbors list (stored in half format)
+ */
+static void Init_CM_Half( reax_system const * const system,
+        control_params const * const control,
+        static_storage * const workspace, reax_list ** const lists )
 {
     int i, j, pj, target;
     int start_i, end_i;
-    int type_i, type_j;
-    int Htop, H_sp_top, btop_i, btop_j, num_bonds, num_hbonds;
-    int ihb, jhb, ihb_top, jhb_top;
-    int flag, flag_sp, val_flag, renbr;
-    real r_ij, r2, val;
-    real C12, C34, C56;
-    real Cln_BOp_s, Cln_BOp_pi, Cln_BOp_pi2;
-    real BO, BO_s, BO_pi, BO_pi2;
+    int Htop, H_sp_top;
+    int flag, flag_sp, val_flag;
+    real val;
     sparse_matrix *H, *H_sp;
-    reax_list *far_nbrs, *bonds, *hbonds;
-    single_body_parameters *sbp_i, *sbp_j;
-    two_body_parameters *twbp;
-    far_neighbor_data *nbr_pj;
-    reax_atom *atom_i, *atom_j;
-    bond_data *ibond, *jbond;
-    bond_order_data *bo_ij, *bo_ji;
+    reax_list *far_nbr_list;
 
-    far_nbrs = lists[FAR_NBRS];
-    bonds = lists[BONDS];
-    hbonds = lists[HBONDS];
+    far_nbr_list = lists[FAR_NBRS];
     H = &workspace->H;
     H_sp = &workspace->H_sp;
     Htop = 0;
     H_sp_top = 0;
-    num_bonds = 0;
-    num_hbonds = 0;
-    btop_i = 0;
-    btop_j = 0;
-    renbr = ((data->step - data->prev_steps) % control->reneighbor) == 0 ? TRUE : FALSE;
 
     for ( i = 0; i < system->N; ++i )
     {
-        atom_i = &system->atoms[i];
-        type_i = atom_i->type;
-        start_i = Start_Index( i, far_nbrs );
-        end_i = End_Index( i, far_nbrs );
+        start_i = Start_Index( i, far_nbr_list );
+        end_i = End_Index( i, far_nbr_list );
         H->start[i] = Htop;
         H_sp->start[i] = H_sp_top;
-        btop_i = End_Index( i, bonds );
-        sbp_i = &system->reax_param.sbp[type_i];
-
-        if ( control->hbond_cut > 0.0 )
-        {
-            ihb = sbp_i->p_hbond;
-
-            if ( ihb == H_ATOM )
-            {
-                ihb_top = End_Index( workspace->hbond_index[i], hbonds );
-            }
-            else
-            {
-                ihb_top = -1;
-            }
-        }
-        else
-        {
-            ihb = NON_H_BONDING_ATOM;
-            ihb_top = -1;
-        }
 
         for ( pj = start_i; pj < end_i; ++pj )
         {
-            nbr_pj = &far_nbrs->far_nbr_list[pj];
-            j = nbr_pj->nbr;
+            j = far_nbr_list->far_nbr_list[pj].nbr;
             flag = FALSE;
             flag_sp = FALSE;
 
 #if defined(QMMM)
-            if ( system->atoms[i].qmmm_mask == TRUE
-                    || system->atoms[j].qmmm_mask == TRUE )
-            {
+//            if ( system->atoms[i].qmmm_mask == TRUE
+//                    || system->atoms[j].qmmm_mask == TRUE )
+//            {
 #endif	
-            /* check if reneighboring step --
-             * atomic distances just computed via
-             * Verlet list, so use current distances */
-            if ( renbr == TRUE )
+            if ( far_nbr_list->far_nbr_list[pj].d <= control->nonb_cut )
             {
-                if ( nbr_pj->d <= control->nonb_cut )
-                {
-                    flag = TRUE;
+                flag = TRUE;
 
-                    if ( nbr_pj->d <= control->nonb_sp_cut )
-                    {
-                        flag_sp = TRUE;
-                    }
-                }
-            }
-            /* update atomic distances */
-            else
-            {
-                atom_j = &system->atoms[j];
-                nbr_pj->d = control->compute_atom_distance( &system->box,
-                        atom_i->x, atom_j->x, atom_i->rel_map,
-                        atom_j->rel_map, nbr_pj->rel_box,
-                        nbr_pj->dvec );
-
-                if ( nbr_pj->d <= control->nonb_cut )
+                if ( far_nbr_list->far_nbr_list[pj].d <= control->nonb_sp_cut )
                 {
-                    flag = TRUE;
-
-                    if ( nbr_pj->d <= control->nonb_sp_cut )
-                    {
-                        flag_sp = TRUE;
-                    }
+                    flag_sp = TRUE;
                 }
             }
 
             if ( flag == TRUE )
             {
-                type_j = system->atoms[j].type;
-                sbp_j = &system->reax_param.sbp[type_j];
-                twbp = &system->reax_param.tbp[type_i][type_j];
-                r_ij = nbr_pj->d;
-
                 val = Init_Charge_Matrix_Entry( system, control,
-                            workspace, i, j, r_ij, OFF_DIAGONAL );
+                            workspace, i, j, far_nbr_list->far_nbr_list[pj].d,
+                            OFF_DIAGONAL );
                 val_flag = FALSE;
 
                 for ( target = H->start[i]; target < Htop; ++target )
@@ -935,188 +857,349 @@ static void Init_Forces( reax_system *system, control_params *control,
                         ++H_sp_top;
                     }
                 }
+            }
+#if defined(QMMM)
+//            }
+#endif
+        }
+
+        /* diagonal entry */
+        H->j[Htop] = i;
+        H->val[Htop] = Init_Charge_Matrix_Entry( system, control,
+                workspace, i, i, far_nbr_list->far_nbr_list[pj].d, DIAGONAL );
+        ++Htop;
+
+        H_sp->j[H_sp_top] = i;
+        H_sp->val[H_sp_top] = H->val[Htop - 1];
+        ++H_sp_top;
+    }
+
+    Init_Charge_Matrix_Remaining_Entries( system, control, far_nbr_list,
+            H, H_sp, &Htop, &H_sp_top );
+
+    H->start[system->N_cm] = Htop;
+    H_sp->start[system->N_cm] = H_sp_top;
+}
+
+
+/* Compute entries of the bonds/hbonds lists and store the lists in full format
+ * using the far neighbors list (stored in full format) */
+static void Init_Bond_Full( reax_system const * const system,
+        control_params const * const control,
+        static_storage * const workspace, reax_list ** const lists,
+        int * const num_bonds, int * const num_hbonds )
+{
+    int i, j, pj;
+    int start_i, end_i;
+    int type_i, type_j;
+    int btop_i, btop_j;
+    int ihb, jhb, ihb_top, jhb_top;
+    real r_ij, r2;
+    real C12, C34, C56;
+    real Cln_BOp_s, Cln_BOp_pi, Cln_BOp_pi2;
+    real BO, BO_s, BO_pi, BO_pi2;
+    reax_list *far_nbrs, *bonds, *hbonds;
+    single_body_parameters *sbp_i, *sbp_j;
+    two_body_parameters *twbp;
+    far_neighbor_data *nbr_pj;
+    bond_data *ibond, *jbond;
+    bond_order_data *bo_ij, *bo_ji;
+
+    far_nbrs = lists[FAR_NBRS];
+    bonds = lists[BONDS];
+    hbonds = lists[HBONDS];
+    *num_bonds = 0;
+    *num_hbonds = 0;
+    btop_i = 0;
+    btop_j = 0;
+
+    for ( i = 0; i < system->N; ++i )
+    {
+        type_i = system->atoms[i].type;
+        start_i = Start_Index( i, far_nbrs );
+        end_i = End_Index( i, far_nbrs );
+        btop_i = End_Index( i, bonds );
+        sbp_i = &system->reax_param.sbp[type_i];
+
+        if ( control->hbond_cut > 0.0 )
+        {
+            ihb = sbp_i->p_hbond;
+
+            if ( ihb == H_ATOM )
+            {
+                ihb_top = End_Index( workspace->hbond_index[i], hbonds );
+            }
+            else
+            {
+                ihb_top = -1;
+            }
+        }
+        else
+        {
+            ihb = NON_H_BONDING_ATOM;
+            ihb_top = -1;
+        }
+
+        for ( pj = start_i; pj < end_i; ++pj )
+        {
+            nbr_pj = &far_nbrs->far_nbr_list[pj];
+            j = nbr_pj->nbr;
+
+#if defined(QMMM)
+            if ( system->atoms[i].qmmm_mask == TRUE
+                    || system->atoms[j].qmmm_mask == TRUE )
+            {
+#endif	
+            if ( nbr_pj->d <= control->nonb_cut  )
+            {
+                type_j = system->atoms[j].type;
+                sbp_j = &system->reax_param.sbp[type_j];
+                twbp = &system->reax_param.tbp[type_i][type_j];
+                r_ij = nbr_pj->d;
+
 #if defined(QMMM)
                 if ( system->atoms[i].qmmm_mask == TRUE
                         && system->atoms[j].qmmm_mask == TRUE )
                 {
 #endif
-                /* hydrogen bond lists */
-                if ( control->hbond_cut > 0.0
-                        && (ihb == H_ATOM || ihb == H_BONDING_ATOM)
-                        && nbr_pj->d <= control->hbond_cut )
+                /* Only non-dummy atoms can form bonds */
+                if ( system->atoms[i].is_dummy == FALSE
+                        && system->atoms[j].is_dummy == FALSE )
                 {
-                    jhb = sbp_j->p_hbond;
 
-                    if ( ihb == H_ATOM && jhb == H_BONDING_ATOM )
-                    {
-                        hbonds->hbond_list[ihb_top].nbr = j;
-                        hbonds->hbond_list[ihb_top].scl = 1;
-                        hbonds->hbond_list[ihb_top].ptr = nbr_pj;
-                        ++ihb_top;
-                        ++num_hbonds;
-                    }
-                    else if ( ihb == H_BONDING_ATOM && jhb == H_ATOM )
+                    /* hydrogen bond lists */
+                    if ( control->hbond_cut > 0.0
+                            && (ihb == H_ATOM || ihb == H_BONDING_ATOM)
+                            && nbr_pj->d <= control->hbond_cut )
                     {
-                        jhb_top = End_Index( workspace->hbond_index[j], hbonds );
-                        hbonds->hbond_list[jhb_top].nbr = i;
-                        hbonds->hbond_list[jhb_top].scl = -1;
-                        hbonds->hbond_list[jhb_top].ptr = nbr_pj;
-                        Set_End_Index( workspace->hbond_index[j], jhb_top + 1, hbonds );
-                        ++num_hbonds;
-                    }
-                }
-
-                /* uncorrected bond orders */
-                if ( nbr_pj->d <= control->bond_cut )
-                {
-                    r2 = SQR( r_ij );
+                        jhb = sbp_j->p_hbond;
 
-                    if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 )
-                    {
-                        C12 = twbp->p_bo1 * POW( r_ij / twbp->r_s, twbp->p_bo2 );
-                        BO_s = (1.0 + control->bo_cut) * EXP( C12 );
-                    }
-                    else
-                    {
-                        C12 = 0.0;
-                        BO_s = 0.0;
+                        if ( ihb == H_ATOM && jhb == H_BONDING_ATOM )
+                        {
+                            hbonds->hbond_list[ihb_top].nbr = j;
+                            hbonds->hbond_list[ihb_top].scl = 1;
+                            hbonds->hbond_list[ihb_top].ptr = nbr_pj;
+                            ++ihb_top;
+                            ++(*num_hbonds);
+                        }
+                        else if ( ihb == H_BONDING_ATOM && jhb == H_ATOM )
+                        {
+                            jhb_top = End_Index( workspace->hbond_index[j], hbonds );
+                            hbonds->hbond_list[jhb_top].nbr = i;
+                            hbonds->hbond_list[jhb_top].scl = -1;
+                            hbonds->hbond_list[jhb_top].ptr = nbr_pj;
+                            Set_End_Index( workspace->hbond_index[j], jhb_top + 1, hbonds );
+                            ++(*num_hbonds);
+                        }
                     }
 
-                    if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 )
-                    {
-                        C34 = twbp->p_bo3 * POW( r_ij / twbp->r_p, twbp->p_bo4 );
-                        BO_pi = EXP( C34 );
-                    }
-                    else
+                    /* uncorrected bond orders */
+                    if ( nbr_pj->d <= control->bond_cut )
                     {
-                        C34 = 0.0;
-                        BO_pi = 0.0;
-                    }
+                        r2 = SQR( r_ij );
 
-                    if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 )
-                    {
-                        C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 );
-                        BO_pi2 = EXP( C56 );
-                    }
-                    else
-                    {
-                        C56 = 0.0;
-                        BO_pi2 = 0.0;
-                    }
+                        if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 )
+                        {
+                            C12 = twbp->p_bo1 * POW( r_ij / twbp->r_s, twbp->p_bo2 );
+                            BO_s = (1.0 + control->bo_cut) * EXP( C12 );
+                        }
+                        else
+                        {
+                            C12 = 0.0;
+                            BO_s = 0.0;
+                        }
 
-                    /* Initially BO values are the uncorrected ones, page 1 */
-                    BO = BO_s + BO_pi + BO_pi2;
+                        if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 )
+                        {
+                            C34 = twbp->p_bo3 * POW( r_ij / twbp->r_p, twbp->p_bo4 );
+                            BO_pi = EXP( C34 );
+                        }
+                        else
+                        {
+                            C34 = 0.0;
+                            BO_pi = 0.0;
+                        }
 
-                    if ( BO >= control->bo_cut )
-                    {
-                        num_bonds += 2;
-                        /****** bonds i-j and j-i ******/
-                        ibond = &bonds->bond_list[btop_i];
-                        btop_j = End_Index( j, bonds );
-                        jbond = &bonds->bond_list[btop_j];
-
-                        ibond->nbr = j;
-                        jbond->nbr = i;
-                        ibond->d = r_ij;
-                        jbond->d = r_ij;
-                        rvec_Copy( ibond->dvec, nbr_pj->dvec );
-                        rvec_Scale( jbond->dvec, -1, nbr_pj->dvec );
-                        ivec_Copy( ibond->rel_box, nbr_pj->rel_box );
-                        ivec_Scale( jbond->rel_box, -1, nbr_pj->rel_box );
-                        ibond->dbond_index = btop_i;
-                        jbond->dbond_index = btop_i;
-                        ibond->sym_index = btop_j;
-                        jbond->sym_index = btop_i;
-                        ++btop_i;
-                        Set_End_Index( j, btop_j + 1, bonds );
-
-                        bo_ij = &ibond->bo_data;
-                        bo_ij->BO = BO;
-                        bo_ij->BO_s = BO_s;
-                        bo_ij->BO_pi = BO_pi;
-                        bo_ij->BO_pi2 = BO_pi2;
-                        bo_ji = &jbond->bo_data;
-                        bo_ji->BO = BO;
-                        bo_ji->BO_s = BO_s;
-                        bo_ji->BO_pi = BO_pi;
-                        bo_ji->BO_pi2 = BO_pi2;
-
-                        /* Bond Order page2-3, derivative of total bond order prime */
-                        Cln_BOp_s = twbp->p_bo2 * C12 / r2;
-                        Cln_BOp_pi = twbp->p_bo4 * C34 / r2;
-                        Cln_BOp_pi2 = twbp->p_bo6 * C56 / r2;
-
-                        /* Only dln_BOp_xx wrt. dr_i is stored here, note that
-                           dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */
-                        rvec_Scale( bo_ij->dln_BOp_s, -bo_ij->BO_s * Cln_BOp_s, ibond->dvec );
-                        rvec_Scale( bo_ij->dln_BOp_pi, -bo_ij->BO_pi * Cln_BOp_pi, ibond->dvec );
-                        rvec_Scale( bo_ij->dln_BOp_pi2,
-                                -bo_ij->BO_pi2 * Cln_BOp_pi2, ibond->dvec );
-                        rvec_Scale( bo_ji->dln_BOp_s, -1., bo_ij->dln_BOp_s );
-                        rvec_Scale( bo_ji->dln_BOp_pi, -1., bo_ij->dln_BOp_pi );
-                        rvec_Scale( bo_ji->dln_BOp_pi2, -1., bo_ij->dln_BOp_pi2 );
-
-                        /* Only dBOp wrt. dr_i is stored here, note that
-                           dBOp/dr_i = -dBOp/dr_j and all others are 0 */
-                        rvec_Scale( bo_ij->dBOp, -(bo_ij->BO_s * Cln_BOp_s
-                                    + bo_ij->BO_pi * Cln_BOp_pi
-                                    + bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec );
-                        rvec_Scale( bo_ji->dBOp, -1., bo_ij->dBOp );
-
-                        rvec_Add( workspace->dDeltap_self[i], bo_ij->dBOp );
-                        rvec_Add( workspace->dDeltap_self[j], bo_ji->dBOp );
-
-                        bo_ij->BO_s -= control->bo_cut;
-                        bo_ij->BO -= control->bo_cut;
-                        bo_ji->BO_s -= control->bo_cut;
-                        bo_ji->BO -= control->bo_cut;
-                        workspace->total_bond_order[i] += bo_ij->BO;
-                        workspace->total_bond_order[j] += bo_ji->BO;
-                        bo_ij->Cdbo = 0.0;
-                        bo_ij->Cdbopi = 0.0;
-                        bo_ij->Cdbopi2 = 0.0;
-                        bo_ji->Cdbo = 0.0;
-                        bo_ji->Cdbopi = 0.0;
-                        bo_ji->Cdbopi2 = 0.0;
-
-                        Set_End_Index( j, btop_j + 1, bonds );
+                        if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 )
+                        {
+                            C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 );
+                            BO_pi2 = EXP( C56 );
+                        }
+                        else
+                        {
+                            C56 = 0.0;
+                            BO_pi2 = 0.0;
+                        }
+
+                        /* Initially BO values are the uncorrected ones, page 1 */
+                        BO = BO_s + BO_pi + BO_pi2;
+
+                        if ( BO >= control->bo_cut )
+                        {
+                            *num_bonds += 2;
+                            /****** bonds i-j and j-i ******/
+                            ibond = &bonds->bond_list[btop_i];
+                            btop_j = End_Index( j, bonds );
+                            jbond = &bonds->bond_list[btop_j];
+
+                            ibond->nbr = j;
+                            jbond->nbr = i;
+                            ibond->d = r_ij;
+                            jbond->d = r_ij;
+                            rvec_Copy( ibond->dvec, nbr_pj->dvec );
+                            rvec_Scale( jbond->dvec, -1, nbr_pj->dvec );
+                            ivec_Copy( ibond->rel_box, nbr_pj->rel_box );
+                            ivec_Scale( jbond->rel_box, -1, nbr_pj->rel_box );
+                            ibond->dbond_index = btop_i;
+                            jbond->dbond_index = btop_i;
+                            ibond->sym_index = btop_j;
+                            jbond->sym_index = btop_i;
+                            ++btop_i;
+                            Set_End_Index( j, btop_j + 1, bonds );
+
+                            bo_ij = &ibond->bo_data;
+                            bo_ij->BO = BO;
+                            bo_ij->BO_s = BO_s;
+                            bo_ij->BO_pi = BO_pi;
+                            bo_ij->BO_pi2 = BO_pi2;
+                            bo_ji = &jbond->bo_data;
+                            bo_ji->BO = BO;
+                            bo_ji->BO_s = BO_s;
+                            bo_ji->BO_pi = BO_pi;
+                            bo_ji->BO_pi2 = BO_pi2;
+
+                            /* Bond Order page2-3, derivative of total bond order prime */
+                            Cln_BOp_s = twbp->p_bo2 * C12 / r2;
+                            Cln_BOp_pi = twbp->p_bo4 * C34 / r2;
+                            Cln_BOp_pi2 = twbp->p_bo6 * C56 / r2;
+
+                            /* Only dln_BOp_xx wrt. dr_i is stored here, note that
+                               dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */
+                            rvec_Scale( bo_ij->dln_BOp_s, -bo_ij->BO_s * Cln_BOp_s, ibond->dvec );
+                            rvec_Scale( bo_ij->dln_BOp_pi, -bo_ij->BO_pi * Cln_BOp_pi, ibond->dvec );
+                            rvec_Scale( bo_ij->dln_BOp_pi2,
+                                    -bo_ij->BO_pi2 * Cln_BOp_pi2, ibond->dvec );
+                            rvec_Scale( bo_ji->dln_BOp_s, -1., bo_ij->dln_BOp_s );
+                            rvec_Scale( bo_ji->dln_BOp_pi, -1., bo_ij->dln_BOp_pi );
+                            rvec_Scale( bo_ji->dln_BOp_pi2, -1., bo_ij->dln_BOp_pi2 );
+
+                            /* Only dBOp wrt. dr_i is stored here, note that
+                               dBOp/dr_i = -dBOp/dr_j and all others are 0 */
+                            rvec_Scale( bo_ij->dBOp, -(bo_ij->BO_s * Cln_BOp_s
+                                        + bo_ij->BO_pi * Cln_BOp_pi
+                                        + bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec );
+                            rvec_Scale( bo_ji->dBOp, -1., bo_ij->dBOp );
+
+                            rvec_Add( workspace->dDeltap_self[i], bo_ij->dBOp );
+                            rvec_Add( workspace->dDeltap_self[j], bo_ji->dBOp );
+
+                            bo_ij->BO_s -= control->bo_cut;
+                            bo_ij->BO -= control->bo_cut;
+                            bo_ji->BO_s -= control->bo_cut;
+                            bo_ji->BO -= control->bo_cut;
+                            workspace->total_bond_order[i] += bo_ij->BO;
+                            workspace->total_bond_order[j] += bo_ji->BO;
+                            bo_ij->Cdbo = 0.0;
+                            bo_ij->Cdbopi = 0.0;
+                            bo_ij->Cdbopi2 = 0.0;
+                            bo_ji->Cdbo = 0.0;
+                            bo_ji->Cdbopi = 0.0;
+                            bo_ji->Cdbopi2 = 0.0;
+
+                            Set_End_Index( j, btop_j + 1, bonds );
+                        }
                     }
+                }
 #if defined(QMMM)
                 }
 #endif
-                }
+            }
 #if defined(QMMM)
             }
 #endif
-            }
         }
 
-        /* diagonal entry */
-        H->j[Htop] = i;
-        H->val[Htop] = Init_Charge_Matrix_Entry( system, control,
-                workspace, i, i, r_ij, DIAGONAL );
-        ++Htop;
-
-        H_sp->j[H_sp_top] = i;
-        H_sp->val[H_sp_top] = H->val[Htop - 1];
-        ++H_sp_top;
-
         Set_End_Index( i, btop_i, bonds );
         if ( ihb == H_ATOM )
         {
             Set_End_Index( workspace->hbond_index[i], ihb_top, hbonds );
         }
     }
+}
 
-    Init_Charge_Matrix_Remaining_Entries( system, control, far_nbrs,
-            H, H_sp, &Htop, &H_sp_top );
 
-    H->start[system->N_cm] = Htop;
-    H_sp->start[system->N_cm] = H_sp_top;
+/* Generate bond list (full format), hydrogen bond list (full format),
+ * and charge matrix (half symmetric format)
+ * from the far neighbors list (with distance updates, if necessary)
+ * */
+static void Init_Forces( reax_system *system, control_params *control,
+        simulation_data *data, static_storage *workspace,
+        reax_list **lists, output_controls *out_control )
+{
+    int renbr;
+    int num_bonds, num_hbonds;
+    static int dist_done = FALSE, cm_done = FALSE, bonds_done = FALSE;
+
+    renbr = ((data->step - data->prev_steps) % control->reneighbor) == 0 ? TRUE : FALSE;
+    num_bonds = 0;
+    num_hbonds = 0;
+
+    if ( renbr == FALSE && dist_done == FALSE )
+    {
+        Init_Distance( system, control, lists );
+
+        dist_done = TRUE;
+    }
+
+    if ( cm_done == FALSE )
+    {
+//        if ( workspace->H.format == SYM_HALF_MATRIX )
+//        {
+            Init_CM_Half( system, control, workspace, lists );
+//        }
+//        else
+//        {
+//            Init_CM_Full( system, control, data, workspace, lists, out_control );
+//        }
+    }
+
+    if ( bonds_done == FALSE )
+    {
+//        if ( lists[FAR_NBRS]->format == HALF_LIST )
+//        {
+//            Init_Bond_Half( system, control, workspace, lists, &num_bonds, &num_hbonds );
+//        }
+//        else
+//        {
+            Init_Bond_Full( system, control, workspace, lists, &num_bonds, &num_hbonds );
+//        }
+    }
+
+//    ret = (workspace->realloc.cm == FALSE
+//            && workspace->realloc.bonds == FALSE
+//            && workspace->realloc.hbonds == FALSE
+//            ? SUCCESS : FAILURE);
+//
+//    if ( workspace->realloc.cm == FALSE )
+//    {
+//        cm_done = TRUE;
+//    }
+//    if ( workspace->realloc.bonds == FALSE && workspace->realloc.hbonds == FALSE )
+//    {
+//        bonds_done = TRUE;
+//    }
+//
+//    if ( ret == SUCCESS )
+//    {
+    dist_done = FALSE;
+    cm_done = FALSE;
+    bonds_done = FALSE;
+//    }
 
     /* validate lists - decide if reallocation is required! */
     Validate_Lists( workspace, lists,
-            data->step, system->N, H->m, Htop, num_bonds, num_hbonds );
+            data->step, system->N, workspace->H.m, workspace->H.start[system->N_cm],
+            num_bonds, num_hbonds );
 
 #if defined(TEST_FORCES)
     /* Calculate_dBO requires a sorted bonds list */
@@ -1211,6 +1294,11 @@ static void Init_Forces_Tab( reax_system *system, control_params *control,
             flag = FALSE;
             flag_sp = FALSE;
 
+#if defined(QMMM)
+            if ( system->atoms[i].qmmm_mask == TRUE
+                    || system->atoms[j].qmmm_mask == TRUE )
+            {
+#endif
             /* check if reneighboring step --
              * atomic distances just computed via
              * Verlet list, so use current distances */
@@ -1297,149 +1385,165 @@ static void Init_Forces_Tab( reax_system *system, control_params *control,
                     }
                 }
 
-                /* hydrogen bond lists */
-                if ( control->hbond_cut > 0.0
-                        && (ihb == H_ATOM || ihb == H_BONDING_ATOM)
-                        && nbr_pj->d <= control->hbond_cut )
+#if defined(QMMM)
+                if ( system->atoms[i].qmmm_mask == TRUE
+                        && system->atoms[j].qmmm_mask == TRUE )
                 {
-                    jhb = sbp_j->p_hbond;
-
-                    if ( ihb == H_ATOM && jhb == H_BONDING_ATOM )
-                    {
-                        hbonds->hbond_list[ihb_top].nbr = j;
-                        hbonds->hbond_list[ihb_top].scl = 1;
-                        hbonds->hbond_list[ihb_top].ptr = nbr_pj;
-                        ++ihb_top;
-                        ++num_hbonds;
-                    }
-                    else if ( ihb == H_BONDING_ATOM && jhb == H_ATOM )
-                    {
-                        jhb_top = End_Index( workspace->hbond_index[j], hbonds );
-                        hbonds->hbond_list[jhb_top].nbr = i;
-                        hbonds->hbond_list[jhb_top].scl = -1;
-                        hbonds->hbond_list[jhb_top].ptr = nbr_pj;
-                        Set_End_Index( workspace->hbond_index[j], jhb_top + 1, hbonds );
-                        ++num_hbonds;
-                    }
-                }
-
-                /* uncorrected bond orders */
-                if ( nbr_pj->d <= control->bond_cut )
+#endif
+                /* Only non-dummy atoms can form bonds */
+                if ( system->atoms[i].is_dummy == FALSE
+                        && system->atoms[j].is_dummy == FALSE )
                 {
-                    r2 = SQR( r_ij );
-
-                    if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 )
-                    {
-                        C12 = twbp->p_bo1 * POW( r_ij / twbp->r_s, twbp->p_bo2 );
-                        BO_s = (1.0 + control->bo_cut) * EXP( C12 );
-                    }
-                    else
+                    /* hydrogen bond lists */
+                    if ( control->hbond_cut > 0.0
+                            && (ihb == H_ATOM || ihb == H_BONDING_ATOM)
+                            && nbr_pj->d <= control->hbond_cut )
                     {
-                        C12 = 0.0;
-                        BO_s = 0.0;
-                    }
+                        jhb = sbp_j->p_hbond;
 
-                    if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 )
-                    {
-                        C34 = twbp->p_bo3 * POW( r_ij / twbp->r_p, twbp->p_bo4 );
-                        BO_pi = EXP( C34 );
-                    }
-                    else
-                    {
-                        C34 = 0.0;
-                        BO_pi = 0.0;
+                        if ( ihb == H_ATOM && jhb == H_BONDING_ATOM )
+                        {
+                            hbonds->hbond_list[ihb_top].nbr = j;
+                            hbonds->hbond_list[ihb_top].scl = 1;
+                            hbonds->hbond_list[ihb_top].ptr = nbr_pj;
+                            ++ihb_top;
+                            ++num_hbonds;
+                        }
+                        else if ( ihb == H_BONDING_ATOM && jhb == H_ATOM )
+                        {
+                            jhb_top = End_Index( workspace->hbond_index[j], hbonds );
+                            hbonds->hbond_list[jhb_top].nbr = i;
+                            hbonds->hbond_list[jhb_top].scl = -1;
+                            hbonds->hbond_list[jhb_top].ptr = nbr_pj;
+                            Set_End_Index( workspace->hbond_index[j], jhb_top + 1, hbonds );
+                            ++num_hbonds;
+                        }
                     }
 
-                    if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 )
+                    /* uncorrected bond orders */
+                    if ( nbr_pj->d <= control->bond_cut )
                     {
-                        C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 );
-                        BO_pi2 = EXP( C56 );
-                    }
-                    else
-                    {
-                        C56 = 0.0;
-                        BO_pi2 = 0.0;
-                    }
+                        r2 = SQR( r_ij );
 
-                    /* Initially BO values are the uncorrected ones, page 1 */
-                    BO = BO_s + BO_pi + BO_pi2;
+                        if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 )
+                        {
+                            C12 = twbp->p_bo1 * POW( r_ij / twbp->r_s, twbp->p_bo2 );
+                            BO_s = (1.0 + control->bo_cut) * EXP( C12 );
+                        }
+                        else
+                        {
+                            C12 = 0.0;
+                            BO_s = 0.0;
+                        }
 
-                    if ( BO >= control->bo_cut )
-                    {
-                        num_bonds += 2;
-                        /****** bonds i-j and j-i ******/
-                        ibond = &bonds->bond_list[btop_i];
-                        btop_j = End_Index( j, bonds );
-                        jbond = &bonds->bond_list[btop_j];
-
-                        ibond->nbr = j;
-                        jbond->nbr = i;
-                        ibond->d = r_ij;
-                        jbond->d = r_ij;
-                        rvec_Copy( ibond->dvec, nbr_pj->dvec );
-                        rvec_Scale( jbond->dvec, -1, nbr_pj->dvec );
-                        ivec_Copy( ibond->rel_box, nbr_pj->rel_box );
-                        ivec_Scale( jbond->rel_box, -1, nbr_pj->rel_box );
-                        ibond->dbond_index = btop_i;
-                        jbond->dbond_index = btop_i;
-                        ibond->sym_index = btop_j;
-                        jbond->sym_index = btop_i;
-                        ++btop_i;
-                        Set_End_Index( j, btop_j + 1, bonds );
-
-                        bo_ij = &ibond->bo_data;
-                        bo_ij->BO = BO;
-                        bo_ij->BO_s = BO_s;
-                        bo_ij->BO_pi = BO_pi;
-                        bo_ij->BO_pi2 = BO_pi2;
-                        bo_ji = &jbond->bo_data;
-                        bo_ji->BO = BO;
-                        bo_ji->BO_s = BO_s;
-                        bo_ji->BO_pi = BO_pi;
-                        bo_ji->BO_pi2 = BO_pi2;
-
-                        /* Bond Order page2-3, derivative of total bond order prime */
-                        Cln_BOp_s = twbp->p_bo2 * C12 / r2;
-                        Cln_BOp_pi = twbp->p_bo4 * C34 / r2;
-                        Cln_BOp_pi2 = twbp->p_bo6 * C56 / r2;
-
-                        /* Only dln_BOp_xx wrt. dr_i is stored here, note that
-                           dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */
-                        rvec_Scale( bo_ij->dln_BOp_s, -bo_ij->BO_s * Cln_BOp_s, ibond->dvec );
-                        rvec_Scale( bo_ij->dln_BOp_pi, -bo_ij->BO_pi * Cln_BOp_pi, ibond->dvec );
-                        rvec_Scale( bo_ij->dln_BOp_pi2,
-                                -bo_ij->BO_pi2 * Cln_BOp_pi2, ibond->dvec );
-                        rvec_Scale( bo_ji->dln_BOp_s, -1., bo_ij->dln_BOp_s );
-                        rvec_Scale( bo_ji->dln_BOp_pi, -1., bo_ij->dln_BOp_pi );
-                        rvec_Scale( bo_ji->dln_BOp_pi2, -1., bo_ij->dln_BOp_pi2 );
-
-                        /* Only dBOp wrt. dr_i is stored here, note that
-                           dBOp/dr_i = -dBOp/dr_j and all others are 0 */
-                        rvec_Scale( bo_ij->dBOp, -(bo_ij->BO_s * Cln_BOp_s
-                                    + bo_ij->BO_pi * Cln_BOp_pi
-                                    + bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec );
-                        rvec_Scale( bo_ji->dBOp, -1., bo_ij->dBOp );
-
-                        rvec_Add( workspace->dDeltap_self[i], bo_ij->dBOp );
-                        rvec_Add( workspace->dDeltap_self[j], bo_ji->dBOp );
-
-                        bo_ij->BO_s -= control->bo_cut;
-                        bo_ij->BO -= control->bo_cut;
-                        bo_ji->BO_s -= control->bo_cut;
-                        bo_ji->BO -= control->bo_cut;
-                        workspace->total_bond_order[i] += bo_ij->BO;
-                        workspace->total_bond_order[j] += bo_ji->BO;
-                        bo_ij->Cdbo = 0.0;
-                        bo_ij->Cdbopi = 0.0;
-                        bo_ij->Cdbopi2 = 0.0;
-                        bo_ji->Cdbo = 0.0;
-                        bo_ji->Cdbopi = 0.0;
-                        bo_ji->Cdbopi2 = 0.0;
-
-                        Set_End_Index( j, btop_j + 1, bonds );
+                        if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 )
+                        {
+                            C34 = twbp->p_bo3 * POW( r_ij / twbp->r_p, twbp->p_bo4 );
+                            BO_pi = EXP( C34 );
+                        }
+                        else
+                        {
+                            C34 = 0.0;
+                            BO_pi = 0.0;
+                        }
+
+                        if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 )
+                        {
+                            C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 );
+                            BO_pi2 = EXP( C56 );
+                        }
+                        else
+                        {
+                            C56 = 0.0;
+                            BO_pi2 = 0.0;
+                        }
+
+                        /* Initially BO values are the uncorrected ones, page 1 */
+                        BO = BO_s + BO_pi + BO_pi2;
+
+                        if ( BO >= control->bo_cut )
+                        {
+                            num_bonds += 2;
+                            /****** bonds i-j and j-i ******/
+                            ibond = &bonds->bond_list[btop_i];
+                            btop_j = End_Index( j, bonds );
+                            jbond = &bonds->bond_list[btop_j];
+
+                            ibond->nbr = j;
+                            jbond->nbr = i;
+                            ibond->d = r_ij;
+                            jbond->d = r_ij;
+                            rvec_Copy( ibond->dvec, nbr_pj->dvec );
+                            rvec_Scale( jbond->dvec, -1, nbr_pj->dvec );
+                            ivec_Copy( ibond->rel_box, nbr_pj->rel_box );
+                            ivec_Scale( jbond->rel_box, -1, nbr_pj->rel_box );
+                            ibond->dbond_index = btop_i;
+                            jbond->dbond_index = btop_i;
+                            ibond->sym_index = btop_j;
+                            jbond->sym_index = btop_i;
+                            ++btop_i;
+                            Set_End_Index( j, btop_j + 1, bonds );
+
+                            bo_ij = &ibond->bo_data;
+                            bo_ij->BO = BO;
+                            bo_ij->BO_s = BO_s;
+                            bo_ij->BO_pi = BO_pi;
+                            bo_ij->BO_pi2 = BO_pi2;
+                            bo_ji = &jbond->bo_data;
+                            bo_ji->BO = BO;
+                            bo_ji->BO_s = BO_s;
+                            bo_ji->BO_pi = BO_pi;
+                            bo_ji->BO_pi2 = BO_pi2;
+
+                            /* Bond Order page2-3, derivative of total bond order prime */
+                            Cln_BOp_s = twbp->p_bo2 * C12 / r2;
+                            Cln_BOp_pi = twbp->p_bo4 * C34 / r2;
+                            Cln_BOp_pi2 = twbp->p_bo6 * C56 / r2;
+
+                            /* Only dln_BOp_xx wrt. dr_i is stored here, note that
+                               dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */
+                            rvec_Scale( bo_ij->dln_BOp_s, -bo_ij->BO_s * Cln_BOp_s, ibond->dvec );
+                            rvec_Scale( bo_ij->dln_BOp_pi, -bo_ij->BO_pi * Cln_BOp_pi, ibond->dvec );
+                            rvec_Scale( bo_ij->dln_BOp_pi2,
+                                    -bo_ij->BO_pi2 * Cln_BOp_pi2, ibond->dvec );
+                            rvec_Scale( bo_ji->dln_BOp_s, -1., bo_ij->dln_BOp_s );
+                            rvec_Scale( bo_ji->dln_BOp_pi, -1., bo_ij->dln_BOp_pi );
+                            rvec_Scale( bo_ji->dln_BOp_pi2, -1., bo_ij->dln_BOp_pi2 );
+
+                            /* Only dBOp wrt. dr_i is stored here, note that
+                               dBOp/dr_i = -dBOp/dr_j and all others are 0 */
+                            rvec_Scale( bo_ij->dBOp, -(bo_ij->BO_s * Cln_BOp_s
+                                        + bo_ij->BO_pi * Cln_BOp_pi
+                                        + bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec );
+                            rvec_Scale( bo_ji->dBOp, -1., bo_ij->dBOp );
+
+                            rvec_Add( workspace->dDeltap_self[i], bo_ij->dBOp );
+                            rvec_Add( workspace->dDeltap_self[j], bo_ji->dBOp );
+
+                            bo_ij->BO_s -= control->bo_cut;
+                            bo_ij->BO -= control->bo_cut;
+                            bo_ji->BO_s -= control->bo_cut;
+                            bo_ji->BO -= control->bo_cut;
+                            workspace->total_bond_order[i] += bo_ij->BO;
+                            workspace->total_bond_order[j] += bo_ji->BO;
+                            bo_ij->Cdbo = 0.0;
+                            bo_ij->Cdbopi = 0.0;
+                            bo_ij->Cdbopi2 = 0.0;
+                            bo_ji->Cdbo = 0.0;
+                            bo_ji->Cdbopi = 0.0;
+                            bo_ji->Cdbopi2 = 0.0;
+
+                            Set_End_Index( j, btop_j + 1, bonds );
+                        }
                     }
                 }
+#if defined(QMMM)
+                }
+#endif
             }
+#if defined(QMMM)
+            }
+#endif
         }
 
         /* diagonal entry */
@@ -1588,8 +1692,38 @@ void Estimate_Storage_Sizes( reax_system *system, control_params *control,
         }
     }
 
-    *Htop += system->N;
+    switch ( control->charge_method )
+    {
+        case QEQ_CM:
+            break;
+
+        case EE_CM:
+            if ( system->num_molec_charge_constraints == 0 )
+            {
+                *Htop += system->N_cm;
+            }
+            else
+            {
+                for ( i = 0; i < system->num_molec_charge_constraints; ++i )
+                {
+                    *Htop += system->molec_charge_constraint_ranges[2 * i + 1]
+                        - system->molec_charge_constraint_ranges[2 * i] + 1;
+                }
+            }
+            break;
+
+        case ACKS2_CM:
+            *Htop = 2 * *Htop + 3 * system->N + 2;
+            break;
+
+        default:
+            fprintf( stderr, "[ERROR] Unknown charge method type. Terminating...\n" );
+            exit( INVALID_INPUT );
+            break;
+    }
+
     *Htop *= SAFE_ZONE;
+
     for ( i = 0; i < system->N; ++i )
     {
         hb_top[i] = MAX( hb_top[i] * SAFE_HBONDS, MIN_HBONDS );
diff --git a/sPuReMD/src/geo_tools.c b/sPuReMD/src/geo_tools.c
index 32e0fe573c97062b9b89904fd4b2f37f2ce6c132..dfd2c5b4ed85310298e76589af090d429fd0e7a9 100644
--- a/sPuReMD/src/geo_tools.c
+++ b/sPuReMD/src/geo_tools.c
@@ -284,6 +284,14 @@ static int Count_Atoms( reax_system *system, FILE *fp, int geo_format )
 }
 
 
+/* Parser for geometry file in free-form custom PuReMD format
+ *
+ * geo_file: filename for custom geometry file to parse
+ * system: struct containing atom-related information
+ * control: struct containing simulation parameters
+ * data: struct containing information on active simulations
+ * workspace: struct containing intermediate structures used for calculations
+ */
 void Read_Geo( const char * const geo_file, reax_system* system, control_params *control,
         simulation_data *data, static_storage *workspace )
 {
@@ -293,7 +301,7 @@ void Read_Geo( const char * const geo_file, reax_system* system, control_params
     char element[3], name[9];
     reax_atom *atom;
 
-    geo = sfopen( geo_file, "r" );
+    geo = sfopen( geo_file, "r", __FILE__, __LINE__ );
 
     if ( Read_Box_Info( system, geo, CUSTOM ) == FAILURE )
     {
@@ -339,14 +347,32 @@ void Read_Geo( const char * const geo_file, reax_system* system, control_params
         rvec_MakeZero( atom->v );
         rvec_MakeZero( atom->f );
         atom->q = 0.0;
+
+        /* check for dummy atom (element symbol "X") */
+        if ( strncmp( element, "X\0", 2 ) == 0 )
+        {
+            atom->is_dummy = TRUE;
+        }
+        else
+        {
+            atom->is_dummy = FALSE;
+        }
 
         top++;
     }
 
-    sfclose( geo, "Read_Geo::geo" );
+    sfclose( geo, __FILE__, __LINE__ );
 }
 
 
+/* Parser for geometry file in fixed-form PDB format
+ *
+ * pdb_file: filename for PDB geometry file to parse
+ * system: struct containing atom-related information
+ * control: struct containing simulation parameters
+ * data: struct containing information on active simulations
+ * workspace: struct containing intermediate structures used for calculations
+ */
 void Read_PDB( const char * const pdb_file, reax_system* system, control_params *control,
         simulation_data *data, static_storage *workspace )
 {
@@ -363,7 +389,7 @@ void Read_PDB( const char * const pdb_file, reax_system* system, control_params
     rvec x;
     reax_atom *atom;
 
-    pdb = sfopen( pdb_file, "r" );
+    pdb = sfopen( pdb_file, "r", __FILE__, __LINE__ );
 
     Allocate_Tokenizer_Space( &s, MAX_LINE, &s1, MAX_LINE,
             &tmp, MAX_TOKENS, MAX_TOKEN_LEN );
@@ -478,28 +504,38 @@ void Read_PDB( const char * const pdb_file, reax_system* system, control_params
                 pdb_serial = (int) sstrtod( serial, __FILE__, __LINE__ );
                 workspace->orig_id[top] = pdb_serial;
 
+                strncpy( atom->name, atom_name, sizeof(atom->name) - 1 );
+                atom->name[sizeof(atom->name) - 1] = '\0';
                 Trim_Spaces( element, sizeof(element) );
                 for ( i = 0; i < sizeof(element) - 1; ++i )
                 {
                     element[i] = toupper( element[i] );
                 }
                 atom->type = Get_Atom_Type( &system->reax_param, element, sizeof(element) );
-                strncpy( atom->name, atom_name, sizeof(atom->name) - 1 );
-                atom->name[sizeof(atom->name) - 1] = '\0';
+
+                /* check for dummy atom (element symbol "X", compared after upper-casing) */
+                if ( strncmp( element, "X\0", 2 ) == 0 )
+                {
+                    system->atoms[top].is_dummy = TRUE;
+                }
+                else
+                {
+                    system->atoms[top].is_dummy = FALSE;
+                }
 
                 rvec_Copy( atom->x, x );
                 rvec_MakeZero( atom->v );
                 rvec_MakeZero( atom->f );
                 atom->q = 0;
 
-                top++;
-
 #if defined(DEBUG_FOCUS)
                 fprintf( stderr, "[INFO] atom: id = %d, name = %s, serial = %d, type = %d, ",
                         top, atom->name, pdb_serial, atom->type );
                 fprintf( stderr, "x = (%7.3f, %7.3f, %7.3f)\n",
                         atom->x[0], atom->x[1], atom->x[2] );
 #endif
+
+                top++;
             }
 
             c++;
@@ -515,8 +551,7 @@ void Read_PDB( const char * const pdb_file, reax_system* system, control_params
             if ( control->restrict_bonds )
             {
                 /* error check */
-//                Check_Input_Range( c1 - 2, 0, MAX_RESTRICT,
-//                        "CONECT line exceeds max num restrictions allowed.\n" );
+//                Check_Input_Range( c1 - 2, 0, MAX_RESTRICT, __FILE__, __LINE__ );
 
                 /* read bond restrictions */
                 // pdb_serial = sstrtol( tmp[1], __FILE__, __LINE__ );
@@ -553,7 +588,7 @@ void Read_PDB( const char * const pdb_file, reax_system* system, control_params
         exit( INVALID_INPUT );
     }
 
-    sfclose( pdb, "Read_PDB::pdb" );
+    sfclose( pdb, __FILE__, __LINE__ );
 
     Deallocate_Tokenizer_Space( &s, &s1, &tmp, MAX_TOKENS );
 } 
@@ -595,7 +630,7 @@ void Write_PDB( reax_system* system, reax_list* bonds, simulation_data *data,
                 sizeof(control->sim_name) ),
             control->sim_name, snprintf(NULL, 0, "%d", data->step), data->step );
     fname[sizeof(fname) - 1] = '\0';
-    pdb = sfopen( fname, "w" );
+    pdb = sfopen( fname, "w", __FILE__, __LINE__ );
     fprintf( pdb, PDB_CRYST1_FORMAT_O,
              "CRYST1",
              system->box.box_norms[0], system->box.box_norms[1],
@@ -626,12 +661,13 @@ void Write_PDB( reax_system* system, reax_list* bonds, simulation_data *data,
         exit( INVALID_INPUT );
     }
 
-    sfclose( pdb, "Write_PDB::pdb" );
+    sfclose( pdb, __FILE__, __LINE__ );
 }
 
 
 /* Parser for geometry files in BGF format
  *
+ * bgf_file: filename for BGF file to parse
  * system: struct containing atom-related information
  * control: struct containing simulation parameters
  * data: struct containing information on active simulations
@@ -656,7 +692,7 @@ void Read_BGF( const char * const bgf_file, reax_system* system, control_params
     ratom = 0;
     crystx_found = FALSE;
 
-    bgf = sfopen( bgf_file, "r" );
+    bgf = sfopen( bgf_file, "r", __FILE__, __LINE__ );
 
     Allocate_Tokenizer_Space( &line, MAX_LINE, &backup, MAX_LINE,
             &tokens, MAX_TOKENS, MAX_TOKEN_LEN );
@@ -696,9 +732,9 @@ void Read_BGF( const char * const bgf_file, reax_system* system, control_params
         if ( system->prealloc_allocated == FALSE && num_mcc > 0 )
         {
             system->molec_charge_constraints = smalloc(
-                    sizeof(real) * num_mcc, "Read_BGF::molec_charge_constraints" );
+                    sizeof(real) * num_mcc, __FILE__, __LINE__ );
             system->molec_charge_constraint_ranges = smalloc(
-                    sizeof(int) * 2 * num_mcc, "Read_BGF::molec_charge_constraint_ranges" );
+                    sizeof(int) * 2 * num_mcc, __FILE__, __LINE__ );
 
             system->max_num_molec_charge_constraints = num_mcc;
         }
@@ -706,14 +742,14 @@ void Read_BGF( const char * const bgf_file, reax_system* system, control_params
         {
             if ( system->max_num_molec_charge_constraints > 0 )
             {
-                sfree( system->molec_charge_constraints, "Read_BGF::molec_charge_constraints" );
-                sfree( system->molec_charge_constraint_ranges, "Read_BGF::molec_charge_constraint_ranges" );
+                sfree( system->molec_charge_constraints, __FILE__, __LINE__ );
+                sfree( system->molec_charge_constraint_ranges, __FILE__, __LINE__ );
             }
 
             system->molec_charge_constraints = smalloc(
-                    sizeof(real) * num_mcc, "Read_BGF::molec_charge_constraints" );
+                    sizeof(real) * num_mcc, __FILE__, __LINE__ );
             system->molec_charge_constraint_ranges = smalloc(
-                    sizeof(int) * 2 * num_mcc, "Read_BGF::molec_charge_constraint_ranges" );
+                    sizeof(int) * 2 * num_mcc, __FILE__, __LINE__ );
 
             system->max_num_molec_charge_constraints = num_mcc;
         }
@@ -819,7 +855,7 @@ void Read_BGF( const char * const bgf_file, reax_system* system, control_params
 
             /* add to mapping */
             bgf_serial = sstrtod( serial, __FILE__, __LINE__ );
-            Check_Input_Range( bgf_serial, 0, MAX_ATOM_ID, "Invalid bgf serial" );
+            Check_Input_Range( bgf_serial, 0, MAX_ATOM_ID, __FILE__, __LINE__ );
             workspace->map_serials[ bgf_serial ] = atom_cnt;
             workspace->orig_id[ atom_cnt ] = bgf_serial;
 
@@ -845,6 +881,16 @@ void Read_BGF( const char * const bgf_file, reax_system* system, control_params
             }
             system->atoms[atom_cnt].type =
                 Get_Atom_Type( &system->reax_param, element, sizeof(element) );
+
+            /* check for dummy atom (element symbol "X") */
+            if ( strncmp( element, "X\0", 2 ) == 0 )
+            {
+                system->atoms[atom_cnt].is_dummy = TRUE;
+            }
+            else
+            {
+                system->atoms[atom_cnt].is_dummy = FALSE;
+            }
 
 #if defined(DEBUG_FOCUS)
             fprintf( stderr,
@@ -863,8 +909,7 @@ void Read_BGF( const char * const bgf_file, reax_system* system, control_params
             if ( control->restrict_bonds )
             {
                 /* check number of restrictions */
-                Check_Input_Range( token_cnt - 2, 0, MAX_RESTRICT,
-                        "CONECT line exceeds max restrictions allowed.\n" );
+                Check_Input_Range( token_cnt - 2, 0, MAX_RESTRICT, __FILE__, __LINE__ );
 
                 /* read bond restrictions */
                 bgf_serial = sstrtol( tokens[1], __FILE__, __LINE__ );
@@ -920,5 +965,5 @@ void Read_BGF( const char * const bgf_file, reax_system* system, control_params
 
     Deallocate_Tokenizer_Space( &line, &backup, &tokens, MAX_TOKENS );
 
-    sfclose( bgf, "Read_BGF::bgf" );
+    sfclose( bgf, __FILE__, __LINE__ );
 }
diff --git a/sPuReMD/src/grid.c b/sPuReMD/src/grid.c
index f5bfde6686432c6d760dea90b34893b6fbe913b6..3c6eb47bdfe8348e4d821364254125e8d82df2d1 100644
--- a/sPuReMD/src/grid.c
+++ b/sPuReMD/src/grid.c
@@ -89,101 +89,101 @@ static void Allocate_Space_for_Grid( reax_system * const system, int alloc )
     if ( alloc == TRUE )
     {
         /* allocate space for the new grid */
-        g->atoms = (int****) scalloc( g->ncell_max[0], sizeof( int*** ),
-                "Allocate_Space_for_Grid::g->atoms" );
+        g->atoms = scalloc( g->ncell_max[0], sizeof( int*** ),
+                __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
-            g->atoms[i] = (int***) scalloc( g->ncell_max[1], sizeof( int** ),
-                    "Allocate_Space_for_Grid::g->atoms[i]" );
+            g->atoms[i] = scalloc( g->ncell_max[1], sizeof( int** ),
+                    __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
             for ( j = 0; j < g->ncell_max[1]; j++ )
-                g->atoms[i][j] = (int**) scalloc( g->ncell_max[2], sizeof( int* ),
-                        "Allocate_Space_for_Grid::g->atoms[i][j]" );
+                g->atoms[i][j] = scalloc( g->ncell_max[2], sizeof( int* ),
+                        __FILE__, __LINE__ );
 
-        g->top = (int***) scalloc( g->ncell_max[0], sizeof( int** ),
-                "Allocate_Space_for_Grid::g->top" );
+        g->top = scalloc( g->ncell_max[0], sizeof( int** ),
+                __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
-            g->top[i] = (int**) scalloc( g->ncell_max[1], sizeof( int* ),
-                    "Allocate_Space_for_Grid::g->top[i]" );
+            g->top[i] = scalloc( g->ncell_max[1], sizeof( int* ),
+                    __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
             for ( j = 0; j < g->ncell_max[1]; j++ )
-                g->top[i][j] = (int*) scalloc( g->ncell_max[2], sizeof( int ),
-                        "Allocate_Space_for_Grid::g->top[i][j]" );
+                g->top[i][j] = scalloc( g->ncell_max[2], sizeof( int ),
+                        __FILE__, __LINE__ );
 
-        g->mark = (int***) scalloc( g->ncell_max[0], sizeof( int** ),
-                "Allocate_Space_for_Grid::g->mark" );
+        g->mark = scalloc( g->ncell_max[0], sizeof( int** ),
+                __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
-            g->mark[i] = (int**) scalloc( g->ncell_max[1], sizeof( int* ),
-                    "Allocate_Space_for_Grid::g->mark[i]" );
+            g->mark[i] = scalloc( g->ncell_max[1], sizeof( int* ),
+                    __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
             for ( j = 0; j < g->ncell_max[1]; j++ )
-                g->mark[i][j] = (int*) scalloc( g->ncell_max[2], sizeof( int ),
-                        "Allocate_Space_for_Grid::g->mark[i][j]" );
+                g->mark[i][j] = scalloc( g->ncell_max[2], sizeof( int ),
+                        __FILE__, __LINE__ );
 
-        g->start = (int***) scalloc( g->ncell_max[0], sizeof( int** ),
-                "Allocate_Space_for_Grid::g->start" );
+        g->start = scalloc( g->ncell_max[0], sizeof( int** ),
+                __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
-            g->start[i] = (int**) scalloc( g->ncell_max[1], sizeof( int* ),
-                    "Allocate_Space_for_Grid::g->start[i]" );
+            g->start[i] = scalloc( g->ncell_max[1], sizeof( int* ),
+                    __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
             for ( j = 0; j < g->ncell_max[1]; j++ )
-                g->start[i][j] = (int*) scalloc( g->ncell_max[2], sizeof( int ),
-                        "Allocate_Space_for_Grid::g->start[i][j]" );
+                g->start[i][j] = scalloc( g->ncell_max[2], sizeof( int ),
+                        __FILE__, __LINE__ );
 
-        g->end = (int***) scalloc( g->ncell_max[0], sizeof( int** ),
-                "Allocate_Space_for_Grid::g->end" );
+        g->end = scalloc( g->ncell_max[0], sizeof( int** ),
+                __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
-            g->end[i] = (int**) scalloc( g->ncell_max[1], sizeof( int* ),
-                    "Allocate_Space_for_Grid::g->end[i]" );
+            g->end[i] = scalloc( g->ncell_max[1], sizeof( int* ),
+                    __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
             for ( j = 0; j < g->ncell_max[1]; j++ )
-                g->end[i][j] = (int*) scalloc( g->ncell_max[2], sizeof( int ),
-                        "Allocate_Space_for_Grid::g->end[i][j]" );
+                g->end[i][j] = scalloc( g->ncell_max[2], sizeof( int ),
+                        __FILE__, __LINE__ );
 
-        g->nbrs = (ivec****) scalloc( g->ncell_max[0], sizeof( ivec*** ),
-                "Allocate_Space_for_Grid::g->nbrs" );
+        g->nbrs = scalloc( g->ncell_max[0], sizeof( ivec*** ),
+                __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
-            g->nbrs[i] = (ivec***) scalloc( g->ncell_max[1], sizeof( ivec** ),
-                    "Allocate_Space_for_Grid::g->nbrs[i]" );
+            g->nbrs[i] = scalloc( g->ncell_max[1], sizeof( ivec** ),
+                    __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
             for ( j = 0; j < g->ncell_max[1]; j++ )
-                g->nbrs[i][j] = (ivec**) scalloc( g->ncell_max[2], sizeof( ivec* ),
-                        "Allocate_Space_for_Grid::g->nbrs[i][j]" );
+                g->nbrs[i][j] = scalloc( g->ncell_max[2], sizeof( ivec* ),
+                        __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
             for ( j = 0; j < g->ncell_max[1]; j++ )
                 for ( k = 0; k < g->ncell_max[2]; k++ )
-                    g->nbrs[i][j][k] = (ivec*) smalloc( g->max_nbrs * sizeof( ivec ),
-                           "Allocate_Space_for_Grid::g->nbrs[i][j][k]" );
+                    g->nbrs[i][j][k] = smalloc( g->max_nbrs * sizeof( ivec ),
+                           __FILE__, __LINE__ );
 
-        g->nbrs_cp = (rvec****) scalloc( g->ncell_max[0], sizeof( rvec*** ),
-                "Allocate_Space_for_Grid::g->nbrs_cp" );
+        g->nbrs_cp = scalloc( g->ncell_max[0], sizeof( rvec*** ),
+                __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
-            g->nbrs_cp[i] = (rvec***) scalloc( g->ncell_max[1], sizeof( rvec** ),
-                    "Allocate_Space_for_Grid::g->nbrs_cp[i]" );
+            g->nbrs_cp[i] = scalloc( g->ncell_max[1], sizeof( rvec** ),
+                    __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
             for ( j = 0; j < g->ncell_max[1]; j++ )
-                g->nbrs_cp[i][j] = (rvec**) scalloc( g->ncell_max[2], sizeof( rvec* ),
-                        "Allocate_Space_for_Grid::g->nbrs_cp[i][j]" );
+                g->nbrs_cp[i][j] = scalloc( g->ncell_max[2], sizeof( rvec* ),
+                        __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
             for ( j = 0; j < g->ncell_max[1]; j++ )
                 for ( k = 0; k < g->ncell_max[2]; k++ )
-                    g->nbrs_cp[i][j][k] = (rvec*) smalloc( g->max_nbrs * sizeof( rvec ),
-                           "Allocate_Space_for_Grid::g->nbrs_cp[i][j][k]" );
+                    g->nbrs_cp[i][j][k] = smalloc( g->max_nbrs * sizeof( rvec ),
+                           __FILE__, __LINE__ );
     }
 
     for ( i = 0; i < g->ncell[0]; i++ )
@@ -235,8 +235,8 @@ static void Allocate_Space_for_Grid( reax_system * const system, int alloc )
         for ( i = 0; i < g->ncell_max[0]; i++ )
             for ( j = 0; j < g->ncell_max[1]; j++ )
                 for ( k = 0; k < g->ncell_max[2]; k++ )
-                    g->atoms[i][j][k] = (int*) smalloc( g->max_atoms * sizeof( int ),
-                           "Allocate_Space_for_Grid::g->atoms[i][j][k]" );
+                    g->atoms[i][j][k] = smalloc( g->max_atoms * sizeof( int ),
+                           __FILE__, __LINE__ );
     }
     /* case: grid large enough but max. atoms per grid cells insufficient */
     else if ( g->max_atoms > 0 && g->max_atoms < max_atoms )
@@ -246,13 +246,13 @@ static void Allocate_Space_for_Grid( reax_system * const system, int alloc )
         for ( i = 0; i < g->ncell_max[0]; i++ )
             for ( j = 0; j < g->ncell_max[1]; j++ )
                 for ( k = 0; k < g->ncell_max[2]; k++ )
-                    sfree( g->atoms[i][j][k], "Allocate_Space_for_Grid::g->atoms[i][j][k]" );
+                    sfree( g->atoms[i][j][k], __FILE__, __LINE__ );
 
         for ( i = 0; i < g->ncell_max[0]; i++ )
             for ( j = 0; j < g->ncell_max[1]; j++ )
                 for ( k = 0; k < g->ncell_max[2]; k++ )
-                    g->atoms[i][j][k] = (int*) smalloc( g->max_atoms * sizeof( int ),
-                           "Allocate_Space_for_Grid::g->atoms[i][j][k]" );
+                    g->atoms[i][j][k] = smalloc( g->max_atoms * sizeof( int ),
+                           __FILE__, __LINE__ );
     }
 
     for ( i = 0; i < g->ncell[0]; i++ )
@@ -274,41 +274,41 @@ static void Deallocate_Grid_Space( grid * const g )
         for ( j = 0; j < g->ncell_max[1]; j++ )
             for ( k = 0; k < g->ncell_max[2]; k++ )
             {
-                sfree( g->atoms[i][j][k], "Deallocate_Grid_Space::g->atoms[i][j][k]" );
-                sfree( g->nbrs[i][j][k], "Deallocate_Grid_Space::g->nbrs[i][j][k]" );
-                sfree( g->nbrs_cp[i][j][k], "Deallocate_Grid_Space::g->nbrs_cp[i][j][k]" );
+                sfree( g->atoms[i][j][k], __FILE__, __LINE__ );
+                sfree( g->nbrs[i][j][k], __FILE__, __LINE__ );
+                sfree( g->nbrs_cp[i][j][k], __FILE__, __LINE__ );
             }
 
     for ( i = 0; i < g->ncell_max[0]; i++ )
         for ( j = 0; j < g->ncell_max[1]; j++ )
         {
-            sfree( g->atoms[i][j], "Deallocate_Grid_Space::g->atoms[i][j]" );
-            sfree( g->top[i][j], "Deallocate_Grid_Space::g->top[i][j]" );
-            sfree( g->mark[i][j], "Deallocate_Grid_Space::g->mark[i][j]" );
-            sfree( g->start[i][j], "Deallocate_Grid_Space::g->start[i][j]" );
-            sfree( g->end[i][j], "Deallocate_Grid_Space::g->end[i][j]" );
-            sfree( g->nbrs[i][j], "Deallocate_Grid_Space::g->nbrs[i][j]" );
-            sfree( g->nbrs_cp[i][j], "Deallocate_Grid_Space::g->nbrs_cp[i][j]" );
+            sfree( g->atoms[i][j], __FILE__, __LINE__ );
+            sfree( g->top[i][j], __FILE__, __LINE__ );
+            sfree( g->mark[i][j], __FILE__, __LINE__ );
+            sfree( g->start[i][j], __FILE__, __LINE__ );
+            sfree( g->end[i][j], __FILE__, __LINE__ );
+            sfree( g->nbrs[i][j], __FILE__, __LINE__ );
+            sfree( g->nbrs_cp[i][j], __FILE__, __LINE__ );
         }
 
     for ( i = 0; i < g->ncell_max[0]; i++ )
     {
-        sfree( g->atoms[i], "Deallocate_Grid_Space::g->atoms[i]" );
-        sfree( g->top[i], "Deallocate_Grid_Space::g->top[i]" );
-        sfree( g->mark[i], "Deallocate_Grid_Space::g->mark[i]" );
-        sfree( g->start[i], "Deallocate_Grid_Space::g->start[i]" );
-        sfree( g->end[i], "Deallocate_Grid_Space::g->end[i]" );
-        sfree( g->nbrs[i], "Deallocate_Grid_Space::g->nbrs[i]" );
-        sfree( g->nbrs_cp[i], "Deallocate_Grid_Space::g->nbrs_cp[i]" );
+        sfree( g->atoms[i], __FILE__, __LINE__ );
+        sfree( g->top[i], __FILE__, __LINE__ );
+        sfree( g->mark[i], __FILE__, __LINE__ );
+        sfree( g->start[i], __FILE__, __LINE__ );
+        sfree( g->end[i], __FILE__, __LINE__ );
+        sfree( g->nbrs[i], __FILE__, __LINE__ );
+        sfree( g->nbrs_cp[i], __FILE__, __LINE__ );
     }
 
-    sfree( g->atoms, "Deallocate_Grid_Space::g->atoms" );
-    sfree( g->top, "Deallocate_Grid_Space::g->top" );
-    sfree( g->mark, "Deallocate_Grid_Space::g->mark" );
-    sfree( g->start, "Deallocate_Grid_Space::g->start" );
-    sfree( g->end, "Deallocate_Grid_Space::g->end" );
-    sfree( g->nbrs, "Deallocate_Grid_Space::g->nbrs" );
-    sfree( g->nbrs_cp, "Deallocate_Grid_Space::g->nbrs_cp" );
+    sfree( g->atoms, __FILE__, __LINE__ );
+    sfree( g->top, __FILE__, __LINE__ );
+    sfree( g->mark, __FILE__, __LINE__ );
+    sfree( g->start, __FILE__, __LINE__ );
+    sfree( g->end, __FILE__, __LINE__ );
+    sfree( g->nbrs, __FILE__, __LINE__ );
+    sfree( g->nbrs_cp, __FILE__, __LINE__ );
 }
 
 
@@ -750,19 +750,19 @@ static void Free_Storage( static_storage * const workspace,
 
     for ( i = 0; i < control->cm_solver_restart + 1; ++i )
     {
-        sfree( workspace->v[i], "Free_Storage::workspace->v[i]" );
+        sfree( workspace->v[i], __FILE__, __LINE__ );
     }
-    sfree( workspace->v, "Free_Storage::workspace->v" );
+    sfree( workspace->v, __FILE__, __LINE__ );
 
     for ( i = 0; i < 3; ++i )
     {
-        sfree( workspace->s[i], "Free_Storage::workspace->s[i]" );
-        sfree( workspace->t[i], "Free_Storage::workspace->t[i]" );
+        sfree( workspace->s[i], __FILE__, __LINE__ );
+        sfree( workspace->t[i], __FILE__, __LINE__ );
     }
-    sfree( workspace->s, "Free_Storage::workspace->s" );
-    sfree( workspace->t, "Free_Storage::workspace->t" );
+    sfree( workspace->s, __FILE__, __LINE__ );
+    sfree( workspace->t, __FILE__, __LINE__ );
 
-    sfree( workspace->orig_id, "Free_Storage::workspace->orig_id" );
+    sfree( workspace->orig_id, __FILE__, __LINE__ );
 }
 
 
@@ -793,23 +793,23 @@ void Reorder_Atoms( reax_system * const system, static_storage * const workspace
     top = 0;
     g = &system->g;
 
-    new_atoms = scalloc( system->N, sizeof(reax_atom), "Reorder_Atoms::new_atoms" );
-    orig_id = scalloc( system->N, sizeof(int), "Reorder_Atoms::orig_id" );
-    f_old = scalloc( system->N, sizeof(rvec), "Reorder_Atoms::f_old" );
+    new_atoms = scalloc( system->N, sizeof(reax_atom), __FILE__, __LINE__ );
+    orig_id = scalloc( system->N, sizeof(int), __FILE__, __LINE__ );
+    f_old = scalloc( system->N, sizeof(rvec), __FILE__, __LINE__ );
 
-    s = scalloc( 5, sizeof(real *), "Reorder_Atoms::s" );
-    t = scalloc( 5, sizeof(real *), "Reorder_Atoms::t" );
+    s = scalloc( 5, sizeof(real *), __FILE__, __LINE__ );
+    t = scalloc( 5, sizeof(real *), __FILE__, __LINE__ );
     for ( i = 0; i < 5; ++i )
     {
-        s[i] = scalloc( system->N_cm, sizeof(real), "Reorder_Atoms::s[i]" );
-        t[i] = scalloc( system->N_cm, sizeof(real), "Reorder_Atoms::t[i]" );
+        s[i] = scalloc( system->N_cm, sizeof(real), __FILE__, __LINE__ );
+        t[i] = scalloc( system->N_cm, sizeof(real), __FILE__, __LINE__ );
     }
 
     v = scalloc( control->cm_solver_restart + 1, sizeof(real *),
-            "Reorder_Atoms::v" );
+            __FILE__, __LINE__ );
     for ( i = 0; i < control->cm_solver_restart + 1; ++i )
     {
-        v[i] = scalloc( system->N_cm, sizeof(real), "Reorder_Atoms::v[i]" );
+        v[i] = scalloc( system->N_cm, sizeof(real), __FILE__, __LINE__ );
     }
 
     for ( i = 0; i < g->ncell[0]; i++ )
@@ -837,7 +837,7 @@ void Reorder_Atoms( reax_system * const system, static_storage * const workspace
         }
     }
 
-    sfree( system->atoms, "Reorder_Atoms::system->atoms" );
+    sfree( system->atoms, __FILE__, __LINE__ );
     Free_Storage( workspace, control );
 
     system->atoms = new_atoms;
diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c
index c97368cb6e67a068bbf09184ce6ab26d0f444be0..7b43cb96dd53e2a986e4ae69f0e42591c49a0295 100644
--- a/sPuReMD/src/init_md.c
+++ b/sPuReMD/src/init_md.c
@@ -167,7 +167,7 @@ static void Init_Simulation_Data( reax_system *system, control_params *control,
             || control->ensemble == aNPT || control->compute_pressure == TRUE) )
     {
         data->press_local = smalloc( sizeof( rtensor ) * control->num_threads,
-               "Init_Simulation_Data::data->press_local" );
+               __FILE__, __LINE__ );
     }
 #endif
 
@@ -331,42 +331,42 @@ static void Init_Workspace( reax_system *system, control_params *control,
     {
         /* hydrogen bond list */
         workspace->hbond_index = smalloc( system->N_max * sizeof( int ),
-               "Init_Workspace::workspace->hbond_index" );
+               __FILE__, __LINE__ );
 
         /* bond order related storage  */
         workspace->total_bond_order = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->bond_order" );
+               __FILE__, __LINE__ );
         workspace->Deltap = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->Deltap" );
+               __FILE__, __LINE__ );
         workspace->Deltap_boc = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->Deltap_boc" );
+               __FILE__, __LINE__ );
         workspace->dDeltap_self = smalloc( system->N_max * sizeof( rvec ),
-               "Init_Workspace::workspace->dDeltap_self" );
+               __FILE__, __LINE__ );
 
         workspace->Delta = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->Delta" );
+               __FILE__, __LINE__ );
         workspace->Delta_lp = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->Delta_lp" );
+               __FILE__, __LINE__ );
         workspace->Delta_lp_temp = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->Delta_lp_temp" );
+               __FILE__, __LINE__ );
         workspace->dDelta_lp = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->dDelta_lp" );
+               __FILE__, __LINE__ );
         workspace->dDelta_lp_temp = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->dDelta_lp_temp" );
+               __FILE__, __LINE__ );
         workspace->Delta_e = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->Delta_e" );
+               __FILE__, __LINE__ );
         workspace->Delta_boc = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->Delta_boc" );
+               __FILE__, __LINE__ );
         workspace->nlp = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->nlp" );
+               __FILE__, __LINE__ );
         workspace->nlp_temp = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->nlp_temp" );
+               __FILE__, __LINE__ );
         workspace->Clp = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->Clp" );
+               __FILE__, __LINE__ );
         workspace->CdDelta = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->CdDelta" );
+               __FILE__, __LINE__ );
         workspace->vlpex = smalloc( system->N_max * sizeof( real ),
-               "Init_Workspace::workspace->vlpex" );
+               __FILE__, __LINE__ );
     }
 
     /* charge method storage */
@@ -374,23 +374,26 @@ static void Init_Workspace( reax_system *system, control_params *control,
     {
         case QEQ_CM:
             system->N_cm = system->N;
-            system->N_cm_max = system->N_max;
-            break;
-        case EE_CM:
-            if ( system->num_molec_charge_constraints == 0 )
+            if ( realloc == TRUE || system->N_cm > system->N_cm_max )
             {
-                system->N_cm = system->N + 1;
-                system->N_cm_max = system->N_max + 1;
+                system->N_cm_max = system->N_max;
             }
-            else
+            break;
+        case EE_CM:
+            system->N_cm = system->N
+                + (system->num_molec_charge_constraints == 0 ? 1 : system->num_molec_charge_constraints);
+            if ( realloc == TRUE || system->N_cm > system->N_cm_max )
             {
-                system->N_cm = system->N + system->num_molec_charge_constraints;
-                system->N_cm_max = system->N_max + system->num_molec_charge_constraints;
+                system->N_cm_max = system->N_max
+                    + (system->num_molec_charge_constraints == 0 ? 1 : system->num_molec_charge_constraints);
             }
             break;
         case ACKS2_CM:
             system->N_cm = 2 * system->N + 2;
-            system->N_cm_max = 2 * system->N_max + 2;
+            if ( realloc == TRUE || system->N_cm > system->N_cm_max )
+            {
+                system->N_cm_max = 2 * system->N_max + 2;
+            }
             break;
         default:
             fprintf( stderr, "[ERROR] Unknown charge method type. Terminating...\n" );
@@ -408,27 +411,27 @@ static void Init_Workspace( reax_system *system, control_params *control,
                 || control->cm_solver_pre_comp_type == FG_ILUT_PC )
         {
             workspace->droptol = scalloc( system->N_cm_max, sizeof( real ),
-                    "Init_Workspace::workspace->droptol" );
+                    __FILE__, __LINE__ );
         }
 
         workspace->b_s = scalloc( system->N_cm_max, sizeof( real ),
-                "Init_Workspace::workspace->b_s" );
+                __FILE__, __LINE__ );
         workspace->b_t = scalloc( system->N_cm_max, sizeof( real ),
-                "Init_Workspace::workspace->b_t" );
+                __FILE__, __LINE__ );
         workspace->b_prc = scalloc( system->N_cm_max * 2, sizeof( real ),
-                "Init_Workspace::workspace->b_prc" );
+                __FILE__, __LINE__ );
         workspace->b_prm = scalloc( system->N_cm_max * 2, sizeof( real ),
-                "Init_Workspace::workspace->b_prm" );
+                __FILE__, __LINE__ );
         workspace->s = scalloc( 5, sizeof( real* ),
-                "Init_Workspace::workspace->s" );
+                __FILE__, __LINE__ );
         workspace->t = scalloc( 5, sizeof( real* ),
-                "Init_Workspace::workspace->t" );
+                __FILE__, __LINE__ );
         for ( i = 0; i < 5; ++i )
         {
             workspace->s[i] = scalloc( system->N_cm_max, sizeof( real ),
-                    "Init_Workspace::workspace->s[i]" );
+                    __FILE__, __LINE__ );
             workspace->t[i] = scalloc( system->N_cm_max, sizeof( real ),
-                    "Init_Workspace::workspace->t[i]" );
+                    __FILE__, __LINE__ );
         }
     }
 
@@ -494,81 +497,81 @@ static void Init_Workspace( reax_system *system, control_params *control,
             case GMRES_S:
             case GMRES_H_S:
                 workspace->y = scalloc( control->cm_solver_restart + 1, sizeof( real ),
-                        "Init_Workspace::workspace->y" );
+                        __FILE__, __LINE__ );
                 workspace->z = scalloc( control->cm_solver_restart + 1, sizeof( real ),
-                        "Init_Workspace::workspace->z" );
+                        __FILE__, __LINE__ );
                 workspace->g = scalloc( control->cm_solver_restart + 1, sizeof( real ),
-                        "Init_Workspace::workspace->g" );
+                        __FILE__, __LINE__ );
                 workspace->h = scalloc( control->cm_solver_restart + 1, sizeof( real*),
-                        "Init_Workspace::workspace->h" );
+                        __FILE__, __LINE__ );
                 workspace->hs = scalloc( control->cm_solver_restart + 1, sizeof( real ),
-                        "Init_Workspace::workspace->hs" );
+                        __FILE__, __LINE__ );
                 workspace->hc = scalloc( control->cm_solver_restart + 1, sizeof( real ),
-                        "Init_Workspace::workspace->hc" );
+                        __FILE__, __LINE__ );
                 workspace->rn = scalloc( control->cm_solver_restart + 1, sizeof( real*),
-                        "Init_Workspace::workspace->rn" );
+                        __FILE__, __LINE__ );
                 workspace->v = scalloc( control->cm_solver_restart + 1, sizeof( real*),
-                        "Init_Workspace::workspace->v" );
+                        __FILE__, __LINE__ );
 
                 for ( i = 0; i < control->cm_solver_restart + 1; ++i )
                 {
                     workspace->h[i] = scalloc( control->cm_solver_restart + 1, sizeof( real ),
-                            "Init_Workspace::workspace->h[i]" );
+                            __FILE__, __LINE__ );
                     workspace->rn[i] = scalloc( system->N_cm_max * 2, sizeof( real ),
-                            "Init_Workspace::workspace->rn[i]" );
+                            __FILE__, __LINE__ );
                     workspace->v[i] = scalloc( system->N_cm_max, sizeof( real ),
-                            "Init_Workspace::workspace->v[i]" );
+                            __FILE__, __LINE__ );
                 }
 
                 workspace->r = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->r" );
+                        __FILE__, __LINE__ );
                 workspace->d = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->d" );
+                        __FILE__, __LINE__ );
                 workspace->q = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->q" );
+                        __FILE__, __LINE__ );
                 workspace->p = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->p" );
+                        __FILE__, __LINE__ );
                 break;
 
             case CG_S:
                 workspace->r = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->r" );
+                        __FILE__, __LINE__ );
                 workspace->d = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->d" );
+                        __FILE__, __LINE__ );
                 workspace->q = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->q" );
+                        __FILE__, __LINE__ );
                 workspace->p = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->p" );
+                        __FILE__, __LINE__ );
                 break;
 
             case SDM_S:
                 workspace->r = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->r" );
+                        __FILE__, __LINE__ );
                 workspace->d = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->d" );
+                        __FILE__, __LINE__ );
                 workspace->q = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->q" );
+                        __FILE__, __LINE__ );
                 break;
 
             case BiCGStab_S:
                 workspace->r = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->r" );
+                        __FILE__, __LINE__ );
                 workspace->r_hat = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->r_hat" );
+                        __FILE__, __LINE__ );
                 workspace->d = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->d" );
+                        __FILE__, __LINE__ );
                 workspace->q = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->q" );
+                        __FILE__, __LINE__ );
                 workspace->q_hat = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->q_hat" );
+                        __FILE__, __LINE__ );
                 workspace->p = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->p" );
+                        __FILE__, __LINE__ );
                 workspace->y = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->y" );
+                        __FILE__, __LINE__ );
                 workspace->z = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->z" );
+                        __FILE__, __LINE__ );
                 workspace->g = scalloc( system->N_cm_max, sizeof( real ),
-                        "Init_Workspace::workspace->g" );
+                        __FILE__, __LINE__ );
                 break;
 
             default:
@@ -580,7 +583,7 @@ static void Init_Workspace( reax_system *system, control_params *control,
 #if defined(_OPENMP)
         /* SpMV related */
         workspace->b_local = smalloc( control->num_threads * system->N_cm_max * sizeof(real),
-                "Init_Workspace::b_local" );
+                __FILE__, __LINE__ );
 #endif
     }
 
@@ -593,19 +596,19 @@ static void Init_Workspace( reax_system *system, control_params *control,
                 control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA )
         {
             workspace->row_levels_L = smalloc( system->N_cm_max * sizeof(unsigned int),
-                    "Init_Workspace::row_levels_L" );
+                    __FILE__, __LINE__ );
             workspace->level_rows_L = smalloc( system->N_cm_max * sizeof(unsigned int),
-                    "Init_Workspace::level_rows_L" );
+                    __FILE__, __LINE__ );
             workspace->level_rows_cnt_L = smalloc( (system->N_cm_max + 1) * sizeof(unsigned int),
-                    "Init_Workspace::level_rows_cnt_L" );
+                    __FILE__, __LINE__ );
             workspace->row_levels_U = smalloc( system->N_cm_max * sizeof(unsigned int),
-                    "Init_Workspace::row_levels_U" );
+                    __FILE__, __LINE__ );
             workspace->level_rows_U = smalloc( system->N_cm_max * sizeof(unsigned int),
-                    "Init_Workspace::level_rows_U" );
+                    __FILE__, __LINE__ );
             workspace->level_rows_cnt_U = smalloc( (system->N_cm_max + 1) * sizeof(unsigned int),
-                    "Init_Workspace::level_rows_cnt_U" );
+                    __FILE__, __LINE__ );
             workspace->top = smalloc( (system->N_cm_max + 1) * sizeof(unsigned int),
-                    "Init_Workspace::top" );
+                    __FILE__, __LINE__ );
         }
         else
         {
@@ -626,21 +629,21 @@ static void Init_Workspace( reax_system *system, control_params *control,
         if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA )
         {
             workspace->color = smalloc( sizeof(unsigned int) * system->N_cm_max,
-                    "Init_Workspace::color" );
+                    __FILE__, __LINE__ );
             workspace->to_color = smalloc( sizeof(unsigned int) * system->N_cm_max,
-                    "Init_Workspace::to_color" );
+                    __FILE__, __LINE__ );
             workspace->conflict = smalloc( sizeof(unsigned int) * system->N_cm_max,
-                    "setup_graph_coloring::conflict" );
+                    __FILE__, __LINE__ );
             workspace->conflict_cnt = smalloc( sizeof(unsigned int) * (control->num_threads + 1),
-                    "Init_Workspace::conflict_cnt" );
+                    __FILE__, __LINE__ );
             workspace->recolor = smalloc( sizeof(unsigned int) * system->N_cm_max,
-                    "Init_Workspace::recolor" );
+                    __FILE__, __LINE__ );
             workspace->color_top = smalloc( sizeof(unsigned int) * (system->N_cm_max + 1),
-                    "Init_Workspace::color_top" );
+                    __FILE__, __LINE__ );
             workspace->permuted_row_col = smalloc( sizeof(unsigned int) * system->N_cm_max,
-                    "Init_Workspace::premuted_row_col" );
+                    __FILE__, __LINE__ );
             workspace->permuted_row_col_inv = smalloc( sizeof(unsigned int) * system->N_cm_max,
-                    "Init_Workspace::premuted_row_col_inv" );
+                    __FILE__, __LINE__ );
         }
         else
         {
@@ -658,8 +661,8 @@ static void Init_Workspace( reax_system *system, control_params *control,
         if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA 
                 || control->cm_solver_pre_comp_type == ILUTP_PC )
         {
-            workspace->y_p = smalloc( sizeof(real) * system->N_cm_max, "Init_Workspace::y_p" );
-            workspace->x_p = smalloc( sizeof(real) * system->N_cm_max, "Init_Workspace::x_p" );
+            workspace->y_p = smalloc( sizeof(real) * system->N_cm_max, __FILE__, __LINE__ );
+            workspace->x_p = smalloc( sizeof(real) * system->N_cm_max, __FILE__, __LINE__ );
         }
         else
         {
@@ -671,15 +674,15 @@ static void Init_Workspace( reax_system *system, control_params *control,
         if ( control->cm_solver_pre_app_type == JACOBI_ITER_PA )
         {
             workspace->Dinv_L = smalloc( sizeof(real) * system->N_cm_max,
-                    "Init_Workspace::Dinv_L" );
+                    __FILE__, __LINE__ );
             workspace->Dinv_U = smalloc( sizeof(real) * system->N_cm_max,
-                    "Init_Workspace::Dinv_U" );
+                    __FILE__, __LINE__ );
             workspace->Dinv_b = smalloc( sizeof(real) * system->N_cm_max,
-                    "Init_Workspace::Dinv_b" );
+                    __FILE__, __LINE__ );
             workspace->rp = smalloc( sizeof(real) * system->N_cm_max,
-                    "Init_Workspace::rp" );
+                    __FILE__, __LINE__ );
             workspace->rp2 = smalloc( sizeof(real) * system->N_cm_max,
-                    "Init_Workspace::rp2" );
+                    __FILE__, __LINE__ );
         }
         else
         {
@@ -694,38 +697,33 @@ static void Init_Workspace( reax_system *system, control_params *control,
         if ( control->cm_solver_pre_comp_type == ILUTP_PC )
         {
             workspace->perm_ilutp = smalloc( sizeof( int ) * system->N_cm_max,
-                   "Init_Workspace::workspace->perm_ilutp" );
+                   __FILE__, __LINE__ );
         }
         else
         {
             workspace->perm_ilutp = NULL;
         }
 
-#if defined(QMMM)
-        workspace->mask_qmmm = smalloc( system->N_cm_max * sizeof( int ),
-               "Init_Workspace::workspace->mask_qmmm" );
-#endif
-
         /* integrator storage */
         workspace->a = smalloc( system->N_max * sizeof( rvec ),
-               "Init_Workspace::workspace->a" );
+               __FILE__, __LINE__ );
         workspace->f_old = smalloc( system->N_max * sizeof( rvec ),
-               "Init_Workspace::workspace->f_old" );
+               __FILE__, __LINE__ );
         workspace->v_const = smalloc( system->N_max * sizeof( rvec ),
-               "Init_Workspace::workspace->v_const" );
+               __FILE__, __LINE__ );
 
 #if defined(_OPENMP)
         workspace->f_local = smalloc( control->num_threads * system->N_max * sizeof( rvec ),
-               "Init_Workspace::workspace->f_local" );
+               __FILE__, __LINE__ );
 #endif
 
         /* storage for analysis */
         if ( control->molec_anal || control->diffusion_coef )
         {
             workspace->mark = scalloc( system->N_max, sizeof(int),
-                    "Init_Workspace::workspace->mark" );
+                    __FILE__, __LINE__ );
             workspace->old_mark = scalloc( system->N_max, sizeof(int),
-                    "Init_Workspace::workspace->old_mark" );
+                    __FILE__, __LINE__ );
         }
         else
         {
@@ -735,7 +733,7 @@ static void Init_Workspace( reax_system *system, control_params *control,
         if ( control->diffusion_coef )
         {
             workspace->x_old = scalloc( system->N_max, sizeof( rvec ),
-                    "Init_Workspace::workspace->x_old" );
+                    __FILE__, __LINE__ );
         }
         else
         {
@@ -745,31 +743,31 @@ static void Init_Workspace( reax_system *system, control_params *control,
 
 #if defined(TEST_FORCES)
     workspace->dDelta = smalloc( system->N_max * sizeof( rvec ),
-           "Init_Workspace::workspace->dDelta" );
+           __FILE__, __LINE__ );
     workspace->f_ele = smalloc( system->N_max * sizeof( rvec ),
-           "Init_Workspace::workspace->f_ele" );
+           __FILE__, __LINE__ );
     workspace->f_vdw = smalloc( system->N_max * sizeof( rvec ),
-           "Init_Workspace::workspace->f_vdw" );
+           __FILE__, __LINE__ );
     workspace->f_be = smalloc( system->N_max * sizeof( rvec ),
-           "Init_Workspace::workspace->f_be" );
+           __FILE__, __LINE__ );
     workspace->f_lp = smalloc( system->N_max * sizeof( rvec ),
-           "Init_Workspace::workspace->f_lp" );
+           __FILE__, __LINE__ );
     workspace->f_ov = smalloc( system->N_max * sizeof( rvec ),
-           "Init_Workspace::workspace->f_ov" );
+           __FILE__, __LINE__ );
     workspace->f_un = smalloc( system->N_max * sizeof( rvec ),
-           "Init_Workspace::workspace->f_un" );
+           __FILE__, __LINE__ );
     workspace->f_ang = smalloc( system->N_max * sizeof( rvec ),
-           "Init_Workspace::workspace->f_ang" );
+           __FILE__, __LINE__ );
     workspace->f_coa = smalloc( system->N_max * sizeof( rvec ),
-           "Init_Workspace::workspace->f_coa" );
+           __FILE__, __LINE__ );
     workspace->f_pen = smalloc( system->N_max * sizeof( rvec ),
-           "Init_Workspace::workspace->f_pen" );
+           __FILE__, __LINE__ );
     workspace->f_hb = smalloc( system->N_max * sizeof( rvec ),
-           "Init_Workspace::workspace->f_hb" );
+           __FILE__, __LINE__ );
     workspace->f_tor = smalloc( system->N_max * sizeof( rvec ),
-           "Init_Workspace::workspace->f_tor" );
+           __FILE__, __LINE__ );
     workspace->f_con = smalloc( system->N_max * sizeof( rvec ),
-           "Init_Workspace::workspace->f_con" );
+           __FILE__, __LINE__ );
 #endif
 
     workspace->realloc.num_far = -1;
@@ -790,7 +788,7 @@ static void Init_Lists( reax_system *system, control_params *control,
         simulation_data *data, static_storage *workspace,
         reax_list **lists, output_controls *out_control, int realloc )
 {
-    int i, num_nbrs, num_bonds, num_hbonds, num_3body, Htop, max_nnz;
+    int i, num_nbrs, num_bonds, num_hbonds, num_3body, Htop;
     int *hb_top, *bond_top;
 
     num_nbrs = Estimate_Num_Neighbors( system, control, workspace, lists );
@@ -817,41 +815,26 @@ static void Init_Lists( reax_system *system, control_params *control,
     Generate_Neighbor_Lists( system, control, data, workspace, lists );
 
     Htop = 0;
-    hb_top = scalloc( system->N, sizeof(int), "Init_Lists::hb_top" );
-    bond_top = scalloc( system->N, sizeof(int), "Init_Lists::bond_top" );
+    hb_top = scalloc( system->N, sizeof(int), __FILE__, __LINE__ );
+    bond_top = scalloc( system->N, sizeof(int), __FILE__, __LINE__ );
     num_3body = 0;
 
     Estimate_Storage_Sizes( system, control, lists, &Htop,
             hb_top, bond_top, &num_3body );
     num_3body = MAX( num_3body, MIN_BONDS );
 
-    switch ( control->charge_method )
-    {
-        case QEQ_CM:
-            max_nnz = Htop;
-            break;
-        case EE_CM:
-            max_nnz = Htop + system->N_cm;
-            break;
-        case ACKS2_CM:
-            max_nnz = 2 * Htop + 3 * system->N + 2;
-            break;
-        default:
-            max_nnz = Htop;
-            break;
-    }
-
     if ( workspace->H.allocated == FALSE )
     {
-        Allocate_Matrix( &workspace->H, system->N_cm, system->N_cm_max, max_nnz );
+        Allocate_Matrix( &workspace->H, system->N_cm, system->N_cm_max, Htop );
     }
-    else if ( realloc == TRUE || workspace->H.m < max_nnz )
+    else if ( realloc == TRUE || workspace->H.m < Htop
+            || workspace->H.n_max < system->N_cm_max )
     {
         if ( workspace->H.allocated == TRUE )
         {
             Deallocate_Matrix( &workspace->H );
         }
-        Allocate_Matrix( &workspace->H, system->N_cm, system->N_cm_max, max_nnz );
+        Allocate_Matrix( &workspace->H, system->N_cm, system->N_cm_max, Htop );
     }
     else
     {
@@ -864,9 +847,10 @@ static void Init_Lists( reax_system *system, control_params *control,
          *   If so, need to refactor Estimate_Storage_Sizes
          *   to use various cut-off distances as parameters
          *   (non-bonded, hydrogen, 3body, etc.) */
-        Allocate_Matrix( &workspace->H_sp, system->N_cm, system->N_cm_max, max_nnz );
+        Allocate_Matrix( &workspace->H_sp, system->N_cm, system->N_cm_max, Htop );
     }
-    else if ( realloc == TRUE || workspace->H_sp.m < max_nnz )
+    else if ( realloc == TRUE || workspace->H_sp.m < Htop
+            || workspace->H_sp.n_max < system->N_cm_max )
     {
         if ( workspace->H_sp.allocated == TRUE )
         {
@@ -876,7 +860,7 @@ static void Init_Lists( reax_system *system, control_params *control,
          *   If so, need to refactor Estimate_Storage_Sizes
          *   to use various cut-off distances as parameters
          *   (non-bonded, hydrogen, 3body, etc.) */
-        Allocate_Matrix( &workspace->H_sp, system->N_cm, system->N_cm_max, max_nnz );
+        Allocate_Matrix( &workspace->H_sp, system->N_cm, system->N_cm_max, Htop );
     }
     else
     {
@@ -1032,8 +1016,8 @@ static void Init_Lists( reax_system *system, control_params *control,
     }
 #endif
 
-    sfree( hb_top, "Init_Lists::hb_top" );
-    sfree( bond_top, "Init_Lists::bond_top" );
+    sfree( hb_top, __FILE__, __LINE__ );
+    sfree( bond_top, __FILE__, __LINE__ );
 }
 
 
@@ -1048,7 +1032,7 @@ static void Init_Out_Controls( reax_system *system, control_params *control,
         strncpy( temp, control->sim_name, TEMP_SIZE - 5 );
         temp[TEMP_SIZE - 5] = '\0';
         strcat( temp, ".trj" );
-        out_control->trj = sfopen( temp, "w" );
+        out_control->trj = sfopen( temp, "w", __FILE__, __LINE__ );
         out_control->write_header( system, control, workspace, out_control );
     }
     else
@@ -1061,7 +1045,7 @@ static void Init_Out_Controls( reax_system *system, control_params *control,
         strncpy( temp, control->sim_name, TEMP_SIZE - 5 );
         temp[TEMP_SIZE - 5] = '\0';
         strcat( temp, ".out" );
-        out_control->out = sfopen( temp, "w" );
+        out_control->out = sfopen( temp, "w", __FILE__, __LINE__ );
         fprintf( out_control->out, "%-6s%16s%16s%16s%11s%11s%13s%13s%13s\n",
                  "step", "total_energy", "poten_energy", "kin_energy",
                  "temp", "target", "volume", "press", "target" );
@@ -1070,7 +1054,7 @@ static void Init_Out_Controls( reax_system *system, control_params *control,
         strncpy( temp, control->sim_name, TEMP_SIZE - 5 );
         temp[TEMP_SIZE - 5] = '\0';
         strcat( temp, ".pot" );
-        out_control->pot = sfopen( temp, "w" );
+        out_control->pot = sfopen( temp, "w", __FILE__, __LINE__ );
         fprintf( out_control->pot,
                  "%-6s%13s%13s%13s%13s%13s%13s%13s%13s%13s%13s%13s\n",
                  "step", "ebond", "eatom", "elp", "eang", "ecoa", "ehb",
@@ -1080,7 +1064,7 @@ static void Init_Out_Controls( reax_system *system, control_params *control,
         strncpy( temp, control->sim_name, TEMP_SIZE - 5 );
         temp[TEMP_SIZE - 5] = '\0';
         strcat( temp, ".log" );
-        out_control->log = sfopen( temp, "w" );
+        out_control->log = sfopen( temp, "w", __FILE__, __LINE__ );
         fprintf( out_control->log, "%-6s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n",
                  "step", "total", "neighbors", "init", "bonded",
                  "nonbonded", "cm", "cm_sort", "s_iters", "pre_comp", "pre_app",
@@ -1099,7 +1083,7 @@ static void Init_Out_Controls( reax_system *system, control_params *control,
         strncpy( temp, control->sim_name, TEMP_SIZE - 5 );
         temp[TEMP_SIZE - 5] = '\0';
         strcat( temp, ".prs" );
-        out_control->prs = sfopen( temp, "w" );
+        out_control->prs = sfopen( temp, "w", __FILE__, __LINE__ );
 #if defined(DEBUG) || defined(DEBUG_FOCUS)
         fprintf( out_control->prs, "%-8s %13s %13s %13s %13s %13s %13s\n",
                 "step", "KExx", "KEyy", "KEzz",
@@ -1120,11 +1104,11 @@ static void Init_Out_Controls( reax_system *system, control_params *control,
     if ( output_enabled == TRUE && control->molec_anal )
     {
         snprintf( temp, TEMP_SIZE, "%.*s.mol", TEMP_SIZE - 5, control->sim_name );
-        out_control->mol = sfopen( temp, "w" );
+        out_control->mol = sfopen( temp, "w", __FILE__, __LINE__ );
         if ( control->num_ignored )
         {
             snprintf( temp, TEMP_SIZE, "%.*s.ign", TEMP_SIZE - 5, control->sim_name );
-            out_control->ign = sfopen( temp, "w" );
+            out_control->ign = sfopen( temp, "w", __FILE__, __LINE__ );
         }
     }
     else
@@ -1138,7 +1122,7 @@ static void Init_Out_Controls( reax_system *system, control_params *control,
         strncpy( temp, control->sim_name, TEMP_SIZE - 5 );
         temp[TEMP_SIZE - 5] = '\0';
         strcat( temp, ".dpl" );
-        out_control->dpl = sfopen( temp, "w" );
+        out_control->dpl = sfopen( temp, "w", __FILE__, __LINE__ );
         fprintf( out_control->dpl,
                  "Step      Molecule Count  Avg. Dipole Moment Norm\n" );
         fflush( out_control->dpl );
@@ -1153,7 +1137,7 @@ static void Init_Out_Controls( reax_system *system, control_params *control,
         strncpy( temp, control->sim_name, TEMP_SIZE - 6 );
         temp[TEMP_SIZE - 6] = '\0';
         strcat( temp, ".drft" );
-        out_control->drft = sfopen( temp, "w" );
+        out_control->drft = sfopen( temp, "w", __FILE__, __LINE__ );
         fprintf( out_control->drft, "Step     Type Count   Avg Squared Disp\n" );
         fflush( out_control->drft );
     }
@@ -1167,62 +1151,62 @@ static void Init_Out_Controls( reax_system *system, control_params *control,
     strncpy( temp, control->sim_name, TEMP_SIZE - 6 );
     temp[TEMP_SIZE - 6] = '\0';
     strcat( temp, ".ebond" );
-    out_control->ebond = sfopen( temp, "w" );
+    out_control->ebond = sfopen( temp, "w", __FILE__, __LINE__ );
 
     strncpy( temp, control->sim_name, TEMP_SIZE - 5 );
     temp[TEMP_SIZE - 5] = '\0';
     strcat( temp, ".elp" );
-    out_control->elp = sfopen( temp, "w" );
+    out_control->elp = sfopen( temp, "w", __FILE__, __LINE__ );
 
     strncpy( temp, control->sim_name, TEMP_SIZE - 5 );
     temp[TEMP_SIZE - 5] = '\0';
     strcat( temp, ".eov" );
-    out_control->eov = sfopen( temp, "w" );
+    out_control->eov = sfopen( temp, "w", __FILE__, __LINE__ );
 
     strncpy( temp, control->sim_name, TEMP_SIZE - 5 );
     temp[TEMP_SIZE - 5] = '\0';
     strcat( temp, ".eun" );
-    out_control->eun = sfopen( temp, "w" );
+    out_control->eun = sfopen( temp, "w", __FILE__, __LINE__ );
 
     strncpy( temp, control->sim_name, TEMP_SIZE - 6 );
     temp[TEMP_SIZE - 6] = '\0';
     strcat( temp, ".eval" );
-    out_control->eval = sfopen( temp, "w" );
+    out_control->eval = sfopen( temp, "w", __FILE__, __LINE__ );
 
     strncpy( temp, control->sim_name, TEMP_SIZE - 6 );
     temp[TEMP_SIZE - 6] = '\0';
     strcat( temp, ".epen" );
-    out_control->epen = sfopen( temp, "w" );
+    out_control->epen = sfopen( temp, "w", __FILE__, __LINE__ );
 
     strncpy( temp, control->sim_name, TEMP_SIZE - 6 );
     temp[TEMP_SIZE - 6] = '\0';
     strcat( temp, ".ecoa" );
-    out_control->ecoa = sfopen( temp, "w" );
+    out_control->ecoa = sfopen( temp, "w", __FILE__, __LINE__ );
 
     strncpy( temp, control->sim_name, TEMP_SIZE - 5 );
     temp[TEMP_SIZE - 5] = '\0';
     strcat( temp, ".ehb" );
-    out_control->ehb = sfopen( temp, "w" );
+    out_control->ehb = sfopen( temp, "w", __FILE__, __LINE__ );
 
     strncpy( temp, control->sim_name, TEMP_SIZE - 6 );
     temp[TEMP_SIZE - 6] = '\0';
     strcat( temp, ".etor" );
-    out_control->etor = sfopen( temp, "w" );
+    out_control->etor = sfopen( temp, "w", __FILE__, __LINE__ );
 
     strncpy( temp, control->sim_name, TEMP_SIZE - 6 );
     temp[TEMP_SIZE - 6] = '\0';
     strcat( temp, ".econ" );
-    out_control->econ = sfopen( temp, "w" );
+    out_control->econ = sfopen( temp, "w", __FILE__, __LINE__ );
 
     strncpy( temp, control->sim_name, TEMP_SIZE - 6 );
     temp[TEMP_SIZE - 6] = '\0';
     strcat( temp, ".evdw" );
-    out_control->evdw = sfopen( temp, "w" );
+    out_control->evdw = sfopen( temp, "w", __FILE__, __LINE__ );
 
     strncpy( temp, control->sim_name, TEMP_SIZE - 6 );
     temp[TEMP_SIZE - 6] = '\0';
     strcat( temp, ".ecou" );
-    out_control->ecou = sfopen( temp, "w" );
+    out_control->ecou = sfopen( temp, "w", __FILE__, __LINE__ );
 #endif
 
 
@@ -1231,67 +1215,67 @@ static void Init_Out_Controls( reax_system *system, control_params *control,
     strncpy( temp, control->sim_name, TEMP_SIZE - 5 );
     temp[TEMP_SIZE - 5] = '\0';
     strcat( temp, ".fbo" );
-    out_control->fbo = sfopen( temp, "w" );
+    out_control->fbo = sfopen( temp, "w", __FILE__, __LINE__ );
 
     /* open bond orders derivatives file */
     strncpy( temp, control->sim_name, TEMP_SIZE - 6 );
     temp[TEMP_SIZE - 6] = '\0';
     strcat( temp, ".fdbo" );
-    out_control->fdbo = sfopen( temp, "w" );
+    out_control->fdbo = sfopen( temp, "w", __FILE__, __LINE__ );
 
     /* open bond forces file */
     strncpy( temp, control->sim_name, TEMP_SIZE - 7 );
     temp[TEMP_SIZE - 7] = '\0';
     strcat( temp, ".fbond" );
-    out_control->fbond = sfopen( temp, "w" );
+    out_control->fbond = sfopen( temp, "w", __FILE__, __LINE__ );
 
     /* open lone-pair forces file */
     strncpy( temp, control->sim_name, TEMP_SIZE - 6 );
     temp[TEMP_SIZE - 6] = '\0';
     strcat( temp, ".flp" );
-    out_control->flp = sfopen( temp, "w" );
+    out_control->flp = sfopen( temp, "w", __FILE__, __LINE__ );
 
     /* open overcoordination forces file */
     strncpy( temp, control->sim_name, TEMP_SIZE - 7 );
     temp[TEMP_SIZE - 7] = '\0';
     strcat( temp, ".fatom" );
-    out_control->fatom = sfopen( temp, "w" );
+    out_control->fatom = sfopen( temp, "w", __FILE__, __LINE__ );
 
     /* open angle forces file */
     strncpy( temp, control->sim_name, TEMP_SIZE - 8 );
     temp[TEMP_SIZE - 8] = '\0';
     strcat( temp, ".f3body" );
-    out_control->f3body = sfopen( temp, "w" );
+    out_control->f3body = sfopen( temp, "w", __FILE__, __LINE__ );
 
     /* open hydrogen bond forces file */
     strncpy( temp, control->sim_name, TEMP_SIZE - 5 );
     temp[TEMP_SIZE - 5] = '\0';
     strcat( temp, ".fhb" );
-    out_control->fhb = sfopen( temp, "w" );
+    out_control->fhb = sfopen( temp, "w", __FILE__, __LINE__ );
 
     /* open torsion forces file */
     strncpy( temp, control->sim_name, TEMP_SIZE - 8 );
     temp[TEMP_SIZE - 8] = '\0';
     strcat( temp, ".f4body" );
-    out_control->f4body = sfopen( temp, "w" );
+    out_control->f4body = sfopen( temp, "w", __FILE__, __LINE__ );
 
     /* open nonbonded forces file */
     strncpy( temp, control->sim_name, TEMP_SIZE - 7 );
     temp[TEMP_SIZE - 7] = '\0';
     strcat( temp, ".fnonb" );
-    out_control->fnonb = sfopen( temp, "w" );
+    out_control->fnonb = sfopen( temp, "w", __FILE__, __LINE__ );
 
     /* open total force file */
     strncpy( temp, control->sim_name, TEMP_SIZE - 6 );
     temp[TEMP_SIZE - 6] = '\0';
     strcat( temp, ".ftot" );
-    out_control->ftot = sfopen( temp, "w" );
+    out_control->ftot = sfopen( temp, "w", __FILE__, __LINE__ );
 
     /* open coulomb forces file */
     strncpy( temp, control->sim_name, TEMP_SIZE - 7 );
     temp[TEMP_SIZE - 7] = '\0';
     strcat( temp, ".ftot2" );
-    out_control->ftot2 = sfopen( temp, "w" );
+    out_control->ftot2 = sfopen( temp, "w", __FILE__, __LINE__ );
 #endif
 
 #undef TEMP_SIZE
@@ -1359,8 +1343,8 @@ static void Finalize_System( reax_system *system, control_params *control,
 
     if ( system->max_num_molec_charge_constraints > 0 )
     {
-        sfree( system->molec_charge_constraints, "Read_BGF::molec_charge_constraints" );
-        sfree( system->molec_charge_constraint_ranges, "Read_BGF::molec_charge_constraint_ranges" );
+        sfree( system->molec_charge_constraints, __FILE__, __LINE__ );
+        sfree( system->molec_charge_constraint_ranges, __FILE__, __LINE__ );
     }
 
     system->max_num_molec_charge_constraints = 0;
@@ -1372,7 +1356,7 @@ static void Finalize_System( reax_system *system, control_params *control,
 
     if ( reset == FALSE )
     {
-        sfree( reax->gp.l, "Finalize_System::reax->gp.l" );
+        sfree( reax->gp.l, __FILE__, __LINE__ );
 
         for ( i = 0; i < reax->max_num_atom_types; i++ )
         {
@@ -1380,27 +1364,27 @@ static void Finalize_System( reax_system *system, control_params *control,
             {
                 for ( k = 0; k < reax->max_num_atom_types; k++ )
                 {
-                    sfree( reax->fbp[i][j][k], "Finalize_System::reax->fbp[i][j][k]" );
+                    sfree( reax->fbp[i][j][k], __FILE__, __LINE__ );
                 }
 
-                sfree( reax->thbp[i][j], "Finalize_System::reax->thbp[i][j]" );
-                sfree( reax->hbp[i][j], "Finalize_System::reax->hbp[i][j]" );
-                sfree( reax->fbp[i][j], "Finalize_System::reax->fbp[i][j]" );
+                sfree( reax->thbp[i][j], __FILE__, __LINE__ );
+                sfree( reax->hbp[i][j], __FILE__, __LINE__ );
+                sfree( reax->fbp[i][j], __FILE__, __LINE__ );
             }
 
-            sfree( reax->tbp[i], "Finalize_System::reax->tbp[i]" );
-            sfree( reax->thbp[i], "Finalize_System::reax->thbp[i]" );
-            sfree( reax->hbp[i], "Finalize_System::reax->hbp[i]" );
-            sfree( reax->fbp[i], "Finalize_System::reax->fbp[i]" );
+            sfree( reax->tbp[i], __FILE__, __LINE__ );
+            sfree( reax->thbp[i], __FILE__, __LINE__ );
+            sfree( reax->hbp[i], __FILE__, __LINE__ );
+            sfree( reax->fbp[i], __FILE__, __LINE__ );
         }
 
-        sfree( reax->sbp, "Finalize_System::reax->sbp" );
-        sfree( reax->tbp, "Finalize_System::reax->tbp" );
-        sfree( reax->thbp, "Finalize_System::reax->thbp" );
-        sfree( reax->hbp, "Finalize_System::reax->hbp" );
-        sfree( reax->fbp, "Finalize_System::reax->fbp" );
+        sfree( reax->sbp, __FILE__, __LINE__ );
+        sfree( reax->tbp, __FILE__, __LINE__ );
+        sfree( reax->thbp, __FILE__, __LINE__ );
+        sfree( reax->hbp, __FILE__, __LINE__ );
+        sfree( reax->fbp, __FILE__, __LINE__ );
 
-        sfree( system->atoms, "Finalize_System::system->atoms" );
+        sfree( system->atoms, __FILE__, __LINE__ );
     }
 }
 
@@ -1412,7 +1396,7 @@ static void Finalize_Simulation_Data( reax_system *system, control_params *contr
     if ( control->ensemble == sNPT || control->ensemble == iNPT
             || control->ensemble == aNPT || control->compute_pressure == TRUE )
     {
-        sfree( data->press_local, "Finalize_Simulation_Data::data->press_local" );
+        sfree( data->press_local, __FILE__, __LINE__ );
     }
 #endif
 }
@@ -1423,29 +1407,29 @@ static void Finalize_Workspace( reax_system *system, control_params *control,
 {
     int i;
 
-    sfree( workspace->hbond_index, "Finalize_Workspace::workspace->hbond_index" );
-    sfree( workspace->total_bond_order, "Finalize_Workspace::workspace->total_bond_order" );
-    sfree( workspace->Deltap, "Finalize_Workspace::workspace->Deltap" );
-    sfree( workspace->Deltap_boc, "Finalize_Workspace::workspace->Deltap_boc" );
-    sfree( workspace->dDeltap_self, "Finalize_Workspace::workspace->dDeltap_self" );
-    sfree( workspace->Delta, "Finalize_Workspace::workspace->Delta" );
-    sfree( workspace->Delta_lp, "Finalize_Workspace::workspace->Delta_lp" );
-    sfree( workspace->Delta_lp_temp, "Finalize_Workspace::workspace->Delta_lp_temp" );
-    sfree( workspace->dDelta_lp, "Finalize_Workspace::workspace->dDelta_lp" );
-    sfree( workspace->dDelta_lp_temp, "Finalize_Workspace::workspace->dDelta_lp_temp" );
-    sfree( workspace->Delta_e, "Finalize_Workspace::workspace->Delta_e" );
-    sfree( workspace->Delta_boc, "Finalize_Workspace::workspace->Delta_boc" );
-    sfree( workspace->nlp, "Finalize_Workspace::workspace->nlp" );
-    sfree( workspace->nlp_temp, "Finalize_Workspace::workspace->nlp_temp" );
-    sfree( workspace->Clp, "Finalize_Workspace::workspace->Clp" );
-    sfree( workspace->CdDelta, "Finalize_Workspace::workspace->CdDelta" );
-    sfree( workspace->vlpex, "Finalize_Workspace::workspace->vlpex" );
+    sfree( workspace->hbond_index, __FILE__, __LINE__ );
+    sfree( workspace->total_bond_order, __FILE__, __LINE__ );
+    sfree( workspace->Deltap, __FILE__, __LINE__ );
+    sfree( workspace->Deltap_boc, __FILE__, __LINE__ );
+    sfree( workspace->dDeltap_self, __FILE__, __LINE__ );
+    sfree( workspace->Delta, __FILE__, __LINE__ );
+    sfree( workspace->Delta_lp, __FILE__, __LINE__ );
+    sfree( workspace->Delta_lp_temp, __FILE__, __LINE__ );
+    sfree( workspace->dDelta_lp, __FILE__, __LINE__ );
+    sfree( workspace->dDelta_lp_temp, __FILE__, __LINE__ );
+    sfree( workspace->Delta_e, __FILE__, __LINE__ );
+    sfree( workspace->Delta_boc, __FILE__, __LINE__ );
+    sfree( workspace->nlp, __FILE__, __LINE__ );
+    sfree( workspace->nlp_temp, __FILE__, __LINE__ );
+    sfree( workspace->Clp, __FILE__, __LINE__ );
+    sfree( workspace->CdDelta, __FILE__, __LINE__ );
+    sfree( workspace->vlpex, __FILE__, __LINE__ );
 
     if ( reset == FALSE && (control->geo_format == BGF
             || control->geo_format == ASCII_RESTART
             || control->geo_format == BINARY_RESTART) )
     {
-        sfree( workspace->map_serials, "Finalize_Workspace::workspace->map_serials" );
+        sfree( workspace->map_serials, __FILE__, __LINE__ );
     }
 
     if ( workspace->H.allocated == TRUE )
@@ -1487,27 +1471,27 @@ static void Finalize_Workspace( reax_system *system, control_params *control,
 
     for ( i = 0; i < 5; ++i )
     {
-        sfree( workspace->s[i], "Finalize_Workspace::workspace->s[i]" );
-        sfree( workspace->t[i], "Finalize_Workspace::workspace->t[i]" );
+        sfree( workspace->s[i], __FILE__, __LINE__ );
+        sfree( workspace->t[i], __FILE__, __LINE__ );
     }
 
     if ( control->cm_solver_pre_comp_type == JACOBI_PC )
     {
-        sfree( workspace->Hdia_inv, "Finalize_Workspace::workspace->Hdia_inv" );
+        sfree( workspace->Hdia_inv, __FILE__, __LINE__ );
     }
     if ( control->cm_solver_pre_comp_type == ICHOLT_PC
             || (control->cm_solver_pre_comp_type == ILUT_PC && control->cm_solver_pre_comp_droptol > 0.0 )
             || control->cm_solver_pre_comp_type == ILUTP_PC
             || control->cm_solver_pre_comp_type == FG_ILUT_PC )
     {
-        sfree( workspace->droptol, "Finalize_Workspace::workspace->droptol" );
+        sfree( workspace->droptol, __FILE__, __LINE__ );
     }
-    sfree( workspace->b_s, "Finalize_Workspace::workspace->b_s" );
-    sfree( workspace->b_t, "Finalize_Workspace::workspace->b_t" );
-    sfree( workspace->b_prc, "Finalize_Workspace::workspace->b_prc" );
-    sfree( workspace->b_prm, "Finalize_Workspace::workspace->b_prm" );
-    sfree( workspace->s, "Finalize_Workspace::workspace->s" );
-    sfree( workspace->t, "Finalize_Workspace::workspace->t" );
+    sfree( workspace->b_s, __FILE__, __LINE__ );
+    sfree( workspace->b_t, __FILE__, __LINE__ );
+    sfree( workspace->b_prc, __FILE__, __LINE__ );
+    sfree( workspace->b_prm, __FILE__, __LINE__ );
+    sfree( workspace->s, __FILE__, __LINE__ );
+    sfree( workspace->t, __FILE__, __LINE__ );
 
     switch ( control->cm_solver_type )
     {
@@ -1515,49 +1499,49 @@ static void Finalize_Workspace( reax_system *system, control_params *control,
         case GMRES_H_S:
             for ( i = 0; i < control->cm_solver_restart + 1; ++i )
             {
-                sfree( workspace->h[i], "Finalize_Workspace::workspace->h[i]" );
-                sfree( workspace->rn[i], "Finalize_Workspace::workspace->rn[i]" );
-                sfree( workspace->v[i], "Finalize_Workspace::workspace->v[i]" );
+                sfree( workspace->h[i], __FILE__, __LINE__ );
+                sfree( workspace->rn[i], __FILE__, __LINE__ );
+                sfree( workspace->v[i], __FILE__, __LINE__ );
             }
 
-            sfree( workspace->y, "Finalize_Workspace::workspace->y" );
-            sfree( workspace->z, "Finalize_Workspace::workspace->z" );
-            sfree( workspace->g, "Finalize_Workspace::workspace->g" );
-            sfree( workspace->h, "Finalize_Workspace::workspace->h" );
-            sfree( workspace->hs, "Finalize_Workspace::workspace->hs" );
-            sfree( workspace->hc, "Finalize_Workspace::workspace->hc" );
-            sfree( workspace->rn, "Finalize_Workspace::workspace->rn" );
-            sfree( workspace->v, "Finalize_Workspace::workspace->v" );
-
-            sfree( workspace->r, "Finalize_Workspace::workspace->r" );
-            sfree( workspace->d, "Finalize_Workspace::workspace->d" );
-            sfree( workspace->q, "Finalize_Workspace::workspace->q" );
-            sfree( workspace->p, "Finalize_Workspace::workspace->p" );
+            sfree( workspace->y, __FILE__, __LINE__ );
+            sfree( workspace->z, __FILE__, __LINE__ );
+            sfree( workspace->g, __FILE__, __LINE__ );
+            sfree( workspace->h, __FILE__, __LINE__ );
+            sfree( workspace->hs, __FILE__, __LINE__ );
+            sfree( workspace->hc, __FILE__, __LINE__ );
+            sfree( workspace->rn, __FILE__, __LINE__ );
+            sfree( workspace->v, __FILE__, __LINE__ );
+
+            sfree( workspace->r, __FILE__, __LINE__ );
+            sfree( workspace->d, __FILE__, __LINE__ );
+            sfree( workspace->q, __FILE__, __LINE__ );
+            sfree( workspace->p, __FILE__, __LINE__ );
             break;
 
         case CG_S:
-            sfree( workspace->r, "Finalize_Workspace::workspace->r" );
-            sfree( workspace->d, "Finalize_Workspace::workspace->d" );
-            sfree( workspace->q, "Finalize_Workspace::workspace->q" );
-            sfree( workspace->p, "Finalize_Workspace::workspace->p" );
+            sfree( workspace->r, __FILE__, __LINE__ );
+            sfree( workspace->d, __FILE__, __LINE__ );
+            sfree( workspace->q, __FILE__, __LINE__ );
+            sfree( workspace->p, __FILE__, __LINE__ );
             break;
 
         case SDM_S:
-            sfree( workspace->r, "Finalize_Workspace::workspace->r" );
-            sfree( workspace->d, "Finalize_Workspace::workspace->d" );
-            sfree( workspace->q, "Finalize_Workspace::workspace->q" );
+            sfree( workspace->r, __FILE__, __LINE__ );
+            sfree( workspace->d, __FILE__, __LINE__ );
+            sfree( workspace->q, __FILE__, __LINE__ );
             break;
 
         case BiCGStab_S:
-            sfree( workspace->r, "Finalize_Workspace::workspace->r" );
-            sfree( workspace->r_hat, "Finalize_Workspace::workspace->r_hat" );
-            sfree( workspace->d, "Finalize_Workspace::workspace->d" );
-            sfree( workspace->q, "Finalize_Workspace::workspace->q" );
-            sfree( workspace->q_hat, "Finalize_Workspace::workspace->q_hat" );
-            sfree( workspace->p, "Finalize_Workspace::workspace->p" );
-            sfree( workspace->y, "Finalize_Workspace::workspace->y" );
-            sfree( workspace->z, "Finalize_Workspace::workspace->z" );
-            sfree( workspace->g, "Finalize_Workspace::workspace->g" );
+            sfree( workspace->r, __FILE__, __LINE__ );
+            sfree( workspace->r_hat, __FILE__, __LINE__ );
+            sfree( workspace->d, __FILE__, __LINE__ );
+            sfree( workspace->q, __FILE__, __LINE__ );
+            sfree( workspace->q_hat, __FILE__, __LINE__ );
+            sfree( workspace->p, __FILE__, __LINE__ );
+            sfree( workspace->y, __FILE__, __LINE__ );
+            sfree( workspace->z, __FILE__, __LINE__ );
+            sfree( workspace->g, __FILE__, __LINE__ );
             break;
 
         default:
@@ -1568,87 +1552,83 @@ static void Finalize_Workspace( reax_system *system, control_params *control,
 
     /* SpMV related */
 #if defined(_OPENMP)
-    sfree( workspace->b_local, "Finalize_Workspace::b_local" );
+    sfree( workspace->b_local, __FILE__, __LINE__ );
 #endif
 
     /* level scheduling related */
     if ( control->cm_solver_pre_app_type == TRI_SOLVE_LEVEL_SCHED_PA ||
             control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA )
     {
-        sfree( workspace->row_levels_L, "Finalize_Workspace::row_levels_L" );
-        sfree( workspace->level_rows_L, "Finalize_Workspace::level_rows_L" );
-        sfree( workspace->level_rows_cnt_L, "Finalize_Workspace::level_rows_cnt_L" );
-        sfree( workspace->row_levels_U, "Finalize_Workspace::row_levels_U" );
-        sfree( workspace->level_rows_U, "Finalize_Workspace::level_rows_U" );
-        sfree( workspace->level_rows_cnt_U, "Finalize_Workspace::level_rows_cnt_U" );
-        sfree( workspace->top, "Finalize_Workspace::top" );
+        sfree( workspace->row_levels_L, __FILE__, __LINE__ );
+        sfree( workspace->level_rows_L, __FILE__, __LINE__ );
+        sfree( workspace->level_rows_cnt_L, __FILE__, __LINE__ );
+        sfree( workspace->row_levels_U, __FILE__, __LINE__ );
+        sfree( workspace->level_rows_U, __FILE__, __LINE__ );
+        sfree( workspace->level_rows_cnt_U, __FILE__, __LINE__ );
+        sfree( workspace->top, __FILE__, __LINE__ );
     }
 
     /* graph coloring related */
     if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA )
     {
-        sfree( workspace->color, "Finalize_Workspace::workspace->color" );
-        sfree( workspace->to_color, "Finalize_Workspace::workspace->to_color" );
-        sfree( workspace->conflict, "Finalize_Workspace::workspace->conflict" );
-        sfree( workspace->conflict_cnt, "Finalize_Workspace::workspace->conflict_cnt" );
-        sfree( workspace->recolor, "Finalize_Workspace::workspace->recolor" );
-        sfree( workspace->color_top, "Finalize_Workspace::workspace->color_top" );
-        sfree( workspace->permuted_row_col, "Finalize_Workspace::workspace->permuted_row_col" );
-        sfree( workspace->permuted_row_col_inv, "Finalize_Workspace::workspace->permuted_row_col_inv" );
+        sfree( workspace->color, __FILE__, __LINE__ );
+        sfree( workspace->to_color, __FILE__, __LINE__ );
+        sfree( workspace->conflict, __FILE__, __LINE__ );
+        sfree( workspace->conflict_cnt, __FILE__, __LINE__ );
+        sfree( workspace->recolor, __FILE__, __LINE__ );
+        sfree( workspace->color_top, __FILE__, __LINE__ );
+        sfree( workspace->permuted_row_col, __FILE__, __LINE__ );
+        sfree( workspace->permuted_row_col_inv, __FILE__, __LINE__ );
     }
 
     /* graph coloring related OR ILUTP preconditioner */
     if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA 
             || control->cm_solver_pre_comp_type == ILUTP_PC )
     {
-        sfree( workspace->y_p, "Finalize_Workspace::workspace->y_p" );
-        sfree( workspace->x_p, "Finalize_Workspace::workspace->x_p" );
+        sfree( workspace->y_p, __FILE__, __LINE__ );
+        sfree( workspace->x_p, __FILE__, __LINE__ );
     }
 
     /* Jacobi iteration related */
     if ( control->cm_solver_pre_app_type == JACOBI_ITER_PA )
     {
-        sfree( workspace->Dinv_L, "Finalize_Workspace::Dinv_L" );
-        sfree( workspace->Dinv_U, "Finalize_Workspace::Dinv_U" );
-        sfree( workspace->Dinv_b, "Finalize_Workspace::Dinv_b" );
-        sfree( workspace->rp, "Finalize_Workspace::rp" );
-        sfree( workspace->rp2, "Finalize_Workspace::rp2" );
+        sfree( workspace->Dinv_L, __FILE__, __LINE__ );
+        sfree( workspace->Dinv_U, __FILE__, __LINE__ );
+        sfree( workspace->Dinv_b, __FILE__, __LINE__ );
+        sfree( workspace->rp, __FILE__, __LINE__ );
+        sfree( workspace->rp2, __FILE__, __LINE__ );
     }
 
     /* ILUTP preconditioner related */
     if ( control->cm_solver_pre_comp_type == ILUTP_PC )
     {
-        sfree( workspace->perm_ilutp, "Finalize_Workspace::workspace->perm_ilutp" );
+        sfree( workspace->perm_ilutp, __FILE__, __LINE__ );
     }
 
-#if defined(QMMM)
-    sfree( workspace->mask_qmmm, "Init_Workspace::workspace->mask_qmmm" );
-#endif
-
     /* integrator storage */
-    sfree( workspace->a, "Finalize_Workspace::workspace->a" );
-    sfree( workspace->f_old, "Finalize_Workspace::workspace->f_old" );
-    sfree( workspace->v_const, "Finalize_Workspace::workspace->v_const" );
+    sfree( workspace->a, __FILE__, __LINE__ );
+    sfree( workspace->f_old, __FILE__, __LINE__ );
+    sfree( workspace->v_const, __FILE__, __LINE__ );
 
 #if defined(_OPENMP)
-    sfree( workspace->f_local, "Finalize_Workspace::workspace->f_local" );
+    sfree( workspace->f_local, __FILE__, __LINE__ );
 #endif
 
     /* storage for analysis */
     if ( control->molec_anal || control->diffusion_coef )
     {
-        sfree( workspace->mark, "Finalize_Workspace::workspace->mark" );
-        sfree( workspace->old_mark, "Finalize_Workspace::workspace->old_mark" );
+        sfree( workspace->mark, __FILE__, __LINE__ );
+        sfree( workspace->old_mark, __FILE__, __LINE__ );
     }
 
     if ( control->diffusion_coef )
     {
-        sfree( workspace->x_old, "Finalize_Workspace::workspace->x_old" );
+        sfree( workspace->x_old, __FILE__, __LINE__ );
     }
 
     if ( reset == FALSE )
     {
-        sfree( workspace->orig_id, "Finalize_Workspace::workspace->orig_id" );
+        sfree( workspace->orig_id, __FILE__, __LINE__ );
 
         /* space for keeping restriction info, if any */
         if ( control->restrict_bonds )
@@ -1656,28 +1636,28 @@ static void Finalize_Workspace( reax_system *system, control_params *control,
             for ( i = 0; i < system->N; ++i )
             {
                 sfree( workspace->restricted_list[i],
-                        "Finalize_Workspace::workspace->restricted_list[i]" );
+                        __FILE__, __LINE__ );
             }
 
-            sfree( workspace->restricted, "Finalize_Workspace::workspace->restricted" );
-            sfree( workspace->restricted_list, "Finalize_Workspace::workspace->restricted_list" );
+            sfree( workspace->restricted, __FILE__, __LINE__ );
+            sfree( workspace->restricted_list, __FILE__, __LINE__ );
         }
     }
 
 #if defined(TEST_FORCES)
-    sfree( workspace->dDelta, "Finalize_Workspace::workspace->dDelta" );
-    sfree( workspace->f_ele, "Finalize_Workspace::workspace->f_ele" );
-    sfree( workspace->f_vdw, "Finalize_Workspace::workspace->f_vdw" );
-    sfree( workspace->f_be, "Finalize_Workspace::workspace->f_be" );
-    sfree( workspace->f_lp, "Finalize_Workspace::workspace->f_lp" );
-    sfree( workspace->f_ov, "Finalize_Workspace::workspace->f_ov" );
-    sfree( workspace->f_un, "Finalize_Workspace::workspace->f_un" );
-    sfree( workspace->f_ang, "Finalize_Workspace::workspace->f_ang" );
-    sfree( workspace->f_coa, "Finalize_Workspace::workspace->f_coa" );
-    sfree( workspace->f_pen, "Finalize_Workspace::workspace->f_pen" );
-    sfree( workspace->f_hb, "Finalize_Workspace::workspace->f_hb" );
-    sfree( workspace->f_tor, "Finalize_Workspace::workspace->f_tor" );
-    sfree( workspace->f_con, "Finalize_Workspace::workspace->f_con" );
+    sfree( workspace->dDelta, __FILE__, __LINE__ );
+    sfree( workspace->f_ele, __FILE__, __LINE__ );
+    sfree( workspace->f_vdw, __FILE__, __LINE__ );
+    sfree( workspace->f_be, __FILE__, __LINE__ );
+    sfree( workspace->f_lp, __FILE__, __LINE__ );
+    sfree( workspace->f_ov, __FILE__, __LINE__ );
+    sfree( workspace->f_un, __FILE__, __LINE__ );
+    sfree( workspace->f_ang, __FILE__, __LINE__ );
+    sfree( workspace->f_coa, __FILE__, __LINE__ );
+    sfree( workspace->f_pen, __FILE__, __LINE__ );
+    sfree( workspace->f_hb, __FILE__, __LINE__ );
+    sfree( workspace->f_tor, __FILE__, __LINE__ );
+    sfree( workspace->f_con, __FILE__, __LINE__ );
 #endif
 }
 
@@ -1696,6 +1676,10 @@ static void Finalize_Lists( reax_list **lists )
     {
         Delete_List( TYP_BOND, lists[BONDS] );
     }
+    if ( lists[OLD_BONDS]->allocated == TRUE )
+    {
+        Delete_List( TYP_BOND, lists[OLD_BONDS] );
+    }
     if ( lists[THREE_BODIES]->allocated == TRUE )
     {
         Delete_List( TYP_THREE_BODY, lists[THREE_BODIES] );
@@ -1719,70 +1703,70 @@ void Finalize_Out_Controls( reax_system *system, control_params *control,
 {
     if ( out_control->write_steps > 0 )
     {
-        sfclose( out_control->trj, "Finalize_Out_Controls::out_control->trj" );
+        sfclose( out_control->trj, __FILE__, __LINE__ );
     }
 
     if ( out_control->log_update_freq > 0 )
     {
-        sfclose( out_control->out, "Finalize_Out_Controls::out_control->out" );
-        sfclose( out_control->pot, "Finalize_Out_Controls::out_control->pot" );
-        sfclose( out_control->log, "Finalize_Out_Controls::out_control->log" );
+        sfclose( out_control->out, __FILE__, __LINE__ );
+        sfclose( out_control->pot, __FILE__, __LINE__ );
+        sfclose( out_control->log, __FILE__, __LINE__ );
     }
 
     if ( control->ensemble == sNPT || control->ensemble == iNPT
             || control->ensemble == aNPT || control->compute_pressure == TRUE )
     {
-        sfclose( out_control->prs, "Finalize_Out_Controls::out_control->prs" );
+        sfclose( out_control->prs, __FILE__, __LINE__ );
     }
 
     if ( control->molec_anal )
     {
-        sfclose( out_control->mol, "Finalize_Out_Controls::out_control->mol" );
+        sfclose( out_control->mol, __FILE__, __LINE__ );
 
         if ( control->num_ignored )
         {
-            sfclose( out_control->ign, "Finalize_Out_Controls::out_control->ign" );
+            sfclose( out_control->ign, __FILE__, __LINE__ );
         }
     }
 
     if ( control->dipole_anal )
     {
-        sfclose( out_control->dpl, "Finalize_Out_Controls::out_control->dpl" );
+        sfclose( out_control->dpl, __FILE__, __LINE__ );
     }
 
     if ( control->diffusion_coef )
     {
-        sfclose( out_control->drft, "Finalize_Out_Controls::out_control->drft" );
+        sfclose( out_control->drft, __FILE__, __LINE__ );
     }
 
 
 #if defined(TEST_ENERGY)
-    sfclose( out_control->ebond, "Finalize_Out_Controls::out_control->ebond" );
-    sfclose( out_control->elp, "Finalize_Out_Controls::out_control->elp" );
-    sfclose( out_control->eov, "Finalize_Out_Controls::out_control->eov" );
-    sfclose( out_control->eun, "Finalize_Out_Controls::out_control->eun" );
-    sfclose( out_control->eval, "Finalize_Out_Controls::out_control->eval" );
-    sfclose( out_control->epen, "Finalize_Out_Controls::out_control->epen" );
-    sfclose( out_control->ecoa, "Finalize_Out_Controls::out_control->ecoa" );
-    sfclose( out_control->ehb, "Finalize_Out_Controls::out_control->ehb" );
-    sfclose( out_control->etor, "Finalize_Out_Controls::out_control->etor" );
-    sfclose( out_control->econ, "Finalize_Out_Controls::out_control->econ" );
-    sfclose( out_control->evdw, "Finalize_Out_Controls::out_control->evdw" );
-    sfclose( out_control->ecou, "Finalize_Out_Controls::out_control->ecou" );
+    sfclose( out_control->ebond, __FILE__, __LINE__ );
+    sfclose( out_control->elp, __FILE__, __LINE__ );
+    sfclose( out_control->eov, __FILE__, __LINE__ );
+    sfclose( out_control->eun, __FILE__, __LINE__ );
+    sfclose( out_control->eval, __FILE__, __LINE__ );
+    sfclose( out_control->epen, __FILE__, __LINE__ );
+    sfclose( out_control->ecoa, __FILE__, __LINE__ );
+    sfclose( out_control->ehb, __FILE__, __LINE__ );
+    sfclose( out_control->etor, __FILE__, __LINE__ );
+    sfclose( out_control->econ, __FILE__, __LINE__ );
+    sfclose( out_control->evdw, __FILE__, __LINE__ );
+    sfclose( out_control->ecou, __FILE__, __LINE__ );
 #endif
 
 #if defined(TEST_FORCES)
-    sfclose( out_control->fbo, "Finalize_Out_Controls::out_control->fbo" );
-    sfclose( out_control->fdbo, "Finalize_Out_Controls::out_control->fdbo" );
-    sfclose( out_control->fbond, "Finalize_Out_Controls::out_control->fbond" );
-    sfclose( out_control->flp, "Finalize_Out_Controls::out_control->flp" );
-    sfclose( out_control->fatom, "Finalize_Out_Controls::out_control->fatom" );
-    sfclose( out_control->f3body, "Finalize_Out_Controls::out_control->f3body" );
-    sfclose( out_control->fhb, "Finalize_Out_Controls::out_control->fhb" );
-    sfclose( out_control->f4body, "Finalize_Out_Controls::out_control->f4body" );
-    sfclose( out_control->fnonb, "Finalize_Out_Controls::out_control->fnonb" );
-    sfclose( out_control->ftot, "Finalize_Out_Controls::out_control->ftot" );
-    sfclose( out_control->ftot2, "Finalize_Out_Controls::out_control->ftot2" );
+    sfclose( out_control->fbo, __FILE__, __LINE__ );
+    sfclose( out_control->fdbo, __FILE__, __LINE__ );
+    sfclose( out_control->fbond, __FILE__, __LINE__ );
+    sfclose( out_control->flp, __FILE__, __LINE__ );
+    sfclose( out_control->fatom, __FILE__, __LINE__ );
+    sfclose( out_control->f3body, __FILE__, __LINE__ );
+    sfclose( out_control->fhb, __FILE__, __LINE__ );
+    sfclose( out_control->f4body, __FILE__, __LINE__ );
+    sfclose( out_control->fnonb, __FILE__, __LINE__ );
+    sfclose( out_control->ftot, __FILE__, __LINE__ );
+    sfclose( out_control->ftot2, __FILE__, __LINE__ );
 #endif
 }
 
diff --git a/sPuReMD/src/io_tools.c b/sPuReMD/src/io_tools.c
index 24fda64895773228647cffb54e4b562eeeb2420b..c0332ccd7e00c61a66a9b08c1b0170b828fe97e0 100644
--- a/sPuReMD/src/io_tools.c
+++ b/sPuReMD/src/io_tools.c
@@ -400,7 +400,7 @@ void Print_Near_Neighbors( reax_system *system, control_params *control,
     reax_list *near_nbrs = lists[NEAR_NBRS];
 
     snprintf( fname, MAX_STR, "%.*s.near_nbrs", MAX_STR - 11, control->sim_name );
-    fout = sfopen( fname, "w" );
+    fout = sfopen( fname, "w", __FILE__, __LINE__ );
 
     for ( i = 0; i < system->N; ++i )
     {
@@ -420,7 +420,7 @@ void Print_Near_Neighbors( reax_system *system, control_params *control,
         }
     }
 
-    sfclose( fout, "Print_Near_Neighbors::fout" );
+    sfclose( fout, __FILE__, __LINE__ );
 }
 
 
@@ -434,7 +434,7 @@ void Print_Near_Neighbors2( reax_system *system, control_params *control,
     reax_list *near_nbrs = lists[NEAR_NBRS];
 
     snprintf( fname, MAX_STR, "%.*s.near_nbrs_lgj", MAX_STR - 15, control->sim_name );
-    fout = sfopen( fname, "w" );
+    fout = sfopen( fname, "w", __FILE__, __LINE__ );
 
     for ( i = 0; i < system->N; ++i )
     {
@@ -455,7 +455,7 @@ void Print_Near_Neighbors2( reax_system *system, control_params *control,
         fprintf( fout, "\n");
     }
 
-    sfclose( fout, "Print_Near_Neighbors2::fout" );
+    sfclose( fout, __FILE__, __LINE__ );
 }
 
 
@@ -473,7 +473,7 @@ void Print_Far_Neighbors( reax_system const * const system,
     far_nbrs = lists[FAR_NBRS];
 
     snprintf( fname, MAX_STR, "%.*s.%010d.far_nbrs", MAX_STR - 21, control->sim_name, data->step );
-    fout = sfopen( fname, "w" );
+    fout = sfopen( fname, "w", __FILE__, __LINE__ );
 
     for ( i = 0; i < system->N; ++i )
     {
@@ -506,7 +506,7 @@ void Print_Far_Neighbors( reax_system const * const system,
         }
     }
 
-    sfclose( fout, "Print_Far_Neighbors::fout" );
+    sfclose( fout, __FILE__, __LINE__ );
 }
 
 
@@ -525,7 +525,7 @@ void Print_Far_Neighbors2( reax_system *system, control_params *control,
     reax_list *far_nbrs = lists[FAR_NBRS];
 
     snprintf( fname, MAX_STR, "%.*s.far_nbrs_lgj", MAX_STR - 14, control->sim_name );
-    fout = sfopen( fname, "w" );
+    fout = sfopen( fname, "w", __FILE__, __LINE__ );
     int num = 0;
     int temp[500];
 
@@ -546,7 +546,7 @@ void Print_Far_Neighbors2( reax_system *system, control_params *control,
         fprintf( fout, "\n");
     }
 
-    sfclose( fout, "Print_Far_Neighbors2::fout" );
+    sfclose( fout, __FILE__, __LINE__ );
 }
 
 
@@ -559,7 +559,7 @@ void Print_Total_Force( reax_system *system, control_params *control,
     FILE *fout;
 
     snprintf( fname, MAX_STR, "%.*s.%d.forces", MAX_STR - 10, control->sim_name, data->step );
-    fout = sfopen( fname, "w" );
+    fout = sfopen( fname, "w", __FILE__, __LINE__ );
 
     for ( i = 0; i < system->N; ++i )
     {
@@ -577,7 +577,7 @@ void Print_Total_Force( reax_system *system, control_params *control,
 //    fflush( out_control->ftot );
     fflush( fout );
 
-    sfclose( fout, "Print_Total_Force::fout" );
+    sfclose( fout, __FILE__, __LINE__ );
 }
 
 
@@ -711,7 +711,7 @@ void Print_Linear_System( reax_system *system, control_params *control,
     FILE *out;
 
     snprintf( fname, 100, "%.*s.state%10d.out", 79, control->sim_name, step );
-    out = sfopen( fname, "w" );
+    out = sfopen( fname, "w", __FILE__, __LINE__ );
 
     for ( i = 0; i < system->N_cm; i++ )
         fprintf( out, "%6d%2d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n",
@@ -720,16 +720,16 @@ void Print_Linear_System( reax_system *system, control_params *control,
                  system->atoms[i].x[2],
                  workspace->s[0][i], workspace->b_s[i],
                  workspace->t[0][i], workspace->b_t[i]  );
-    sfclose( out, "Print_Linear_System::out" );
+    sfclose( out, __FILE__, __LINE__ );
 
     // snprintf( fname, 100, "x2_%d", step );
-    // out = sfopen( fname, "w" );
+    // out = sfopen( fname, "w", __FILE__, __LINE__ );
     // for( i = 0; i < system->N; i++ )
     // fprintf( out, "%g\n", workspace->s_t[i+system->N] );
-    // sfclose( out, "Print_Linear_System::out" );
+    // sfclose( out, __FILE__, __LINE__ );
 
     snprintf( fname, 100, "%.*s.H%10d.out", 83, control->sim_name, step );
-    out = sfopen( fname, "w" );
+    out = sfopen( fname, "w", __FILE__, __LINE__ );
     H = &workspace->H;
 
     for ( i = 0; i < system->N_cm; ++i )
@@ -749,10 +749,10 @@ void Print_Linear_System( reax_system *system, control_params *control,
                  workspace->orig_id[i], workspace->orig_id[i], H->val[j] );
     }
 
-    sfclose( out, "Print_Linear_System::out" );
+    sfclose( out, __FILE__, __LINE__ );
 
     snprintf( fname, 100, "%.*s.H_sp%10d.out", 80, control->sim_name, step );
-    out = sfopen( fname, "w" );
+    out = sfopen( fname, "w", __FILE__, __LINE__ );
     H = &workspace->H_sp;
 
     for ( i = 0; i < system->N_cm; ++i )
@@ -772,19 +772,19 @@ void Print_Linear_System( reax_system *system, control_params *control,
                  workspace->orig_id[i], workspace->orig_id[i], H->val[j] );
     }
 
-    sfclose( out, "Print_Linear_System::out" );
+    sfclose( out, __FILE__, __LINE__ );
 
     /*snprintf( fname, 100, "%.*s.b_s%10d", 84, control->sim_name, step );
-      out = sfopen( fname, "w" );
+      out = sfopen( fname, "w", __FILE__, __LINE__ );
       for( i = 0; i < system->N; i++ )
       fprintf( out, "%12.7f\n", workspace->b_s[i] );
-      sfclose( out, "Print_Linear_System::out" );
+      sfclose( out, __FILE__, __LINE__ );
 
       snprintf( fname, 100, "%.*s.b_t%10d", 84, control->sim_name, step );
-      out = sfopen( fname, "w" );
+      out = sfopen( fname, "w", __FILE__, __LINE__ );
       for( i = 0; i < system->N; i++ )
       fprintf( out, "%12.7f\n", workspace->b_t[i] );
-      sfclose( out, "Print_Linear_System::out" );*/
+      sfclose( out, __FILE__, __LINE__ );*/
 }
 
 
@@ -796,7 +796,7 @@ void Print_Charges( reax_system *system, control_params *control,
     FILE *fout;
 
     snprintf( fname, 100, "%.*s.q%010d", 87, control->sim_name, step );
-    fout = sfopen( fname, "w" );
+    fout = sfopen( fname, "w", __FILE__, __LINE__ );
 
     for ( i = 0; i < system->N; ++i )
     {
@@ -805,7 +805,7 @@ void Print_Charges( reax_system *system, control_params *control,
                  workspace->s[0][i], workspace->t[0][i], system->atoms[i].q );
     }
 
-    sfclose( fout, "Print_Charges::fout" );
+    sfclose( fout, __FILE__, __LINE__ );
 }
 
 
@@ -849,11 +849,11 @@ void Print_Sparse_Matrix2( sparse_matrix *A, char *fname, char *mode )
    
     if ( mode == NULL )
     {
-        f = sfopen( fname, "w" );
+        f = sfopen( fname, "w", __FILE__, __LINE__ );
     }
     else
     {
-        f = sfopen( fname, mode );
+        f = sfopen( fname, mode, __FILE__, __LINE__ );
     }
 
     for ( i = 0; i < A->n; ++i )
@@ -871,7 +871,7 @@ void Print_Sparse_Matrix2( sparse_matrix *A, char *fname, char *mode )
         fprintf( f, "%6d %6d %24.15f\n", i + 1, A->j[A->start[i + 1] - 1] + 1, A->val[A->start[i + 1] - 1] );
     }
 
-    sfclose( f, "Print_Sparse_Matrix2::f" );
+    sfclose( f, __FILE__, __LINE__ );
 }
 
 
@@ -886,7 +886,7 @@ void Read_Sparse_Matrix2( sparse_matrix *A, char *fname )
     real val;
     FILE *f;
    
-    f = sfopen( fname, "r" );
+    f = sfopen( fname, "r", __FILE__, __LINE__ );
     top = 0;
     cur_row = 0;
 
@@ -908,7 +908,7 @@ void Read_Sparse_Matrix2( sparse_matrix *A, char *fname )
 
     A->start[A->n] = top;
 
-    sfclose( f, "Read_Sparse_Matrix2::f" );
+    sfclose( f, __FILE__, __LINE__ );
 }
 
 
@@ -923,14 +923,14 @@ void Read_Permutation_Matrix( unsigned int *v, char *fname )
     double val;
     FILE *f;
    
-    f = sfopen( fname, "r" );
+    f = sfopen( fname, "r", __FILE__, __LINE__ );
 
     while ( fscanf( f, "%6u %6u %24lf", &row, &col, &val ) == 3 )
     {
         v[row - 1] = col - 1;
     }
 
-    sfclose( f, "Read_Permuation_Matrix::f" );
+    sfclose( f, __FILE__, __LINE__ );
 }
 
 
@@ -941,7 +941,7 @@ void Print_Sparse_Matrix_Binary( sparse_matrix *A, char *fname )
     int i, j, temp;
     FILE *f;
    
-    f = sfopen( fname, "wb" );
+    f = sfopen( fname, "wb", __FILE__, __LINE__ );
 
     /* header: # rows, # nonzeros */
     fwrite( &A->n, sizeof(unsigned int), 1, f );
@@ -967,7 +967,7 @@ void Print_Sparse_Matrix_Binary( sparse_matrix *A, char *fname )
         }
     }
 
-    sfclose( f, "Print_Sparse_Matrix_Binary::f" );
+    sfclose( f, __FILE__, __LINE__ );
 }
 
 
@@ -976,7 +976,7 @@ void Print_Bonds( reax_system *system, reax_list *bonds, char *fname )
     int i, pj;
     bond_data *pbond;
     bond_order_data *bo_ij;
-    FILE *f = sfopen( fname, "w" );
+    FILE *f = sfopen( fname, "w", __FILE__, __LINE__ );
 
     for ( i = 0; i < system->N; ++i )
     {
@@ -992,14 +992,14 @@ void Print_Bonds( reax_system *system, reax_list *bonds, char *fname )
         }
     }
 
-    sfclose( f, "Print_Bonds::f" );
+    sfclose( f, __FILE__, __LINE__ );
 }
 
 
 void Print_Bond_List2( reax_system *system, reax_list *bonds, char *fname )
 {
     int i, j, id_i, id_j, nbr, pj;
-    FILE *f = sfopen( fname, "w" );
+    FILE *f = sfopen( fname, "w", __FILE__, __LINE__ );
     int temp[500];
     int num = 0;
 
diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c
index 5f630a41a296d0f53991d50f6f95ea3feaa978a1..c40baca37319a6ff6809b7eacf658f3c568db20c 100644
--- a/sPuReMD/src/lin_alg.c
+++ b/sPuReMD/src/lin_alg.c
@@ -113,13 +113,15 @@ static int compare_matrix_entry(const void *v1, const void *v2)
 void Sort_Matrix_Rows( sparse_matrix * const A )
 {
     unsigned int i, j, si, ei;
+    size_t temp_size;
     sparse_matrix_entry *temp;
 
 #if defined(_OPENMP)
 //    #pragma omp parallel default(none) private(i, j, si, ei, temp) shared(stderr)
 #endif
     {
-        temp = smalloc( sizeof(sparse_matrix_entry) * (A->n + 1), "Sort_Matrix_Rows::temp" );
+        temp = NULL;
+        temp_size = 0;
 
         /* sort each row of A using column indices */
 #if defined(_OPENMP)
@@ -130,6 +132,16 @@ void Sort_Matrix_Rows( sparse_matrix * const A )
             si = A->start[i];
             ei = A->start[i + 1];
 
+            if ( temp_size < ei - si )
+            {
+                if ( temp != NULL )
+                {
+                    sfree( temp, __FILE__, __LINE__ );
+                }
+                temp = smalloc( sizeof(sparse_matrix_entry) * (ei - si), __FILE__, __LINE__ );
+                temp_size = ei - si;
+            }
+
             for ( j = 0; j < (ei - si); ++j )
             {
                 temp[j].j = A->j[si + j];
@@ -146,7 +158,7 @@ void Sort_Matrix_Rows( sparse_matrix * const A )
             }
         }
 
-        sfree( temp, "Sort_Matrix_Rows::temp" );
+        sfree( temp, __FILE__, __LINE__ );
     }
 }
 
@@ -171,9 +183,13 @@ static void compute_full_sparse_matrix( const sparse_matrix * const A,
     {
         Allocate_Matrix( A_full, A->n, A->n_max, 2 * A->m - A->n );
     }
-    else if ( A_full->m < 2 * A->m - A->n || realloc == TRUE )
+    else if ( A_full->m < 2 * A->m - A->n || A_full->n_max < A->n_max
+            || realloc == TRUE )
     {
-        Deallocate_Matrix( A_full );
+        if ( A_full->allocated == TRUE )
+        {
+            Deallocate_Matrix( A_full );
+        }
         Allocate_Matrix( A_full, A->n, A->n_max, 2 * A->m - A->n );
     }
 
@@ -236,14 +252,17 @@ void setup_sparse_approx_inverse( const sparse_matrix * const A,
     {
         Allocate_Matrix( A_spar_patt, A->n, A->n_max, A->m );
     }
-    else if ( A_spar_patt->m < A->m || realloc == TRUE )
+    else if ( A_spar_patt->m < A->m || A_spar_patt->n_max < A->n_max
+            || realloc == TRUE )
     {
-        Deallocate_Matrix( A_spar_patt );
+        if ( A_spar_patt->allocated == TRUE )
+        {
+            Deallocate_Matrix( A_spar_patt );
+        }
         Allocate_Matrix( A_spar_patt, A->n, A->n_max, A->m );
     }
 
-    list = smalloc( sizeof(real) * A->start[A->n],
-            "setup_sparse_approx_inverse::list" );
+    list = smalloc( sizeof(real) * A->start[A->n], __FILE__, __LINE__ );
 
     /* quick-select algorithm for finding the k-th greatest element in the matrix, where
      *  list: values from the matrix
@@ -356,9 +375,13 @@ void setup_sparse_approx_inverse( const sparse_matrix * const A,
         Allocate_Matrix( A_app_inv, A_spar_patt_full->n,
                 A_spar_patt_full->n_max, A_spar_patt_full->m );
     }
-    else if ( A_app_inv->m < A->m || realloc == TRUE )
+    else if ( A_app_inv->m < A->m || A_app_inv->n_max < A->n_max
+            || realloc == TRUE )
     {
-        Deallocate_Matrix( A_app_inv );
+        if ( A_app_inv->allocated == TRUE )
+        {
+            Deallocate_Matrix( A_app_inv );
+        }
 
         /* A_app_inv has the same sparsity pattern
          * as A_spar_patt_full (omit non-zero values) */
@@ -366,7 +389,7 @@ void setup_sparse_approx_inverse( const sparse_matrix * const A,
                 A_spar_patt_full->n_max, A_spar_patt_full->m );
     }
 
-    sfree( list, "setup_sparse_approx_inverse::list" );
+    sfree( list, __FILE__, __LINE__ );
 }
 
 
@@ -399,7 +422,7 @@ void Calculate_Droptol( const sparse_matrix * const A,
             if ( droptol_local == NULL )
             {
                 droptol_local = smalloc( omp_get_num_threads() * A->n * sizeof(real),
-                        "Calculate_Droptol::droptol_local" );
+                        __FILE__, __LINE__ );
             }
         }
 
@@ -476,7 +499,7 @@ void Calculate_Droptol( const sparse_matrix * const A,
 #if defined(_OPENMP)
         #pragma omp master
         {
-            sfree( droptol_local, "Calculate_Droptol::droptol_local" );
+            sfree( droptol_local, __FILE__, __LINE__ );
         }
 #endif
     }
@@ -512,7 +535,11 @@ int Estimate_LU_Fill( const sparse_matrix * const A, const real * const droptol
 }
 
 
-/* Jacobi preconditioner computation */
+/* Compute diagonal inverse (Jacobi) preconditioner
+ *
+ * H: matrix used to compute preconditioner, in CSR format
+ * Hdia_inv: computed diagonal inverse preconditioner
+ */
 real jacobi( const sparse_matrix * const H, real * const Hdia_inv )
 {
     unsigned int i;
@@ -552,9 +579,9 @@ real ICHOLT( const sparse_matrix * const A, const real * const droptol,
 
     start = Get_Time( );
 
-    Utop = smalloc( (A->n + 1) * sizeof(unsigned int), "ICHOLT::Utop" );
-    tmp_j = smalloc( A->n * sizeof(int), "ICHOLT::Utop" );
-    tmp_val = smalloc( A->n * sizeof(real), "ICHOLT::Utop" );
+    Utop = smalloc( (A->n + 1) * sizeof(unsigned int), __FILE__, __LINE__ );
+    tmp_j = smalloc( A->n * sizeof(int), __FILE__, __LINE__ );
+    tmp_val = smalloc( A->n * sizeof(real), __FILE__, __LINE__ );
 
     Ltop = 0;
     tmptop = 0;
@@ -683,9 +710,9 @@ real ICHOLT( const sparse_matrix * const A, const real * const droptol,
 
     //    fprintf( stderr, "nnz(U): %d, max: %d\n", Utop[U->n], U->n * 50 );
 
-    sfree( tmp_val, "ICHOLT::tmp_val" );
-    sfree( tmp_j, "ICHOLT::tmp_j" );
-    sfree( Utop, "ICHOLT::Utop" );
+    sfree( tmp_val, __FILE__, __LINE__ );
+    sfree( tmp_j, __FILE__, __LINE__ );
+    sfree( Utop, __FILE__, __LINE__ );
 
     return Get_Timing_Info( start );
 }
@@ -824,8 +851,8 @@ real ILUT( const sparse_matrix * const A, const real * const droptol,
     start = Get_Time( );
 
     /* use a dense vector with masking for the intermediate row w */
-    w = smalloc( sizeof(real) * A->n, "ILUT::w" );
-    nz_mask = smalloc( sizeof(unsigned int) * A->n, "ILUT::nz_mask" );
+    w = smalloc( sizeof(real) * A->n, __FILE__, __LINE__ );
+    nz_mask = smalloc( sizeof(unsigned int) * A->n, __FILE__, __LINE__ );
 
     compute_full_sparse_matrix( A, A_full, FALSE );
 
@@ -900,8 +927,8 @@ real ILUT( const sparse_matrix * const A, const real * const droptol,
         if ( Ltop + nz_cnt > L->m )
         {
             L->m = MAX( (5 * nz_cnt) + L->m, (unsigned int) (L->m * SAFE_ZONE) );
-            L->j = srealloc( L->j, sizeof(unsigned int) * L->m, "ILUT::L->j" );
-            L->val = srealloc( L->val, sizeof(real) * L->m, "ILUT::L->val" );
+            L->j = srealloc( L->j, sizeof(unsigned int) * L->m, __FILE__, __LINE__ );
+            L->val = srealloc( L->val, sizeof(real) * L->m, __FILE__, __LINE__ );
         }
 
         /* copy w[0:i-1] to row i of L */
@@ -928,8 +955,8 @@ real ILUT( const sparse_matrix * const A, const real * const droptol,
         if ( Utop + nz_cnt > U->m )
         {
             U->m = MAX( (5 * nz_cnt) + U->m, (unsigned int) (U->m * SAFE_ZONE) );
-            U->j = srealloc( U->j, sizeof(unsigned int) * U->m, "ILUT::L->j" );
-            U->val = srealloc( U->val, sizeof(real) * U->m, "ILUT::L->val" );
+            U->j = srealloc( U->j, sizeof(unsigned int) * U->m, __FILE__, __LINE__ );
+            U->val = srealloc( U->val, sizeof(real) * U->m, __FILE__, __LINE__ );
         }
 
         /* diagonal for U */
@@ -954,8 +981,8 @@ real ILUT( const sparse_matrix * const A, const real * const droptol,
     U->start[U->n] = Utop;
 
     Deallocate_Matrix( A_full );
-    sfree( nz_mask, "ILUT::nz_mask" );
-    sfree( w, "ILUT::w" );
+    sfree( nz_mask, __FILE__, __LINE__ );
+    sfree( w, __FILE__, __LINE__ );
 
     return Get_Timing_Info( start );
 }
@@ -980,10 +1007,10 @@ real ILUTP( const sparse_matrix * const A, const real * const droptol,
     start = Get_Time( );
 
     /* use a dense vector with masking for the intermediate row w */
-    w = smalloc( sizeof(real) * A->n, "ILUTP::w" );
-    nz_mask = smalloc( sizeof(int) * A->n, "ILUTP::nz_mask" );
-    perm = smalloc( sizeof(int) * A->n, "ILUTP::perm" );
-    perm_inv = smalloc( sizeof(int) * A->n, "ILUTP::perm_inv" );
+    w = smalloc( sizeof(real) * A->n, __FILE__, __LINE__ );
+    nz_mask = smalloc( sizeof(int) * A->n, __FILE__, __LINE__ );
+    perm = smalloc( sizeof(int) * A->n, __FILE__, __LINE__ );
+    perm_inv = smalloc( sizeof(int) * A->n, __FILE__, __LINE__ );
 
     compute_full_sparse_matrix( A, A_full, FALSE );
 
@@ -1112,10 +1139,10 @@ real ILUTP( const sparse_matrix * const A, const real * const droptol,
     U->start[U->n] = Utop;
 
     Deallocate_Matrix( A_full );
-    sfree( perm_inv, "ILUTP::perm_inv" );
-    sfree( perm, "ILUTP::perm" );
-    sfree( nz_mask, "ILUTP::nz_mask" );
-    sfree( w, "ILUTP::w" );
+    sfree( perm_inv, __FILE__, __LINE__ );
+    sfree( perm, __FILE__, __LINE__ );
+    sfree( nz_mask, __FILE__, __LINE__ );
+    sfree( w, __FILE__, __LINE__ );
 
     return Get_Timing_Info( start );
 }
@@ -1144,9 +1171,9 @@ real FG_ICHOLT( const sparse_matrix * const A, const real * droptol,
     Allocate_Matrix( &DAD, A->n, A->n_max, A->m );
     Allocate_Matrix( &U_T_temp, A->n, A->n_max, A->m );
 
-    D = smalloc( sizeof(real) * A->n, "FG_ICHOLT::D" );
-    D_inv = smalloc( sizeof(real) * A->n, "FG_ICHOLT::D_inv" );
-    gamma = smalloc( sizeof(real) * A->n, "FG_ICHOLT::gamma" );
+    D = smalloc( sizeof(real) * A->n, __FILE__, __LINE__ );
+    D_inv = smalloc( sizeof(real) * A->n, __FILE__, __LINE__ );
+    gamma = smalloc( sizeof(real) * A->n, __FILE__, __LINE__ );
 
 #if defined(_OPENMP)
     #pragma omp parallel for schedule(dynamic,512) \
@@ -1321,9 +1348,9 @@ real FG_ICHOLT( const sparse_matrix * const A, const real * droptol,
 
     Deallocate_Matrix( &U_T_temp );
     Deallocate_Matrix( &DAD );
-    sfree( gamma, "FG_ICHOLT::gamma" );
-    sfree( D_inv, "FG_ICHOLT::D_inv" );
-    sfree( D, "FG_ICHOLT::D" );
+    sfree( gamma, __FILE__, __LINE__ );
+    sfree( D_inv, __FILE__, __LINE__ );
+    sfree( D, __FILE__, __LINE__ );
 
     return Get_Timing_Info( start );
 }
@@ -1353,9 +1380,9 @@ real FG_ILUT( const sparse_matrix * const A, const real * droptol,
     Allocate_Matrix( &L_temp, A->n, A->n_max, A->m );
     Allocate_Matrix( &U_T_temp, A->n, A->n_max, A->m );
 
-    D = smalloc( sizeof(real) * A->n, "FG_ILUT::D" );
-    D_inv = smalloc( sizeof(real) * A->n, "FG_ILUT::D_inv" );
-    gamma = smalloc( sizeof(real) * A->n, "FG_ILUT::gamma" );
+    D = smalloc( sizeof(real) * A->n, __FILE__, __LINE__ );
+    D_inv = smalloc( sizeof(real) * A->n, __FILE__, __LINE__ );
+    gamma = smalloc( sizeof(real) * A->n, __FILE__, __LINE__ );
 
 #if defined(_OPENMP)
     #pragma omp parallel for schedule(dynamic,512) \
@@ -1617,9 +1644,9 @@ real FG_ILUT( const sparse_matrix * const A, const real * droptol,
     Deallocate_Matrix( &U_T_temp );
     Deallocate_Matrix( &L_temp );
     Deallocate_Matrix( &DAD );
-    sfree( gamma, "FG_ILUT::gamma" );
-    sfree( D_inv, "FG_ILUT::D_inv" );
-    sfree( D, "FG_ILUT::D_inv" );
+    sfree( gamma, __FILE__, __LINE__ );
+    sfree( D_inv, __FILE__, __LINE__ );
+    sfree( D, __FILE__, __LINE__ );
 
     return Get_Timing_Info( start );
 }
@@ -1677,10 +1704,10 @@ real sparse_approx_inverse( const sparse_matrix * const A,
     shared(stderr)
 #endif
     {
-        X = smalloc( sizeof(char) * A->n, "sparse_approx_inverse::X" );
-        Y = smalloc( sizeof(char) * A->n, "sparse_approx_inverse::Y" );
-        pos_x = smalloc( sizeof(int) * A->n, "sparse_approx_inverse::pos_x" );
-        pos_y = smalloc( sizeof(int) * A->n, "sparse_approx_inverse::pos_y" );
+        X = smalloc( sizeof(char) * A->n, __FILE__, __LINE__ );
+        Y = smalloc( sizeof(char) * A->n, __FILE__, __LINE__ );
+        pos_x = smalloc( sizeof(int) * A->n, __FILE__, __LINE__ );
+        pos_y = smalloc( sizeof(int) * A->n, __FILE__, __LINE__ );
 
         e_j = NULL;
         dense_matrix = NULL;
@@ -1750,15 +1777,13 @@ real sparse_approx_inverse( const sparse_matrix * const A,
             /* N x M dense matrix */
             if ( dense_matrix == NULL )
             {
-                dense_matrix = smalloc( sizeof(real) * N * M,
-                        "sparse_approx_inverse::dense_matrix" );
+                dense_matrix = smalloc( sizeof(real) * N * M, __FILE__, __LINE__ );
                 dense_matrix_size = sizeof(real) * N * M;
             }
             else if ( dense_matrix_size < sizeof(real) * N * M )
             {
-                sfree( dense_matrix, "sparse_approx_inverse::dense_matrix" );
-                dense_matrix = smalloc( sizeof(real) * N * M,
-                        "sparse_approx_inverse::dense_matrix" );
+                sfree( dense_matrix, __FILE__, __LINE__ );
+                dense_matrix = smalloc( sizeof(real) * N * M, __FILE__, __LINE__ );
                 dense_matrix_size = sizeof(real) * N * M;
             }
 
@@ -1786,13 +1811,13 @@ real sparse_approx_inverse( const sparse_matrix * const A,
              * that is the full column of the identity matrix */
             if ( e_j == NULL )
             {
-                e_j = smalloc( sizeof(real) * M, "sparse_approx_inverse::e_j" );
+                e_j = smalloc( sizeof(real) * M, __FILE__, __LINE__ );
                 e_j_size = sizeof(real) * M;
             }
             else if ( e_j_size < sizeof(real) * M )
             {
-                sfree( e_j, "sparse_approx_inverse::e_j"  );
-                e_j = smalloc( sizeof(real) * M, "sparse_approx_inverse::e_j" );
+                sfree( e_j, __FILE__, __LINE__  );
+                e_j = smalloc( sizeof(real) * M, __FILE__, __LINE__ );
                 e_j_size = sizeof(real) * M;
             }
 
@@ -1833,12 +1858,12 @@ real sparse_approx_inverse( const sparse_matrix * const A,
             }
         }
 
-        sfree( dense_matrix, "sparse_approx_inverse::dense_matrix" );
-        sfree( e_j, "sparse_approx_inverse::e_j"  );
-        sfree( pos_y, "sparse_approx_inverse::pos_y" );
-        sfree( pos_x, "sparse_approx_inverse::pos_x" );
-        sfree( Y, "sparse_approx_inverse::Y" );
-        sfree( X, "sparse_approx_inverse::X" );
+        sfree( dense_matrix, __FILE__, __LINE__ );
+        sfree( e_j, __FILE__, __LINE__  );
+        sfree( pos_y, __FILE__, __LINE__ );
+        sfree( pos_x, __FILE__, __LINE__ );
+        sfree( Y, __FILE__, __LINE__ );
+        sfree( X, __FILE__, __LINE__ );
     }
 
     return Get_Timing_Info( start );
@@ -1945,7 +1970,7 @@ void Transpose( const sparse_matrix * const A, sparse_matrix * const A_t )
 {
     unsigned int i, j, pj, *A_t_top;
 
-    A_t_top = scalloc( A->n + 1, sizeof(unsigned int), "Transpose::A_t_top" );
+    A_t_top = scalloc( A->n + 1, sizeof(unsigned int), __FILE__, __LINE__ );
 
     for ( i = 0; i < A->n + 1; ++i )
     {
@@ -1980,7 +2005,7 @@ void Transpose( const sparse_matrix * const A, sparse_matrix * const A_t )
         }
     }
 
-    sfree( A_t_top, "Transpose::A_t_top" );
+    sfree( A_t_top, __FILE__, __LINE__ );
 }
 
 
@@ -2317,8 +2342,8 @@ void graph_coloring( const control_params * const control,
             }
         }
 
-        fb_color = smalloc( sizeof(int) * A->n, "graph_coloring::fb_color" );
-        conflict_local = smalloc( sizeof(unsigned int) * A->n, "graph_coloring::fb_color" );
+        fb_color = smalloc( sizeof(int) * A->n, __FILE__, __LINE__ );
+        conflict_local = smalloc( sizeof(unsigned int) * A->n, __FILE__, __LINE__ );
 
         while ( workspace->recolor_cnt > 0 )
         {
@@ -2415,8 +2440,8 @@ void graph_coloring( const control_params * const control,
             p_conflict = p_temp;
         }
 
-        sfree( conflict_local, "graph_coloring::conflict_local" );
-        sfree( fb_color, "graph_coloring::fb_color" );
+        sfree( conflict_local, __FILE__, __LINE__ );
+        sfree( fb_color, __FILE__, __LINE__ );
     }
 }
 
@@ -2637,9 +2662,12 @@ void setup_graph_coloring( const control_params * const control,
     {
         Allocate_Matrix( H_p, H->n, H->n_max, H->m );
     }
-    else if ( H_p->m < H->m || realloc == TRUE )
+    else if ( H_p->m < H->m || H_p->n_max < H->n_max || realloc == TRUE )
     {
-        Deallocate_Matrix( H_p );
+        if ( H_p->allocated == TRUE )
+        {
+            Deallocate_Matrix( H_p );
+        }
         Allocate_Matrix( H_p, H->n, H->n_max, H->m );
     }
 
@@ -2743,7 +2771,7 @@ void jacobi_iter( const static_storage * const workspace,
 }
 
 
-/* Apply left-sided preconditioning while solver M^{-1}Ax = M^{-1}b
+/* Apply left-sided preconditioning while solving M^{-1}Ax = M^{-1}b
  *
  * workspace: data struct containing matrices, stored in CSR
  * control: data struct containing parameters
@@ -2759,7 +2787,7 @@ void jacobi_iter( const static_storage * const workspace,
  *   Each row of a matrix has at least one non-zero (i.e., no rows with all zeros) */
 static void apply_preconditioner( const static_storage * const workspace,
         const control_params * const control, const real * const y, real * const x,
-        const int fresh_pre, const int side )
+        int fresh_pre, int side )
 {
     int i, si;
 
@@ -3533,9 +3561,6 @@ int CG( const static_storage * const workspace, const control_params * const con
 
         t_start = Get_Time( );
         Vector_Sum( r, 1.0,  b, -1.0, d, N );
-#if defined(QMMM)
-        Vector_Mask_qmmm( r, workspace->mask_qmmm, N );
-#endif
         rnorm = Norm( r, N );
         t_vops += Get_Timing_Info( t_start );
 
@@ -3546,9 +3571,6 @@ int CG( const static_storage * const workspace, const control_params * const con
 
         t_start = Get_Time( );
         Vector_Copy( p, z, N );
-#if defined(QMMM)
-        Vector_Mask_qmmm( p, workspace->mask_qmmm, N );
-#endif
         sig_new = Dot( r, p, N );
         t_vops += Get_Timing_Info( t_start );
 
@@ -3563,9 +3585,6 @@ int CG( const static_storage * const workspace, const control_params * const con
             alpha = sig_new / tmp;
             Vector_Add( x, alpha, p, N );
             Vector_Add( r, -1.0 * alpha, d, N );
-#if defined(QMMM)
-            Vector_Mask_qmmm( r, workspace->mask_qmmm, N );
-#endif
             rnorm = Norm( r, N );
             t_vops += Get_Timing_Info( t_start );
 
@@ -3579,9 +3598,6 @@ int CG( const static_storage * const workspace, const control_params * const con
             sig_new = Dot( r, z, N );
             beta = sig_new / sig_old;
             Vector_Sum( p, 1.0, z, beta, p, N );
-#if defined(QMMM)
-            Vector_Mask_qmmm( p, workspace->mask_qmmm, N );
-#endif
             t_vops += Get_Timing_Info( t_start );
         }
 
@@ -3925,7 +3941,7 @@ real condest( const sparse_matrix * const L, const sparse_matrix * const U )
 
     N = L->n;
 
-    e = smalloc( sizeof(real) * N, "condest::e" );
+    e = smalloc( sizeof(real) * N, __FILE__, __LINE__ );
 
     for ( i = 0; i < N; ++i )
         e[i] = 1.0;
@@ -3944,7 +3960,7 @@ real condest( const sparse_matrix * const L, const sparse_matrix * const U )
 
     }
 
-    sfree( e, "condest::e" );
+    sfree( e, __FILE__, __LINE__ );
 
     return c;
 }
diff --git a/sPuReMD/src/list.c b/sPuReMD/src/list.c
index 21dd6a69e64ad0e742f939e8d31440c684b1ccd0..fd9087e4f23de79b6a1b1a06ec58d0941fdf2c8b 100644
--- a/sPuReMD/src/list.c
+++ b/sPuReMD/src/list.c
@@ -44,8 +44,8 @@ void Make_List( int n, int n_max, int total_intrs, int type, reax_list* l )
     l->n_max = n_max;
     l->total_intrs = total_intrs;
 
-    l->index = smalloc( n_max * sizeof(int), "Make_List::l->index" );
-    l->end_index = smalloc( n_max * sizeof(int), "Make_List::l->end_index" );
+    l->index = smalloc( n_max * sizeof(int), __FILE__, __LINE__ );
+    l->end_index = smalloc( n_max * sizeof(int), __FILE__, __LINE__ );
 
     switch ( type )
     {
@@ -53,7 +53,7 @@ void Make_List( int n, int n_max, int total_intrs, int type, reax_list* l )
         if ( l->total_intrs > 0 )
         {
             l->three_body_list = smalloc( l->total_intrs * sizeof(three_body_interaction_data),
-                    "Make_List::l->three_body_list" );
+                    __FILE__, __LINE__ );
         }
         else
         {
@@ -65,7 +65,7 @@ void Make_List( int n, int n_max, int total_intrs, int type, reax_list* l )
         if ( l->total_intrs > 0 )
         {
             l->bond_list = smalloc( l->total_intrs * sizeof(bond_data),
-                    "Make_List::l->bond_list" );
+                    __FILE__, __LINE__ );
         }
         else
         {
@@ -77,7 +77,7 @@ void Make_List( int n, int n_max, int total_intrs, int type, reax_list* l )
         if ( l->total_intrs > 0 )
         {
             l->dbo_list = smalloc( l->total_intrs * sizeof(dbond_data),
-                    "Make_List::l->dbo_list" );
+                    __FILE__, __LINE__ );
         }
         else
         {
@@ -89,7 +89,7 @@ void Make_List( int n, int n_max, int total_intrs, int type, reax_list* l )
         if ( l->total_intrs > 0 )
         {
             l->dDelta_list = smalloc( l->total_intrs * sizeof(dDelta_data),
-                    "Make_List::l->dDelta_list" );
+                    __FILE__, __LINE__ );
         }
         else
         {
@@ -101,7 +101,7 @@ void Make_List( int n, int n_max, int total_intrs, int type, reax_list* l )
         if ( l->total_intrs > 0 )
         {
             l->far_nbr_list = smalloc( l->total_intrs * sizeof(far_neighbor_data),
-                    "Make_List::l->far_nbr_list" );
+                    __FILE__, __LINE__ );
         }
         else
         {
@@ -113,7 +113,7 @@ void Make_List( int n, int n_max, int total_intrs, int type, reax_list* l )
         if ( l->total_intrs > 0 )
         {
             l->near_nbr_list = smalloc( l->total_intrs * sizeof(near_neighbor_data),
-                    "Make_List::l->near_nbr_list" );
+                    __FILE__, __LINE__ );
         }
         else
         {
@@ -125,7 +125,7 @@ void Make_List( int n, int n_max, int total_intrs, int type, reax_list* l )
         if ( l->total_intrs > 0 )
         {
             l->hbond_list = smalloc( l->total_intrs * sizeof(hbond_data),
-                    "Make_List::l->hbond_list" );
+                    __FILE__, __LINE__ );
         }
         else
         {
@@ -157,57 +157,57 @@ void Delete_List( int type, reax_list* l )
     l->n_max = 0;
     l->total_intrs = 0;
 
-    sfree( l->index, "Delete_List::l->index" );
-    sfree( l->end_index, "Delete_List::l->end_index" );
+    sfree( l->index, __FILE__, __LINE__ );
+    sfree( l->end_index, __FILE__, __LINE__ );
 
     switch ( type )
     {
     case TYP_THREE_BODY:
         if ( l->three_body_list != NULL )
         {
-            sfree( l->three_body_list, "Delete_List::l->three_body_list" );
+            sfree( l->three_body_list, __FILE__, __LINE__ );
         }
         break;
 
     case TYP_BOND:
         if ( l->bond_list != NULL )
         {
-            sfree( l->bond_list, "Delete_List::l->bond_list" );
+            sfree( l->bond_list, __FILE__, __LINE__ );
         }
         break;
 
     case TYP_DBO:
         if ( l->dbo_list != NULL )
         {
-            sfree( l->dbo_list, "Delete_List::l->dbo_list" );
+            sfree( l->dbo_list, __FILE__, __LINE__ );
         }
         break;
 
     case TYP_DDELTA:
         if ( l->dDelta_list != NULL )
         {
-            sfree( l->dDelta_list, "Delete_List::l->dDelta_list" );
+            sfree( l->dDelta_list, __FILE__, __LINE__ );
         }
         break;
 
     case TYP_FAR_NEIGHBOR:
         if ( l->far_nbr_list != NULL )
         {
-            sfree( l->far_nbr_list, "Delete_List::l->far_nbr_list" );
+            sfree( l->far_nbr_list, __FILE__, __LINE__ );
         }
         break;
 
     case TYP_NEAR_NEIGHBOR:
         if ( l->near_nbr_list != NULL )
         {
-            sfree( l->near_nbr_list, "Delete_List::l->near_nbr_list" );
+            sfree( l->near_nbr_list, __FILE__, __LINE__ );
         }
         break;
 
     case TYP_HBOND:
         if ( l->hbond_list != NULL )
         {
-            sfree( l->hbond_list, "Delete_List::l->hbond_list" );
+            sfree( l->hbond_list, __FILE__, __LINE__ );
         }
         break;
 
diff --git a/sPuReMD/src/list.h b/sPuReMD/src/list.h
index 3a0bcdcb39d1f57bd76eaaab3de8149a1f8935e1..6c46c1e9230fd04d9dee3f8282978bc426ed29d3 100644
--- a/sPuReMD/src/list.h
+++ b/sPuReMD/src/list.h
@@ -30,7 +30,7 @@ void Make_List( int, int, int, int, reax_list* );
 void Delete_List( int, reax_list* );
 
 
-static inline int Num_Entries( int i, reax_list* l )
+static inline int Num_Entries( int i, reax_list const * const l )
 {
     assert( l != NULL );
     assert( i >= 0 && i <= l->n );
@@ -39,7 +39,7 @@ static inline int Num_Entries( int i, reax_list* l )
 }
 
 
-static inline int Start_Index( int i, reax_list *l )
+static inline int Start_Index( int i, reax_list const * const l )
 {
     assert( l != NULL );
     assert( i >= 0 && i <= l->n );
@@ -48,7 +48,7 @@ static inline int Start_Index( int i, reax_list *l )
 }
 
 
-static inline int End_Index( int i, reax_list *l )
+static inline int End_Index( int i, reax_list const * const l )
 {
     assert( l != NULL );
     assert( i >= 0 && i <= l->n );
@@ -57,7 +57,7 @@ static inline int End_Index( int i, reax_list *l )
 }
 
 
-static inline void Set_Start_Index( int i, int val, reax_list *l )
+static inline void Set_Start_Index( int i, int val, reax_list * const l )
 {
     assert( l != NULL );
     assert( i >= 0 && i <= l->n );
@@ -67,7 +67,7 @@ static inline void Set_Start_Index( int i, int val, reax_list *l )
 }
 
 
-static inline void Set_End_Index( int i, int val, reax_list *l )
+static inline void Set_End_Index( int i, int val, reax_list * const l )
 {
     assert( l != NULL );
     assert( i >= 0 && i <= l->n );
diff --git a/sPuReMD/src/lookup.c b/sPuReMD/src/lookup.c
index a7b0b99c139e65034ea1591796649becb4a029bf..ec2b4b5441719f5c40563ffb9b3a73a0c67c948a 100644
--- a/sPuReMD/src/lookup.c
+++ b/sPuReMD/src/lookup.c
@@ -58,11 +58,11 @@ static void Natural_Cubic_Spline( const real *h, const real *f,
     real *a, *b, *c, *d, *v;
 
     /* allocate space for linear system */
-    a = smalloc( sizeof(real) * n, "Natural_Cubic_Spline::a" );
-    b = smalloc( sizeof(real) * n, "Natural_Cubic_Spline::b" );
-    c = smalloc( sizeof(real) * n, "Natural_Cubic_Spline::c" );
-    d = smalloc( sizeof(real) * n, "Natural_Cubic_Spline::d" );
-    v = smalloc( sizeof(real) * n, "Natural_Cubic_Spline::v" );
+    a = smalloc( sizeof(real) * n, __FILE__, __LINE__ );
+    b = smalloc( sizeof(real) * n, __FILE__, __LINE__ );
+    c = smalloc( sizeof(real) * n, __FILE__, __LINE__ );
+    d = smalloc( sizeof(real) * n, __FILE__, __LINE__ );
+    v = smalloc( sizeof(real) * n, __FILE__, __LINE__ );
 
     /* build linear system */
     a[0] = 0.0;
@@ -113,11 +113,11 @@ static void Natural_Cubic_Spline( const real *h, const real *f,
         coef[i - 1].a = f[i];
     }
 
-    sfree( a, "Natural_Cubic_Spline::a" );
-    sfree( b, "Natural_Cubic_Spline::b" );
-    sfree( c, "Natural_Cubic_Spline::c" );
-    sfree( d, "Natural_Cubic_Spline::d" );
-    sfree( v, "Natural_Cubic_Spline::v" );
+    sfree( a, __FILE__, __LINE__ );
+    sfree( b, __FILE__, __LINE__ );
+    sfree( c, __FILE__, __LINE__ );
+    sfree( d, __FILE__, __LINE__ );
+    sfree( v, __FILE__, __LINE__ );
 }
 
 
@@ -128,11 +128,11 @@ static void Complete_Cubic_Spline( const real *h, const real *f, real v0, real v
     real *a, *b, *c, *d, *v;
 
     /* allocate space for the linear system */
-    a = smalloc( sizeof(real) * n, "Complete_Cubic_Spline::a" );
-    b = smalloc( sizeof(real) * n, "Complete_Cubic_Spline::b" );
-    c = smalloc( sizeof(real) * n, "Complete_Cubic_Spline::c" );
-    d = smalloc( sizeof(real) * n, "Complete_Cubic_Spline::d" );
-    v = smalloc( sizeof(real) * n, "Complete_Cubic_Spline::v" );
+    a = smalloc( sizeof(real) * n, __FILE__, __LINE__ );
+    b = smalloc( sizeof(real) * n, __FILE__, __LINE__ );
+    c = smalloc( sizeof(real) * n, __FILE__, __LINE__ );
+    d = smalloc( sizeof(real) * n, __FILE__, __LINE__ );
+    v = smalloc( sizeof(real) * n, __FILE__, __LINE__ );
 
     /* build the linear system */
     a[0] = 0.0;
@@ -176,11 +176,11 @@ static void Complete_Cubic_Spline( const real *h, const real *f, real v0, real v
         coef[i - 1].a = f[i];
     }
 
-    sfree( a, "Complete_Cubic_Spline::a" );
-    sfree( b, "Complete_Cubic_Spline::b" );
-    sfree( c, "Complete_Cubic_Spline::c" );
-    sfree( d, "Complete_Cubic_Spline::d" );
-    sfree( v, "Complete_Cubic_Spline::v" );
+    sfree( a, __FILE__, __LINE__ );
+    sfree( b, __FILE__, __LINE__ );
+    sfree( c, __FILE__, __LINE__ );
+    sfree( d, __FILE__, __LINE__ );
+    sfree( v, __FILE__, __LINE__ );
 }
 
 
@@ -239,27 +239,21 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control,
 
     num_atom_types = system->reax_param.num_atom_types;
     dr = control->nonb_cut / control->tabulate;
-    h = scalloc( control->tabulate + 2, sizeof(real),
-            "Make_LR_Lookup_Table::h" );
-    fh = scalloc( control->tabulate + 2, sizeof(real),
-            "Make_LR_Lookup_Table::fh" );
-    fvdw = scalloc( control->tabulate + 2, sizeof(real),
-            "Make_LR_Lookup_Table::fvdw" );
-    fCEvd = scalloc( control->tabulate + 2, sizeof(real),
-            "Make_LR_Lookup_Table::fCEvd" );
-    fele = scalloc( control->tabulate + 2, sizeof(real),
-            "Make_LR_Lookup_Table::fele" );
-    fCEclmb = scalloc( control->tabulate + 2, sizeof(real),
-            "Make_LR_Lookup_Table::fCEclmb" );
+    h = scalloc( control->tabulate + 2, sizeof(real), __FILE__, __LINE__ );
+    fh = scalloc( control->tabulate + 2, sizeof(real), __FILE__, __LINE__ );
+    fvdw = scalloc( control->tabulate + 2, sizeof(real), __FILE__, __LINE__ );
+    fCEvd = scalloc( control->tabulate + 2, sizeof(real), __FILE__, __LINE__ );
+    fele = scalloc( control->tabulate + 2, sizeof(real), __FILE__, __LINE__ );
+    fCEclmb = scalloc( control->tabulate + 2, sizeof(real), __FILE__, __LINE__ );
 
     /* allocate Long-Range LookUp Table space based on
        number of atom types in the ffield file */
-    workspace->LR = (LR_lookup_table**) smalloc( num_atom_types * sizeof(LR_lookup_table*),
-           "Make_LR_Lookup_Table::LR" );
+    workspace->LR = smalloc( num_atom_types * sizeof(LR_lookup_table*),
+           __FILE__, __LINE__ );
     for ( i = 0; i < num_atom_types; ++i )
     {
-        workspace->LR[i] = (LR_lookup_table*) smalloc( num_atom_types * sizeof(LR_lookup_table),
-                "Make_LR_Lookup_Table::LR[i]");
+        workspace->LR[i] = smalloc( num_atom_types * sizeof(LR_lookup_table),
+                __FILE__, __LINE__ );
     }
 
     /* most atom types in ffield file will not exist in the current
@@ -291,22 +285,22 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control,
                     workspace->LR[i][j].inv_dx = control->tabulate / control->nonb_cut;
                     workspace->LR[i][j].y = 
                         smalloc( workspace->LR[i][j].n * sizeof(LR_data),
-                              "Make_LR_Lookup_Table::LR[i][j].y" );
+                              __FILE__, __LINE__ );
                     workspace->LR[i][j].H = 
                         smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef),
-                              "Make_LR_Lookup_Table::LR[i][j].H" );
+                              __FILE__, __LINE__ );
                     workspace->LR[i][j].vdW = 
                         smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef),
-                              "Make_LR_Lookup_Table::LR[i][j].vdW" );
+                              __FILE__, __LINE__ );
                     workspace->LR[i][j].CEvd = 
                         smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef),
-                              "Make_LR_Lookup_Table::LR[i][j].CEvd" );
+                              __FILE__, __LINE__ );
                     workspace->LR[i][j].ele = 
                         smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef),
-                              "Make_LR_Lookup_Table::LR[i][j].ele" );
+                              __FILE__, __LINE__ );
                     workspace->LR[i][j].CEclmb = 
                         smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef),
-                              "Make_LR_Lookup_Table::LR[i][j].CEclmb" );
+                              __FILE__, __LINE__ );
 
                     for ( r = 1; r <= control->tabulate; ++r )
                     {
@@ -417,12 +411,12 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control,
              fprintf( stderr, "eele_maxerr: %24.15e\n", eele_maxerr );
     *******/
 
-    sfree( h, "Make_LR_Lookup_Table::h" );
-    sfree( fh, "Make_LR_Lookup_Table::fh" );
-    sfree( fvdw, "Make_LR_Lookup_Table::fvdw" );
-    sfree( fCEvd, "Make_LR_Lookup_Table::fCEvd" );
-    sfree( fele, "Make_LR_Lookup_Table::fele" );
-    sfree( fCEclmb, "Make_LR_Lookup_Table::fCEclmb" );
+    sfree( h, __FILE__, __LINE__ );
+    sfree( fh, __FILE__, __LINE__ );
+    sfree( fvdw, __FILE__, __LINE__ );
+    sfree( fCEvd, __FILE__, __LINE__ );
+    sfree( fele, __FILE__, __LINE__ );
+    sfree( fCEclmb, __FILE__, __LINE__ );
 }
 
 
@@ -452,18 +446,18 @@ void Finalize_LR_Lookup_Table( reax_system *system, control_params *control,
             {
                 if ( existing_types[j] )
                 {
-                    sfree( workspace->LR[i][j].y, "Finalize_LR_Lookup_Table::LR[i][j].y" );
-                    sfree( workspace->LR[i][j].H, "Finalize_LR_Lookup_Table::LR[i][j].H" );
-                    sfree( workspace->LR[i][j].vdW, "Finalize_LR_Lookup_Table::LR[i][j].vdW" );
-                    sfree( workspace->LR[i][j].CEvd, "Finalize_LR_Lookup_Table::LR[i][j].CEvd" );
-                    sfree( workspace->LR[i][j].ele, "Finalize_LR_Lookup_Table::LR[i][j].ele" );
-                    sfree( workspace->LR[i][j].CEclmb, "Finalize_LR_Lookup_Table::LR[i][j].CEclmb" );
+                    sfree( workspace->LR[i][j].y, __FILE__, __LINE__ );
+                    sfree( workspace->LR[i][j].H, __FILE__, __LINE__ );
+                    sfree( workspace->LR[i][j].vdW, __FILE__, __LINE__ );
+                    sfree( workspace->LR[i][j].CEvd, __FILE__, __LINE__ );
+                    sfree( workspace->LR[i][j].ele, __FILE__, __LINE__ );
+                    sfree( workspace->LR[i][j].CEclmb, __FILE__, __LINE__ );
                 }
             }
         }
 
-        sfree( workspace->LR[i], "Finalize_LR_Lookup_Table::LR[i]" );
+        sfree( workspace->LR[i], __FILE__, __LINE__ );
     }
 
-    sfree( workspace->LR, "Finalize_LR_Lookup_Table::LR" );
+    sfree( workspace->LR, __FILE__, __LINE__ );
 }
diff --git a/sPuReMD/src/neighbors.c b/sPuReMD/src/neighbors.c
index 8d6f59966fa4b4052434fb434d418f647705ceca..a673c46d4a455fee8f79318c03c50c8e1c44c175 100644
--- a/sPuReMD/src/neighbors.c
+++ b/sPuReMD/src/neighbors.c
@@ -193,11 +193,6 @@ int Estimate_Num_Neighbors( reax_system * const system,
                             {
                                 atom2 = nbr_atoms[m];
 
-#if defined(QMMM)
-                                if ( system->atoms[atom1].qmmm_mask == TRUE
-                                        || system->atoms[atom2].qmmm_mask == TRUE )
-                                {
-#endif
                                 if ( atom1 >= atom2 )
                                 {
                                     count = Count_Far_Neighbors( system->atoms[atom1].x,
@@ -206,9 +201,6 @@ int Estimate_Num_Neighbors( reax_system * const system,
 
                                     num_far += count;
                                 }
-#if defined(QMMM)
-                                }
-#endif
                             }
                         }
 
@@ -306,11 +298,6 @@ void Generate_Neighbor_Lists( reax_system * const system,
                             {
                                 atom2 = nbr_atoms[m];
 
-#if defined(QMMM)
-                                if ( system->atoms[atom1].qmmm_mask == TRUE
-                                        || system->atoms[atom2].qmmm_mask == TRUE )
-                                {
-#endif
                                 if ( atom1 >= atom2 )
                                 {
                                     nbr_data = &far_nbrs->far_nbr_list[num_far];
@@ -321,9 +308,6 @@ void Generate_Neighbor_Lists( reax_system * const system,
 
                                     num_far += count;
                                 }
-#if defined(QMMM)
-                                }
-#endif
                             }
                         }
 
diff --git a/sPuReMD/src/reax_types.h b/sPuReMD/src/reax_types.h
index e95616c38880cfdd3f5c936ca6e51933b034e5ca..1220988fe0f7ccc75eb6680d07b7aa48f0c0dfe5 100644
--- a/sPuReMD/src/reax_types.h
+++ b/sPuReMD/src/reax_types.h
@@ -759,6 +759,10 @@ struct reax_atom
 {
     /* integer representation of element type of this atom */
     int type;
+    /* TRUE if the atom is a dummy atom (atom name "X"), FALSE otherwise.
+     * Note: dummy atoms do not form bonds but participate
+     * in other (non-bonded) interactions */
+    int is_dummy;
     /* relative coordinates in terms of periodic images of the
      * simulation box which are used to track if this atom moves
      * between images between simulation steps which regenerate
@@ -1609,10 +1613,6 @@ struct static_storage
     /* for hydrogen bonds */
     int *hbond_index;
 
-#if defined(QMMM)
-    /* TRUE if the atom is in the QM region, FALSE otherwise (atom in MM region) */
-    int *mask_qmmm;
-#endif
     rvec *a; // used in integrators
     rvec *f_old;
     rvec *v_const;
@@ -1799,7 +1799,8 @@ struct spuremd_handle
     output_controls *out_control;
     /* TRUE if file I/O for simulation output enabled, FALSE otherwise */
     int output_enabled;
-    /* TRUE if reallocation is required due to num. atoms increasing, FALSE otherwise */
+    /* TRUE if reallocation is required due to num. atoms increasing
+     * (this includes first simulation run), FALSE otherwise */
     int realloc;
     /* Callback for getting simulation state at the end of each time step */
     callback_function callback;
diff --git a/sPuReMD/src/restart.c b/sPuReMD/src/restart.c
index 43ef92027138ace8de2199b232732c068fdd23a8..037d2870598d01a7a2ebfe19904f4077eae6d5c6 100644
--- a/sPuReMD/src/restart.c
+++ b/sPuReMD/src/restart.c
@@ -38,7 +38,7 @@ void Write_Binary_Restart( reax_system *system, control_params *control,
     restart_atom res_data;
 
     snprintf( fname, MAX_STR, "%.*s.res%d", MAX_STR - 12, control->sim_name, data->step );
-    fres = sfopen( fname, "wb" );
+    fres = sfopen( fname, "wb", __FILE__, __LINE__ );
 
     res_header.step = data->step;
     res_header.N = system->N;
@@ -63,7 +63,7 @@ void Write_Binary_Restart( reax_system *system, control_params *control,
         fwrite( &res_data, sizeof(restart_atom), 1, fres );
     }
 
-    sfclose( fres, "Write_Binary_Restart::fres" );
+    sfclose( fres, __FILE__, __LINE__ );
 
 #if defined(DEBUG_FOCUS)
     fprintf( stderr, "write restart - " );
@@ -81,7 +81,7 @@ void Read_Binary_Restart( const char * const fname, reax_system *system,
     restart_header res_header;
     restart_atom res_data;
 
-    fres = sfopen( fname, "rb" );
+    fres = sfopen( fname, "rb", __FILE__, __LINE__ );
 
     /* parse header of restart file */
     fread( &res_header, sizeof(restart_header), 1, fres );
@@ -139,7 +139,7 @@ void Read_Binary_Restart( const char * const fname, reax_system *system,
     fprintf( stderr, "system->N: %d, i: %d\n", system->N, i );
 #endif
 
-    sfclose( fres, "Read_Binary_Restart::fres" );
+    sfclose( fres, __FILE__, __LINE__ );
 
     data->step = data->prev_steps;
     /* target num. of MD sim. steps (nsteps)
@@ -157,7 +157,7 @@ void Write_ASCII_Restart( reax_system *system, control_params *control,
     reax_atom *p_atom;
 
     snprintf( fname, MAX_STR + 8, "%s.res%d", control->sim_name, data->step );
-    fres = sfopen( fname, "w" );
+    fres = sfopen( fname, "w", __FILE__, __LINE__ );
 
     fprintf( fres, RESTART_HEADER,
              data->step, system->N, data->therm.T, data->therm.xi,
@@ -176,7 +176,7 @@ void Write_ASCII_Restart( reax_system *system, control_params *control,
                  p_atom->v[0], p_atom->v[1], p_atom->v[2] );
     }
 
-    sfclose( fres, "Write_ASCII_Restart::fres" );
+    sfclose( fres, __FILE__, __LINE__ );
 
 #if defined(DEBUG_FOCUS)
     fprintf( stderr, "write restart - " );
@@ -192,7 +192,7 @@ void Read_ASCII_Restart( const char * const fname, reax_system *system,
     FILE *fres;
     reax_atom *p_atom;
 
-    fres = sfopen( fname, "r" );
+    fres = sfopen( fname, "r", __FILE__, __LINE__ );
 
     /* parse header of restart file */
     fscanf( fres, READ_RESTART_HEADER,
@@ -238,7 +238,7 @@ void Read_ASCII_Restart( const char * const fname, reax_system *system,
         workspace->map_serials[workspace->orig_id[i]] = i;
     }
 
-    sfclose( fres, "Read_ASCII_Restart::fres" );
+    sfclose( fres, __FILE__, __LINE__ );
 
     data->step = data->prev_steps;
     /* target num. of MD sim. steps (nsteps)
diff --git a/sPuReMD/src/spuremd.c b/sPuReMD/src/spuremd.c
index 7658e1a93f860fab1279c8b8ea89e758d3b78531..913d305015bb5a6724fef0fa10bcc273f2f1acdb 100644
--- a/sPuReMD/src/spuremd.c
+++ b/sPuReMD/src/spuremd.c
@@ -158,30 +158,19 @@ static void Allocate_Top_Level_Structs( spuremd_handle ** handle )
     int i;
 
     /* top-level allocation */
-    *handle = smalloc( sizeof(spuremd_handle), "Allocate_Top_Level_Structs::handle" );
+    *handle = smalloc( sizeof(spuremd_handle), __FILE__, __LINE__ );
 
     /* second-level allocations */
-    (*handle)->system = smalloc( sizeof(reax_system),
-           "Allocate_Top_Level_Structs::handle->system" );
-
-    (*handle)->control = smalloc( sizeof(control_params),
-           "Allocate_Top_Level_Structs::handle->control" );
-
-    (*handle)->data = smalloc( sizeof(simulation_data),
-           "Allocate_Top_Level_Structs::handle->data" );
-
-    (*handle)->workspace = smalloc( sizeof(static_storage),
-           "Allocate_Top_Level_Structs::handle->workspace" );
-
-    (*handle)->lists = smalloc( sizeof(reax_list *) * LIST_N,
-           "Allocate_Top_Level_Structs::handle->lists" );
+    (*handle)->system = smalloc( sizeof(reax_system), __FILE__, __LINE__ );
+    (*handle)->control = smalloc( sizeof(control_params), __FILE__, __LINE__ );
+    (*handle)->data = smalloc( sizeof(simulation_data), __FILE__, __LINE__ );
+    (*handle)->workspace = smalloc( sizeof(static_storage), __FILE__, __LINE__ );
+    (*handle)->lists = smalloc( sizeof(reax_list *) * LIST_N, __FILE__, __LINE__ );
     for ( i = 0; i < LIST_N; ++i )
     {
-        (*handle)->lists[i] = smalloc( sizeof(reax_list),
-                "Allocate_Top_Level_Structs::handle->lists[i]" );
+        (*handle)->lists[i] = smalloc( sizeof(reax_list), __FILE__, __LINE__ );
     }
-    (*handle)->out_control = smalloc( sizeof(output_controls),
-           "Allocate_Top_Level_Structs::handle->out_control" );
+    (*handle)->out_control = smalloc( sizeof(output_controls), __FILE__, __LINE__ );
 }
 
 
@@ -293,6 +282,9 @@ void * setup2( int num_atoms, const int * const atom_type,
 
     for ( i = 0; i < spmd_handle->system->N; ++i )
     {
+        assert( atom_type[i] >= 0
+                && atom_type[i] < spmd_handle->system->reax_param.num_atom_types );
+
         x[0] = pos[3 * i];
         x[1] = pos[3 * i + 1];
         x[2] = pos[3 * i + 2];
@@ -300,16 +292,25 @@ void * setup2( int num_atoms, const int * const atom_type,
         Fit_to_Periodic_Box( &spmd_handle->system->box, x );
 
         spmd_handle->workspace->orig_id[i] = i + 1;
-//        spmd_handle->system->atoms[i].type = Get_Atom_Type( &system->reax_param,
-//                element, sizeof(element) );
         spmd_handle->system->atoms[i].type = atom_type[i];
-//        strncpy( spmd_handle->system->atoms[i].name, atom_name,
-//                sizeof(spmd_handle->system->atoms[i].name) - 1 );
-//        spmd_handle->system->atoms[i].name[sizeof(spmd_handle->system->atoms[i].name) - 1] = '\0';
+        strncpy( spmd_handle->system->atoms[i].name,
+                spmd_handle->system->reax_param.sbp[atom_type[i]].name,
+                sizeof(spmd_handle->system->atoms[i].name) - 1 );
+        spmd_handle->system->atoms[i].name[sizeof(spmd_handle->system->atoms[i].name) - 1] = '\0';
         rvec_Copy( spmd_handle->system->atoms[i].x, x );
         rvec_MakeZero( spmd_handle->system->atoms[i].v );
         rvec_MakeZero( spmd_handle->system->atoms[i].f );
         spmd_handle->system->atoms[i].q = 0.0;
+            
+        /* flag dummy atoms: name must be exactly "X" (the terminator is compared too) */
+        if ( strncmp( spmd_handle->system->atoms[i].name, "X\0", 2 ) == 0 )
+        {
+           spmd_handle->system->atoms[i].is_dummy = TRUE;
+        }
+        else
+        {
+            spmd_handle->system->atoms[i].is_dummy = FALSE;            
+        }		
     }
 
     spmd_handle->system->N_max = (int) CEIL( SAFE_ZONE * spmd_handle->system->N );
@@ -368,8 +369,6 @@ int simulate( const void * const handle )
                 spmd_handle->output_enabled,
                 spmd_handle->realloc );
 
-        spmd_handle->realloc = FALSE;
-
         /* compute f_0 */
         //if( control.restart == FALSE ) {
         Reset( spmd_handle->system, spmd_handle->control, spmd_handle->data,
@@ -475,6 +474,7 @@ int simulate( const void * const handle )
             fprintf( spmd_handle->out_control->log, "total: %.2f secs\n", spmd_handle->data->timing.elapsed );
         }
 
+        spmd_handle->realloc = FALSE;
         ret = SPUREMD_SUCCESS;
     }
 
@@ -503,18 +503,18 @@ int cleanup( const void * const handle )
                 spmd_handle->workspace, spmd_handle->lists, spmd_handle->out_control,
                 spmd_handle->output_enabled, FALSE );
 
-        sfree( spmd_handle->out_control, "cleanup::spmd_handle->out_control" );
+        sfree( spmd_handle->out_control, __FILE__, __LINE__ );
         for ( i = 0; i < LIST_N; ++i )
         {
-            sfree( spmd_handle->lists[i], "cleanup::spmd_handle->lists[i]" );
+            sfree( spmd_handle->lists[i], __FILE__, __LINE__ );
         }
-        sfree( spmd_handle->lists, "cleanup::spmd_handle->lists" );
-        sfree( spmd_handle->workspace, "cleanup::spmd_handle->workspace" );
-        sfree( spmd_handle->data, "cleanup::spmd_handle->data" );
-        sfree( spmd_handle->control, "cleanup::spmd_handle->control" );
-        sfree( spmd_handle->system, "cleanup::spmd_handle->system" );
+        sfree( spmd_handle->lists, __FILE__, __LINE__ );
+        sfree( spmd_handle->workspace, __FILE__, __LINE__ );
+        sfree( spmd_handle->data, __FILE__, __LINE__ );
+        sfree( spmd_handle->control, __FILE__, __LINE__ );
+        sfree( spmd_handle->system, __FILE__, __LINE__ );
 
-        sfree( spmd_handle, "cleanup::spmd_handle" );
+        sfree( spmd_handle, __FILE__, __LINE__ );
 
         ret = SPUREMD_SUCCESS;
     }
@@ -639,6 +639,9 @@ int reset2( const void * const handle, int num_atoms,
 
         for ( i = 0; i < spmd_handle->system->N; ++i )
         {
+            assert( atom_type[i] >= 0
+                    && atom_type[i] < spmd_handle->system->reax_param.num_atom_types );
+
             x[0] = pos[3 * i];
             x[1] = pos[3 * i + 1];
             x[2] = pos[3 * i + 2];
@@ -646,16 +649,25 @@ int reset2( const void * const handle, int num_atoms,
             Fit_to_Periodic_Box( &spmd_handle->system->box, x );
 
             spmd_handle->workspace->orig_id[i] = i + 1;
-//            spmd_handle->system->atoms[i].type = Get_Atom_Type( &system->reax_param,
-//                    element, sizeof(element) );
             spmd_handle->system->atoms[i].type = atom_type[i];
-//            strncpy( spmd_handle->system->atoms[i].name, atom_name,
-//                    sizeof(spmd_handle->system->atoms[i].name) - 1 );
-//            spmd_handle->system->atoms[i].name[sizeof(spmd_handle->system->atoms[i].name) - 1] = '\0';
+            strncpy( spmd_handle->system->atoms[i].name,
+                    spmd_handle->system->reax_param.sbp[atom_type[i]].name,
+                    sizeof(spmd_handle->system->atoms[i].name) - 1 );
+            spmd_handle->system->atoms[i].name[sizeof(spmd_handle->system->atoms[i].name) - 1] = '\0';
             rvec_Copy( spmd_handle->system->atoms[i].x, x );
             rvec_MakeZero( spmd_handle->system->atoms[i].v );
             rvec_MakeZero( spmd_handle->system->atoms[i].f );
             spmd_handle->system->atoms[i].q = 0.0;
+                
+            /* check for dummy atom */
+            if ( strncmp( spmd_handle->system->atoms[i].name, "X\0", 2 ) == 0 )
+            {
+               spmd_handle->system->atoms[i].is_dummy = TRUE;
+            }
+            else
+            {
+                spmd_handle->system->atoms[i].is_dummy = FALSE;            
+            }		
         }
 
         if ( spmd_handle->system->N > spmd_handle->system->N_max )
@@ -960,12 +972,18 @@ int set_control_parameter( const void * const handle, const char * const keyword
  * sim_box_info: simulation box information, where the entries are
  *  - box length per dimension (3 entries)
  *  - angles per dimension (3 entries)
+ * num_charge_constraints: num. of charge constraints for the charge model
+ * charge_constraint_start: array (num_charge_constraints entries) of starting atom nums. (1-based) of the atom group for each charge constraint
+ * charge_constraint_end: array (num_charge_constraints entries) of ending atom nums. (1-based) of the atom group for each charge constraint
+ * charge_constraint_value: array (num_charge_constraints entries) of charge constraint values, one per atom group
  * ffield_file: file containing force field parameters
  * control_file: file containing simulation parameters
  */
 void * setup_qmmm( int qm_num_atoms, const char * const qm_symbols,
         const double * const qm_pos, int mm_num_atoms, const char * const mm_symbols,
         const double * const mm_pos_q, const double * const sim_box_info,
+        int num_charge_constraints, const int * const charge_constraint_start,
+        const int * const charge_constraint_end, const double * const charge_constraint_value,
         const char * const ffield_file, const char * const control_file )
 {
     int i;
@@ -984,6 +1002,26 @@ void * setup_qmmm( int qm_num_atoms, const char * const qm_symbols,
             spmd_handle->data, spmd_handle->workspace,
             spmd_handle->out_control, FALSE );
 
+    spmd_handle->system->max_num_molec_charge_constraints = num_charge_constraints;
+    spmd_handle->system->num_molec_charge_constraints = num_charge_constraints;
+
+    if ( spmd_handle->system->num_molec_charge_constraints > 0 )
+    {
+        spmd_handle->system->molec_charge_constraints = smalloc(
+                sizeof(real) * spmd_handle->system->num_molec_charge_constraints,
+                __FILE__, __LINE__ );
+        spmd_handle->system->molec_charge_constraint_ranges = smalloc(
+                sizeof(int) * 2 * spmd_handle->system->num_molec_charge_constraints,
+                __FILE__, __LINE__ );
+
+        for ( i = 0; i < spmd_handle->system->num_molec_charge_constraints; ++i )
+        {
+            spmd_handle->system->molec_charge_constraint_ranges[2 * i] = charge_constraint_start[i];
+            spmd_handle->system->molec_charge_constraint_ranges[2 * i + 1] = charge_constraint_end[i];
+            spmd_handle->system->molec_charge_constraints[i] = charge_constraint_value[i];
+        }
+    }
+
     spmd_handle->system->N_qm = qm_num_atoms;
     spmd_handle->system->N_mm = mm_num_atoms;
     spmd_handle->system->N = spmd_handle->system->N_qm + spmd_handle->system->N_mm;
@@ -1022,8 +1060,17 @@ void * setup_qmmm( int qm_num_atoms, const char * const qm_symbols,
         rvec_MakeZero( spmd_handle->system->atoms[i].f );
         spmd_handle->system->atoms[i].q = 0.0;
         spmd_handle->system->atoms[i].q_init = 0.0;
-
         spmd_handle->system->atoms[i].qmmm_mask = TRUE;
+            
+        /* check for dummy atom */
+        if ( strncmp( element, "X\0", 2 ) == 0 )
+        {
+           spmd_handle->system->atoms[i].is_dummy = TRUE;
+        }
+        else
+        {
+            spmd_handle->system->atoms[i].is_dummy = FALSE;            
+        }		
     }
 
     for ( i = spmd_handle->system->N_qm; i < spmd_handle->system->N; ++i )
@@ -1048,8 +1095,17 @@ void * setup_qmmm( int qm_num_atoms, const char * const qm_symbols,
         rvec_MakeZero( spmd_handle->system->atoms[i].f );
         spmd_handle->system->atoms[i].q = mm_pos_q[4 * (i - spmd_handle->system->N_qm) + 3];
         spmd_handle->system->atoms[i].q_init = mm_pos_q[4 * (i - spmd_handle->system->N_qm) + 3];
-
         spmd_handle->system->atoms[i].qmmm_mask = FALSE;
+            
+        /* check for dummy atom */
+        if ( strncmp( element, "X\0", 2 ) == 0 )
+        {
+           spmd_handle->system->atoms[i].is_dummy = TRUE;
+        }
+        else
+        {
+            spmd_handle->system->atoms[i].is_dummy = FALSE;            
+        }		
     }
 
     spmd_handle->system->N_max = (int) CEIL( SAFE_ZONE * spmd_handle->system->N );
@@ -1071,6 +1127,10 @@ void * setup_qmmm( int qm_num_atoms, const char * const qm_symbols,
  * sim_box_info: simulation box information, where the entries are
  *  - box length per dimension (3 entries)
  *  - angles per dimension (3 entries)
+ * num_charge_constraints: num. of charge constraints for the charge model
+ * charge_constraint_start: array (num_charge_constraints entries) of starting atom nums. (1-based) of the atom group for each charge constraint
+ * charge_constraint_end: array (num_charge_constraints entries) of ending atom nums. (1-based) of the atom group for each charge constraint
+ * charge_constraint_value: array (num_charge_constraints entries) of charge constraint values, one per atom group
  * ffield_file: file containing force field parameters
  * control_file: file containing simulation parameters
  *
@@ -1080,6 +1140,8 @@ int reset_qmmm( const void * const handle, int qm_num_atoms,
         const char * const qm_symbols, const double * const qm_pos,
         int mm_num_atoms, const char * const mm_symbols,
         const double * const mm_pos_q, const double * const sim_box_info,
+        int num_charge_constraints, const int * const charge_constraint_start,
+        const int * const charge_constraint_end, const double * const charge_constraint_value,
         const char * const ffield_file, const char * const control_file )
 {
     int i, ret;
@@ -1108,6 +1170,40 @@ int reset_qmmm( const void * const handle, int qm_num_atoms,
                 spmd_handle->data, spmd_handle->workspace,
                 spmd_handle->out_control, TRUE );
 
+        spmd_handle->system->num_molec_charge_constraints = num_charge_constraints;
+
+        if ( spmd_handle->system->num_molec_charge_constraints
+                > spmd_handle->system->max_num_molec_charge_constraints )
+        {
+            if ( spmd_handle->system->max_num_molec_charge_constraints > 0 )
+            {
+                sfree( spmd_handle->system->molec_charge_constraints,
+                        __FILE__, __LINE__ );
+                sfree( spmd_handle->system->molec_charge_constraint_ranges,
+                        __FILE__, __LINE__ );
+            }
+
+            spmd_handle->system->molec_charge_constraints = smalloc(
+                    sizeof(real) * spmd_handle->system->num_molec_charge_constraints,
+                    __FILE__, __LINE__ );
+            spmd_handle->system->molec_charge_constraint_ranges = smalloc(
+                    sizeof(int) * 2 * spmd_handle->system->num_molec_charge_constraints,
+                    __FILE__, __LINE__ );
+
+            spmd_handle->system->max_num_molec_charge_constraints
+                = spmd_handle->system->num_molec_charge_constraints;
+        }
+
+        if ( spmd_handle->system->num_molec_charge_constraints > 0 )
+        {
+            for ( i = 0; i < spmd_handle->system->num_molec_charge_constraints; ++i )
+            {
+                spmd_handle->system->molec_charge_constraint_ranges[2 * i] = charge_constraint_start[i];
+                spmd_handle->system->molec_charge_constraint_ranges[2 * i + 1] = charge_constraint_end[i];
+                spmd_handle->system->molec_charge_constraints[i] = charge_constraint_value[i];
+            }
+        }
+
         spmd_handle->system->N_qm = qm_num_atoms;
         spmd_handle->system->N_mm = mm_num_atoms;
         spmd_handle->system->N = spmd_handle->system->N_qm + spmd_handle->system->N_mm;
@@ -1146,8 +1242,18 @@ int reset_qmmm( const void * const handle, int qm_num_atoms,
             rvec_MakeZero( spmd_handle->system->atoms[i].v );
             rvec_MakeZero( spmd_handle->system->atoms[i].f );
             spmd_handle->system->atoms[i].q = 0.0;
-
+            spmd_handle->system->atoms[i].q_init = 0.0;
             spmd_handle->system->atoms[i].qmmm_mask = TRUE;
+            
+            /* check for dummy atom */
+            if ( strncmp( element, "X\0", 2 ) == 0 )
+            {
+               spmd_handle->system->atoms[i].is_dummy = TRUE;
+            }
+            else
+            {
+                spmd_handle->system->atoms[i].is_dummy = FALSE;            
+            }		
         }
 
         for ( i = spmd_handle->system->N_qm; i < spmd_handle->system->N; ++i )
@@ -1173,6 +1279,16 @@ int reset_qmmm( const void * const handle, int qm_num_atoms,
             spmd_handle->system->atoms[i].q = mm_pos_q[4 * (i - spmd_handle->system->N_qm) + 3];
             spmd_handle->system->atoms[i].q_init = mm_pos_q[4 * (i - spmd_handle->system->N_qm) + 3];
             spmd_handle->system->atoms[i].qmmm_mask = FALSE;
+            
+            /* check for dummy atom */
+            if ( strncmp( element, "X\0", 2 ) == 0 )
+            {
+               spmd_handle->system->atoms[i].is_dummy = TRUE;
+            }
+            else
+            {
+                spmd_handle->system->atoms[i].is_dummy = FALSE;            
+            }		
         }
 
         if ( spmd_handle->system->N > spmd_handle->system->N_max )
diff --git a/sPuReMD/src/spuremd.h b/sPuReMD/src/spuremd.h
index 22f534e5ae05fce542ba1ee999948c42853e4533..5b4d44f077a94b529f177944f8821b10eaa9c8ad 100644
--- a/sPuReMD/src/spuremd.h
+++ b/sPuReMD/src/spuremd.h
@@ -76,12 +76,14 @@ int set_control_parameter( const void * const, const char * const,
 void * setup_qmmm( int, const char * const,
         const double * const, int, const char * const,
         const double * const, const double * const,
+        int, const int * const, const int * const, const double * const,
         const char * const, const char * const );
 
 int reset_qmmm( const void * const, int, const char * const,
         const double * const, int, const char * const,
         const double * const, const double * const,
-        const char * const, const char * const );
+        int, const int * const, const int * const, const double * const,
+        const char * const, const char * const);
 
 int get_atom_positions_qmmm( const void * const, double * const,
         double * const );
diff --git a/sPuReMD/src/tool_box.c b/sPuReMD/src/tool_box.c
index 0483e048cec24801e1bf05a1e0f4851818ce11dc..dab56c3393c01ec740274dac33eba96b0b8cd59f 100644
--- a/sPuReMD/src/tool_box.c
+++ b/sPuReMD/src/tool_box.c
@@ -170,12 +170,24 @@ int is_Valid_Serial( int serial )
 }
 
 
-int Check_Input_Range( int val, int lo, int hi, char *message )
+/* Validate atom serial numbers in BGF geometry file
+ *
+ * val: atom serial to validate
+ * lo: lower limit (inclusive) of valid serial range
+ * hi: upper limit (inclusive) of valid serial range
+ * filename: source filename of caller
+ * line: source line of caller
+ */
+int Check_Input_Range( int val, int lo, int hi, const char * const filename,
+        int line )
 {
     if ( val < lo || val > hi )
     {
-        fprintf( stderr, "[ERROR] %s\nInput %d - Out of range %d-%d. Terminating...\n",
-                 message, val, lo, hi );
+        fprintf( stderr, "[ERROR] Invalid BGF serial\n" );
+        fprintf( stderr, "    [INFO] At line %d in file %.*s\n",
+                line, (int) strlen(filename), filename );
+        fprintf( stderr, "    [INFO] Input %d - Out of range %d-%d. Terminating...\n",
+                 val, lo, hi );
         exit( INVALID_INPUT );
     }
 
@@ -290,14 +302,14 @@ void Allocate_Tokenizer_Space( char **line, size_t line_size,
 {
     int i;
 
-    *line = smalloc( sizeof(char) * line_size, "Allocate_Tokenizer_Space::*line" );
-    *backup = smalloc( sizeof(char) * backup_size, "Allocate_Tokenizer_Space::*backup" );
-    *tokens = smalloc( sizeof(char*) * num_tokens, "Allocate_Tokenizer_Space::*tokens" );
+    *line = smalloc( sizeof(char) * line_size, __FILE__, __LINE__ );
+    *backup = smalloc( sizeof(char) * backup_size, __FILE__, __LINE__ );
+    *tokens = smalloc( sizeof(char*) * num_tokens, __FILE__, __LINE__ );
 
     for ( i = 0; i < num_tokens; i++ )
     {
         (*tokens)[i] = smalloc( sizeof(char) * token_size,
-                "Allocate_Tokenizer_Space::(*tokens)[i]" );
+                __FILE__, __LINE__ );
     }
 }
 
@@ -309,12 +321,12 @@ void Deallocate_Tokenizer_Space( char **line, char **backup,
 
     for ( i = 0; i < num_tokens; i++ )
     {
-        sfree( (*tokens)[i], "Deallocate_Tokenizer_Space::tokens[i]" );
+        sfree( (*tokens)[i], __FILE__, __LINE__ );
     }
 
-    sfree( *line, "Deallocate_Tokenizer_Space::line" );
-    sfree( *backup, "Deallocate_Tokenizer_Space::backup" );
-    sfree( *tokens, "Deallocate_Tokenizer_Space::tokens" );
+    sfree( *line, __FILE__, __LINE__ );
+    sfree( *backup, __FILE__, __LINE__ );
+    sfree( *tokens, __FILE__, __LINE__ );
 }
 
 
@@ -343,27 +355,30 @@ int Tokenize( char* s, char*** tok, size_t token_len )
 }
 
 
-/***************** taken from lammps ************************/
 /* Safe wrapper around libc malloc
  *
  * n: num. of bytes to allocated
- * name: message with details about pointer, used for warnings/errors
+ * filename: source filename of caller
+ * line: source line of caller
  *
  * returns: ptr to allocated memory
  * */
-void * smalloc( size_t n, const char *name )
+void * smalloc( size_t n, const char * const filename, int line )
 {
     void *ptr;
 
     if ( n == 0 )
     {
-        fprintf( stderr, "[ERROR] failed to allocate %zu bytes for array %s.\n",
-                n, name );
+        fprintf( stderr, "[ERROR] failed to allocate %zu bytes for array\n",
+                n );
+        fprintf( stderr, "    [INFO] At line %d in file %.*s\n",
+                line, (int) strlen(filename), filename );
         exit( INSUFFICIENT_MEMORY );
     }
 
 #if defined(DEBUG_FOCUS)
-    fprintf( stderr, "[INFO] requesting memory for %s\n", name );
+    fprintf( stderr, "[INFO] requesting allocation of %zu bytes of memory at line %d in file %.*s\n",
+            n, line, (int) strlen(filename), filename );
     fflush( stderr );
 #endif
 
@@ -371,8 +386,10 @@ void * smalloc( size_t n, const char *name )
 
     if ( ptr == NULL )
     {
-        fprintf( stderr, "[ERROR] failed to allocate %zu bytes for array %s.\n",
-                n, name );
+        fprintf( stderr, "[ERROR] failed to allocate %zu bytes for array\n",
+                n );
+        fprintf( stderr, "    [INFO] At line %d in file %.*s\n",
+                line, (int) strlen(filename), filename );
         exit( INSUFFICIENT_MEMORY );
     }
 
@@ -388,29 +405,27 @@ void * smalloc( size_t n, const char *name )
 /* Safe wrapper around libc realloc
  *
  * n: num. of bytes to reallocated
- * name: message with details about pointer, used for warnings/errors
+ * filename: source filename of caller
+ * line: source line of caller
  *
  * returns: ptr to reallocated memory
  * */
-void * srealloc( void *ptr, size_t n, const char *name )
+void * srealloc( void *ptr, size_t n, const char * const filename, int line )
 {
     void *new_ptr;
 
     if ( n == 0 )
     {
-        fprintf( stderr, "[ERROR] failed to reallocate %zu bytes for array %s.\n",
-                n, name );
+        fprintf( stderr, "[ERROR] failed to reallocate %zu bytes for array\n",
+                n );
+        fprintf( stderr, "    [INFO] At line %d in file %.*s\n",
+                line, (int) strlen(filename), filename );
         exit( INSUFFICIENT_MEMORY );
     }
 
-    if ( ptr == NULL )
-    {
-        fprintf( stderr, "[INFO] trying to allocate %zu NEW bytes for array %s.\n",
-                n, name );
-    }
-
 #if defined(DEBUG_FOCUS)
-    fprintf( stderr, "[INFO] requesting memory for %s\n", name );
+    fprintf( stderr, "[INFO] requesting reallocation of %zu bytes of memory at line %d in file %.*s\n",
+            n, line, (int) strlen(filename), filename );
     fflush( stderr );
 #endif
 
@@ -420,8 +435,10 @@ void * srealloc( void *ptr, size_t n, const char *name )
      * but we needed more memory, so abort */
     if ( new_ptr == NULL )
     {
-        fprintf( stderr, "[ERROR] failed to reallocate %zu bytes for array %s.\n",
-                n, name );
+        fprintf( stderr, "[ERROR] failed to reallocate %zu bytes for array\n",
+                n );
+        fprintf( stderr, "    [INFO] At line %d in file %.*s\n",
+                line, (int) strlen(filename), filename );
         exit( INSUFFICIENT_MEMORY );
     }
 
@@ -438,23 +455,27 @@ void * srealloc( void *ptr, size_t n, const char *name )
  *
  * n: num. of elements to allocated (each of size bytes)
  * size: num. of bytes per element
- * name: message with details about pointer, used for warnings/errors
+ * filename: source filename of caller
+ * line: source line of caller
  *
  * returns: ptr to allocated memory, all bits initialized to zeros
  * */
-void * scalloc( size_t n, size_t size, const char *name )
+void * scalloc( size_t n, size_t size, const char * const filename, int line )
 {
     void *ptr;
 
     if ( n == 0 )
     {
-        fprintf( stderr, "[ERROR] failed to allocate %zu bytes for array %s.\n",
-                n * size, name );
+        fprintf( stderr, "[ERROR] failed to allocate %zu bytes for array\n",
+                n * size );
+        fprintf( stderr, "    [INFO] At line %d in file %.*s\n",
+                line, (int) strlen(filename), filename );
         exit( INSUFFICIENT_MEMORY );
     }
 
 #if defined(DEBUG_FOCUS)
-    fprintf( stderr, "[INFO] requesting memory for %s\n", name );
+    fprintf( stderr, "[INFO] requesting allocation of %zu bytes of zeroed memory at line %d in file %.*s\n",
+            n * size, line, (int) strlen(filename), filename );
     fflush( stderr );
 #endif
 
@@ -462,8 +483,10 @@ void * scalloc( size_t n, size_t size, const char *name )
 
     if ( ptr == NULL )
     {
-        fprintf( stderr, "[ERROR] failed to allocate %zu bytes for array %s.\n",
-                n * size, name );
+        fprintf( stderr, "[ERROR] failed to allocate %zu bytes for array\n",
+                n * size );
+        fprintf( stderr, "    [INFO] At line %d in file %.*s\n",
+                line, (int) strlen(filename), filename );
         exit( INSUFFICIENT_MEMORY );
     }
 
@@ -479,19 +502,22 @@ void * scalloc( size_t n, size_t size, const char *name )
 /* Safe wrapper around libc free
  *
  * ptr: pointer to dynamically allocated memory which will be deallocated
- * name: message with details about pointer, used for warnings/errors
+ * filename: source filename of caller
+ * line: source line of caller
  * */
-void sfree( void *ptr, const char *name )
+void sfree( void *ptr, const char * const filename, int line )
 {
     if ( ptr == NULL )
     {
-        fprintf( stderr, "[WARNING] trying to free the already NULL pointer %s!\n",
-                name );
+        fprintf( stderr, "[WARNING] trying to free the already NULL pointer\n" );
+        fprintf( stderr, "    [INFO] At line %d in file %.*s\n",
+                line, (int) strlen(filename), filename );
         return;
     }
 
 #if defined(DEBUG_FOCUS)
-    fprintf( stderr, "[INFO] trying to free pointer %s\n", name );
+    fprintf( stderr, "[INFO] trying to free pointer at line %d in file %.*s\n",
+            line, (int) strlen(filename), filename );
     fflush( stderr );
     fprintf( stderr, "[INFO] address: %p [SFREE]\n", (void *) ptr );
     fflush( stderr );
@@ -505,20 +531,27 @@ void sfree( void *ptr, const char *name )
  *
  * fname: name of file to be opened
  * mode: mode in which to open file
+ * filename: source filename of caller
+ * line: source line of caller
  * */
-FILE * sfopen( const char * fname, const char * mode )
+FILE * sfopen( const char * fname, const char * mode,
+        const char * const filename, int line )
 {
     FILE * ptr;
 
     if ( fname == NULL )
     {
         fprintf( stderr, "[ERROR] trying to open file\n" );
+        fprintf( stderr, "    [INFO] At line %d in file %.*s\n",
+                line, (int) strlen(filename), filename );
         fprintf( stderr, "  [INFO] NULL file name\n" );
         exit( INVALID_INPUT );
     }
     if ( mode == NULL )
     {
         fprintf( stderr, "[ERROR] trying to open file\n" );
+        fprintf( stderr, "    [INFO] At line %d in file %.*s\n",
+                line, (int) strlen(filename), filename );
         fprintf( stderr, "  [INFO] NULL mode\n" );
         exit( INVALID_INPUT );
     }
@@ -529,6 +562,8 @@ FILE * sfopen( const char * fname, const char * mode )
     {
         fprintf( stderr, "[ERROR] failed to open file %s with mode %s\n",
               fname, mode );
+        fprintf( stderr, "    [INFO] At line %d in file %.*s\n",
+                line, (int) strlen(filename), filename );
         exit( INVALID_INPUT );
     }
 
@@ -538,18 +573,19 @@ FILE * sfopen( const char * fname, const char * mode )
 
 /* Safe wrapper around libc fclose
  *
- * fname: name of file to be opened
- * mode: mode in which to open file
- * msg: message to be printed in case of error
+ * fp: pointer to file to close
+ * filename: source filename of caller
+ * line: source line of caller
  * */
-void sfclose( FILE * fp, const char * msg )
+void sfclose( FILE * fp, const char * const filename, int line )
 {
     int ret;
 
     if ( fp == NULL )
     {
         fprintf( stderr, "[WARNING] trying to close NULL file pointer. Returning...\n" );
-        fprintf( stderr, "  [INFO] %s\n", msg );
+        fprintf( stderr, "    [INFO] At line %d in file %.*s\n",
+                line, (int) strlen(filename), filename );
         return;
     }
 
@@ -558,7 +594,8 @@ void sfclose( FILE * fp, const char * msg )
     if ( ret != 0 )
     {
         fprintf( stderr, "[ERROR] error detected when closing file\n" );
-        fprintf( stderr, "  [INFO] %s\n", msg );
+        fprintf( stderr, "    [INFO] At line %d in file %.*s\n",
+                line, (int) strlen(filename), filename );
         exit( INVALID_INPUT );
     }
 }
diff --git a/sPuReMD/src/tool_box.h b/sPuReMD/src/tool_box.h
index 88e333c9a9a6b8d0a0a7a69240e0523177883c67..dbc8028e0b55ebfdce33bed6b6559aeb94912474 100644
--- a/sPuReMD/src/tool_box.h
+++ b/sPuReMD/src/tool_box.h
@@ -39,7 +39,7 @@ void Make_Point( real, real, real, rvec * );
 
 int is_Valid_Serial( int );
 
-int Check_Input_Range( int, int, int, char * );
+int Check_Input_Range( int, int, int, const char * const, int );
 
 void Trim_Spaces( char * const, const size_t );
 
@@ -63,23 +63,20 @@ void Deallocate_Tokenizer_Space( char **, char **, char ***,
 
 int Tokenize( char *, char ***, size_t );
 
-/* from lammps */
-void * smalloc( size_t, const char * );
+void * smalloc( size_t, const char * const, int );
 
-void * srealloc( void *, size_t, const char * );
+void * srealloc( void *, size_t, const char * const, int );
 
-void * scalloc( size_t, size_t, const char * );
+void * scalloc( size_t, size_t, const char * const, int );
 
-void sfree( void *, const char * );
+void sfree( void *, const char * const, int );
 
-FILE * sfopen( const char *, const char * );
+FILE * sfopen( const char *, const char *, const char * const, int );
 
-void sfclose( FILE *, const char * );
+void sfclose( FILE *, const char * const, int );
 
-int sstrtol( const char * const,
-        const char * const, int );
+int sstrtol( const char * const, const char * const, int );
 
-double sstrtod( const char * const,
-        const char * const, int );
+double sstrtod( const char * const, const char * const, int );
 
 #endif