diff --git a/sPuReMD/configure.ac b/sPuReMD/configure.ac
index ce330dc6b5aa08436972f70edb79df3f27703cce..89eec9342401993b1678bf01d45a3b851b5d6996 100644
--- a/sPuReMD/configure.ac
+++ b/sPuReMD/configure.ac
@@ -76,6 +76,7 @@ if test "x${BUILD_OPENMP}" = "xyes"; then
 		fi
 		AC_SUBST(AM_CFLAGS, "$OPENMP_CFLAGS")
 		AC_SUBST(AM_CPPFLAGS, "$OPENMP_CFLAGS")
+		LIBS="${LIBS} -lgomp"
 	fi
 fi
 
diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c
index 4ad3069f78e7fa0158f92c7872eaad6fdc81a139..86b6ebce8310e4a8c044bae6aed709c863e565f8 100644
--- a/sPuReMD/src/forces.c
+++ b/sPuReMD/src/forces.c
@@ -34,10 +34,6 @@
 #include "vector.h"
 
 
-/* File scope variables */
-static interaction_function Interaction_Functions[NO_OF_INTERACTIONS];
-
-
 typedef enum
 {
     DIAGONAL = 0,
@@ -52,32 +48,33 @@ void Dummy_Interaction( reax_system *system, control_params *control,
 }
 
 
-void Init_Bonded_Force_Functions( control_params *control )
+void Init_Bonded_Force_Functions( control_params *control,
+        interaction_function *interaction_functions )
 {
-    Interaction_Functions[0] = Calculate_Bond_Orders;
-    Interaction_Functions[1] = Bond_Energy;  //*/Dummy_Interaction;
-    Interaction_Functions[2] = LonePair_OverUnder_Coordination_Energy;
+    interaction_functions[0] = Calculate_Bond_Orders;
+    interaction_functions[1] = Bond_Energy;  //*/Dummy_Interaction;
+    interaction_functions[2] = LonePair_OverUnder_Coordination_Energy;
     //*/Dummy_Interaction;
-    Interaction_Functions[3] = Three_Body_Interactions; //*/Dummy_Interaction;
-    Interaction_Functions[4] = Four_Body_Interactions;  //*/Dummy_Interaction;
+    interaction_functions[3] = Three_Body_Interactions; //*/Dummy_Interaction;
+    interaction_functions[4] = Four_Body_Interactions;  //*/Dummy_Interaction;
     if ( control->hb_cut > 0.0 )
     {
-        Interaction_Functions[5] = Hydrogen_Bonds; //*/Dummy_Interaction;
+        interaction_functions[5] = Hydrogen_Bonds; //*/Dummy_Interaction;
     }
     else
     {
-        Interaction_Functions[5] = Dummy_Interaction;
+        interaction_functions[5] = Dummy_Interaction;
     }
-    Interaction_Functions[6] = Dummy_Interaction; //empty
-    Interaction_Functions[7] = Dummy_Interaction; //empty
-    Interaction_Functions[8] = Dummy_Interaction; //empty
-    Interaction_Functions[9] = Dummy_Interaction; //empty
+    interaction_functions[6] = Dummy_Interaction; //empty
+    interaction_functions[7] = Dummy_Interaction; //empty
+    interaction_functions[8] = Dummy_Interaction; //empty
+    interaction_functions[9] = Dummy_Interaction; //empty
 }
 
 
 void Compute_Bonded_Forces( reax_system *system, control_params *control,
-        simulation_data *data, static_storage *workspace,
-        reax_list **lists, output_controls *out_control )
+        simulation_data *data, static_storage *workspace, reax_list **lists,
+        output_controls *out_control, interaction_function *interaction_functions )
 {
 
     int i;
@@ -116,7 +113,7 @@ void Compute_Bonded_Forces( reax_system *system, control_params *control,
     /* Implement all the function calls as function pointers */
     for ( i = 0; i < NO_OF_INTERACTIONS; i++ )
     {
-        (Interaction_Functions[i])( system, control, data, workspace,
+        (interaction_functions[i])( system, control, data, workspace,
                 lists, out_control );
 
 #if defined(DEBUG_FOCUS)
@@ -329,7 +326,7 @@ void Validate_Lists( static_storage *workspace, reax_list **lists, int step, int
 
 
 static inline real Init_Charge_Matrix_Entry_Tab( reax_system *system,
-        control_params *control, int i, int j,
+        control_params *control, LR_lookup_table **LR, int i, int j,
         real r_ij, MATRIX_ENTRY_POSITION pos )
 {
     int r;
@@ -1009,8 +1006,8 @@ void Init_Forces_Tab( reax_system *system, control_params *control,
                 twbp = &(system->reaxprm.tbp[type_i][type_j]);
 
                 H->j[Htop] = j;
-                H->val[Htop] = Init_Charge_Matrix_Entry_Tab( system, control, i, j, 
-                        r_ij, OFF_DIAGONAL );
+                H->val[Htop] = Init_Charge_Matrix_Entry_Tab( system, control,
+                        workspace->LR, i, j, r_ij, OFF_DIAGONAL );
                 ++Htop;
 
                 /* H_sp matrix entry */
@@ -1165,8 +1162,8 @@ void Init_Forces_Tab( reax_system *system, control_params *control,
 
         /* diagonal entry */
         H->j[Htop] = i;
-        H->val[Htop] = Init_Charge_Matrix_Entry_Tab( system, control, i, j,
-                r_ij, DIAGONAL );
+        H->val[Htop] = Init_Charge_Matrix_Entry_Tab( system, control, workspace->LR,
+                i, j, r_ij, DIAGONAL );
         ++Htop;
 
         H_sp->j[H_sp_top] = i;
@@ -1301,7 +1298,8 @@ void Estimate_Storage_Sizes( reax_system *system, control_params *control,
 
 void Compute_Forces( reax_system *system, control_params *control,
         simulation_data *data, static_storage *workspace,
-        reax_list** lists, output_controls *out_control )
+        reax_list** lists, output_controls *out_control,
+        interaction_function *interaction_functions )
 {
     real t_start, t_elapsed;
 
@@ -1322,7 +1320,8 @@ void Compute_Forces( reax_system *system, control_params *control,
 #endif
 
     t_start = Get_Time( );
-    Compute_Bonded_Forces( system, control, data, workspace, lists, out_control );
+    Compute_Bonded_Forces( system, control, data, workspace, lists, out_control,
+           interaction_functions );
     t_elapsed = Get_Timing_Info( t_start );
     data->timing.bonded += t_elapsed;
 
diff --git a/sPuReMD/src/forces.h b/sPuReMD/src/forces.h
index 477732584d7fa7515a286f70d1b2343a5459f09c..eeccac22c003250eae1bad325fc5ccdad08d9d88 100644
--- a/sPuReMD/src/forces.h
+++ b/sPuReMD/src/forces.h
@@ -24,10 +24,12 @@
 
 #include "mytypes.h"
 
-void Init_Bonded_Force_Functions( control_params* );
+void Init_Bonded_Force_Functions( control_params*,
+       interaction_function* );
 
 void Compute_Forces( reax_system*, control_params*, simulation_data*,
-                     static_storage*, reax_list**, output_controls* );
+                     static_storage*, reax_list**, output_controls*,
+                     interaction_function* );
 
 void Estimate_Storage_Sizes( reax_system*, control_params*, reax_list**,
                              int*, int*, int*, int* );
diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c
index c071a46f18e6df9a36ea0ee96a84d64ad5b43132..01b0fc5593bbdb562b374b86227764edbfbc553e 100644
--- a/sPuReMD/src/init_md.c
+++ b/sPuReMD/src/init_md.c
@@ -942,7 +942,7 @@ void Init_Out_Controls( reax_system *system, control_params *control,
 void Initialize( reax_system *system, control_params *control,
         simulation_data *data, static_storage *workspace, reax_list **lists,
         output_controls *out_control, evolve_function *Evolve,
-        const int output_enabled )
+        interaction_function *interaction_functions, const int output_enabled )
 {
 #if defined(DEBUG)
     real start, end;
@@ -972,7 +972,7 @@ void Initialize( reax_system *system, control_params *control,
     }
 
     /* These are done in forces.c, only forces.c can see all those functions */
-    Init_Bonded_Force_Functions( control );
+    Init_Bonded_Force_Functions( control, interaction_functions );
 
 #ifdef TEST_FORCES
     Init_Force_Test_Functions( );
@@ -984,7 +984,7 @@ void Initialize( reax_system *system, control_params *control,
         start = Get_Time( );
 #endif
 
-        Make_LR_Lookup_Table( system, control );
+        Make_LR_Lookup_Table( system, control, workspace );
 
 #if defined(DEBUG)
         end = Get_Timing_Info( start );
@@ -1446,7 +1446,7 @@ void Finalize( reax_system *system, control_params *control,
 {
     if ( control->tabulate )
     {
-        Finalize_LR_Lookup_Table( system, control );
+        Finalize_LR_Lookup_Table( system, control, workspace );
     }
 
     if ( output_enabled == TRUE )
diff --git a/sPuReMD/src/init_md.h b/sPuReMD/src/init_md.h
index 0643e0dd739fbbe8c662a8fa5e8ff1f651aabecc..aff4d01d1cbed94e6e271b50ac962ac377891a4e 100644
--- a/sPuReMD/src/init_md.h
+++ b/sPuReMD/src/init_md.h
@@ -27,7 +27,7 @@
 
 void Initialize( reax_system*, control_params*, simulation_data*,
         static_storage*, reax_list**, output_controls*, evolve_function*,
-        const int );
+        interaction_function *, const int );
 
 void Finalize( reax_system*, control_params*, simulation_data*,
         static_storage*, reax_list**, output_controls*, const int );
diff --git a/sPuReMD/src/integrate.c b/sPuReMD/src/integrate.c
index 567e96a03bbfb01241b887512cd05c40fbd47dac..aff852c30006753fc38cd2890906a65892cf77ee 100644
--- a/sPuReMD/src/integrate.c
+++ b/sPuReMD/src/integrate.c
@@ -35,9 +35,10 @@
 #include "list.h"
 
 
-void Velocity_Verlet_NVE(reax_system* system, control_params* control,
+void Velocity_Verlet_NVE(reax_system *system, control_params *control,
         simulation_data *data, static_storage *workspace,
-        reax_list **lists, output_controls *out_control )
+        reax_list **lists, output_controls *out_control,
+        interaction_function *interaction_functions )
 {
     int i, steps, renbr;
     real inv_m, dt, dt_sqr;
@@ -75,7 +76,8 @@ void Velocity_Verlet_NVE(reax_system* system, control_params* control,
         Generate_Neighbor_Lists( system, control, data, workspace,
                 lists, out_control );
     }
-    Compute_Forces( system, control, data, workspace, lists, out_control );
+    Compute_Forces( system, control, data, workspace, lists, out_control,
+           interaction_functions );
 
     for ( i = 0; i < system->N; i++ )
     {
@@ -93,7 +95,7 @@ void Velocity_Verlet_NVE(reax_system* system, control_params* control,
 
 void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params* control,
         simulation_data *data, static_storage *workspace, reax_list **lists,
-        output_controls *out_control )
+        output_controls *out_control, interaction_function *interaction_functions )
 {
     int i, itr, steps, renbr;
     real inv_m, coef_v, dt, dt_sqr;
@@ -138,7 +140,8 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params*
                 lists, out_control );
     }
     /* Calculate Forces at time (t + dt) */
-    Compute_Forces( system, control, data, workspace, lists, out_control );
+    Compute_Forces( system, control, data, workspace, lists, out_control,
+           interaction_functions );
 
     /* Compute iteration constants for each atom's velocity */
     for ( i = 0; i < system->N; ++i )
@@ -206,11 +209,9 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params*
    All box dimensions are scaled by the same amount,
    there is no change in the angles between axes. */
 void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system,
-        control_params* control,
-        simulation_data *data,
-        static_storage *workspace,
-        reax_list **lists,
-        output_controls *out_control )
+        control_params* control, simulation_data *data, static_storage *workspace,
+        reax_list **lists, output_controls *out_control,
+        interaction_function *interaction_functions )
 {
     int i, steps, renbr;
     real inv_m, dt, lambda, mu;
@@ -256,7 +257,8 @@ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system,
         Generate_Neighbor_Lists( system, control, data, workspace,
                                  lists, out_control );
     }
-    Compute_Forces( system, control, data, workspace, lists, out_control );
+    Compute_Forces( system, control, data, workspace, lists, out_control,
+           interaction_functions );
 
     /* velocity verlet, 2nd part */
     for ( i = 0; i < system->N; i++ )
@@ -321,11 +323,9 @@ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system,
    All box dimensions are scaled by the same amount,
    there is no change in the angles between axes. */
 void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system* system,
-        control_params* control,
-        simulation_data *data,
-        static_storage *workspace,
-        reax_list **lists,
-        output_controls *out_control )
+        control_params* control, simulation_data *data, static_storage *workspace,
+        reax_list **lists, output_controls *out_control,
+        interaction_function *interaction_functions )
 {
     int i, d, steps, renbr;
     real dt, inv_m, lambda;
@@ -371,7 +371,8 @@ void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system* system,
         Generate_Neighbor_Lists( system, control, data, workspace,
                                  lists, out_control );
     }
-    Compute_Forces( system, control, data, workspace, lists, out_control );
+    Compute_Forces( system, control, data, workspace, lists, out_control,
+           interaction_functions );
 
     /* velocity verlet, 2nd part */
     for ( i = 0; i < system->N; i++ )
@@ -452,6 +453,7 @@ void Velocity_Verlet_Nose_Hoover_NVT(reax_system* system,
                                      static_storage *workspace,
                                      reax_list **lists,
                                      output_controls *out_control )
+        interaction_function *interaction_functions )
 {
     int i;
     real inv_m;
@@ -492,7 +494,8 @@ void Velocity_Verlet_Nose_Hoover_NVT(reax_system* system,
     /* Compute_Charges( system, control, workspace, lists[FAR_NBRS], out_control );
        fprintf(out_control->log,"qeq-"); fflush( out_control->log ); */
 
-    Compute_Forces( system, control, data, workspace, lists, out_control );
+    Compute_Forces( system, control, data, workspace, lists, out_control,
+           interaction_functions );
     fprintf(out_control->log, "forces\n");
     fflush( out_control->log );
 
@@ -521,11 +524,9 @@ void Velocity_Verlet_Nose_Hoover_NVT(reax_system* system,
 
 
 void Velocity_Verlet_Isotropic_NPT( reax_system* system,
-                                    control_params* control,
-                                    simulation_data *data,
-                                    static_storage *workspace,
-                                    reax_list **lists,
-                                    output_controls *out_control )
+        control_params* control, simulation_data *data,
+        static_storage *workspace, reax_list **lists,
+        output_controls *out_control )
 {
     int i, itr;
     real deps, v_eps_new = 0, v_eps_old = 0, G_xi_new;
@@ -598,7 +599,8 @@ void Velocity_Verlet_Isotropic_NPT( reax_system* system,
     /* Compute_Charges( system, control, workspace, lists[FAR_NBRS], out_control );
        fprintf(out_control->log,"qeq-"); fflush( out_control->log ); */
 
-    Compute_Forces( system, control, data, workspace, lists, out_control );
+    Compute_Forces( system, control, data, workspace, lists,
+            out_control, interaction_functions );
     fprintf(out_control->log, "forces\n");
     fflush( out_control->log );
 
@@ -698,12 +700,10 @@ void Velocity_Verlet_Isotropic_NPT( reax_system* system,
    All box dimensions are scaled by the same amount,
    there is no change in the angles between axes. */
 void Velocity_Verlet_Berendsen_NVT( reax_system* system,
-                                    control_params* control,
-                                    simulation_data *data,
-                                    static_storage *workspace,
-                                    reax_list **lists,
-                                    output_controls *out_control
-                                  )
+        control_params* control, simulation_data *data,
+        static_storage *workspace, reax_list **lists,
+        output_controls *out_control,
+        interaction_function *interaction_functions )
 {
     int i, steps, renbr;
     real inv_m, dt, lambda;
@@ -746,7 +746,7 @@ void Velocity_Verlet_Berendsen_NVT( reax_system* system,
         Generate_Neighbor_Lists( system, control, data, workspace, lists, out_control );
 
     Compute_Forces( system, control, data, workspace,
-                    lists, out_control );
+            lists, out_control, interaction_functions );
 
     /* velocity verlet, 2nd part */
     for ( i = 0; i < system->N; i++ )
diff --git a/sPuReMD/src/integrate.h b/sPuReMD/src/integrate.h
index 1a821fe8d90c240355305e2b56cb9eee1fed9e15..8b847601c012dda8d6192b1d116a5531870e1f3e 100644
--- a/sPuReMD/src/integrate.h
+++ b/sPuReMD/src/integrate.h
@@ -22,36 +22,41 @@
 #ifndef __INTEGRATE_H_
 #define __INTEGRATE_H_
 
+
 #include "mytypes.h"
 
+
 void Velocity_Verlet_NVE( reax_system*, control_params*, simulation_data*,
-                          static_storage*, reax_list**, output_controls* );
+        static_storage*, reax_list**, output_controls*,
+        interaction_function* );
+
 void Velocity_Verlet_Nose_Hoover_NVT( reax_system*, control_params*,
-                                      simulation_data*, static_storage*,
-                                      reax_list**, output_controls* );
+        simulation_data*, static_storage*, reax_list**, output_controls*,
+        interaction_function* );
+
 void Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system*, control_params*,
-        simulation_data*, static_storage*,
-        reax_list**, output_controls* );
+        simulation_data*, static_storage*, reax_list**, output_controls*,
+        interaction_function* );
+
 void Velocity_Verlet_Flexible_NPT( reax_system*, control_params*,
-                                   simulation_data*, static_storage*,
-                                   reax_list**, output_controls* );
+        simulation_data*, static_storage*, reax_list**, output_controls*,
+        interaction_function* );
+
 void Velocity_Verlet_Isotropic_NPT( reax_system*, control_params*,
-                                    simulation_data*, static_storage*,
-                                    reax_list**, output_controls* );
+        simulation_data*, static_storage*, reax_list**, output_controls*,
+        interaction_function* );
+
 void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system*, control_params*,
-        simulation_data*, static_storage*,
-        reax_list**, output_controls* );
+        simulation_data*, static_storage*, reax_list**, output_controls*,
+        interaction_function* );
+
 void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system*, control_params*,
-        simulation_data*,
-        static_storage*, reax_list**,
-        output_controls* );
-
-//upon Adri's request moved from parallel code to serial code
-void Velocity_Verlet_Berendsen_NVT( reax_system* ,
-                                    control_params* ,
-                                    simulation_data *,
-                                    static_storage *,
-                                    reax_list **,
-                                    output_controls *
-                                  );
+        simulation_data*, static_storage*, reax_list**, output_controls*,
+        interaction_function* );
+
+void Velocity_Verlet_Berendsen_NVT( reax_system* , control_params* ,
+        simulation_data *, static_storage *, reax_list **, output_controls *,
+        interaction_function* );
+
+
 #endif
diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c
index c7db2641e3eeb2d92da124254393becef33b2456..5ea019d003c2e42e8f2b20dc16cd8e429e633e3c 100644
--- a/sPuReMD/src/lin_alg.c
+++ b/sPuReMD/src/lin_alg.c
@@ -20,6 +20,7 @@
   ----------------------------------------------------------------------*/
 
 #include "lin_alg.h"
+
 #include "allocate.h"
 #include "tool_box.h"
 #include "vector.h"
@@ -34,6 +35,7 @@
 #include "lapacke.h"
 #endif
 
+
 typedef struct
 {
     unsigned int j;
@@ -41,39 +43,57 @@ typedef struct
 } sparse_matrix_entry;
 
 
+enum preconditioner_type
+{
+    LEFT = 0,
+    RIGHT = 1,
+};
+
+
 /* global to make OpenMP shared (Sparse_MatVec) */
 #ifdef _OPENMP
-real *b_local = NULL;
+static real *b_local = NULL;
 #endif
 /* global to make OpenMP shared (apply_preconditioner) */
-real *Dinv_L = NULL, *Dinv_U = NULL;
+static real *Dinv_L = NULL;
+static real *Dinv_U = NULL;
 /* global to make OpenMP shared (tri_solve_level_sched) */
-int levels = 1;
-int levels_L = 1, levels_U = 1;
-unsigned int *row_levels_L = NULL, *level_rows_L = NULL, *level_rows_cnt_L = NULL;
-unsigned int *row_levels_U = NULL, *level_rows_U = NULL, *level_rows_cnt_U = NULL;
-unsigned int *row_levels, *level_rows, *level_rows_cnt;
-unsigned int *top = NULL;
+static int levels = 1;
+static int levels_L = 1;
+static int levels_U = 1;
+static unsigned int *row_levels_L = NULL;
+static unsigned int *level_rows_L = NULL;
+static unsigned int *level_rows_cnt_L = NULL;
+static unsigned int *row_levels_U = NULL;
+static unsigned int *level_rows_U = NULL;
+static unsigned int *level_rows_cnt_U = NULL;
+static unsigned int *row_levels;
+static unsigned int *level_rows;
+static unsigned int *level_rows_cnt;
+static unsigned int *top = NULL;
 /* global to make OpenMP shared (graph_coloring) */
-unsigned int *color = NULL;
-unsigned int *to_color = NULL;
-unsigned int *conflict = NULL;
-unsigned int *conflict_cnt = NULL;
-unsigned int *temp_ptr;
-unsigned int *recolor = NULL;
-unsigned int recolor_cnt;
-unsigned int *color_top = NULL;
+static unsigned int *color = NULL;
+static unsigned int *to_color = NULL;
+static unsigned int *conflict = NULL;
+static unsigned int *conflict_cnt = NULL;
+static unsigned int *temp_ptr;
+static unsigned int *recolor = NULL;
+static unsigned int recolor_cnt;
+static unsigned int *color_top = NULL;
 /* global to make OpenMP shared (sort_colors) */
-unsigned int *permuted_row_col = NULL;
-unsigned int *permuted_row_col_inv = NULL;
-real *y_p = NULL;
+static unsigned int *permuted_row_col = NULL;
+static unsigned int *permuted_row_col_inv = NULL;
+static real *y_p = NULL;
 /* global to make OpenMP shared (permute_vector) */
-real *x_p = NULL;
-unsigned int *mapping = NULL;
-sparse_matrix *H_full;
-sparse_matrix *H_p;
+static real *x_p = NULL;
+static unsigned int *mapping = NULL;
+static sparse_matrix *H_full;
+static sparse_matrix *H_p;
 /* global to make OpenMP shared (jacobi_iter) */
-real *Dinv_b = NULL, *rp = NULL, *rp2 = NULL, *rp3 = NULL;
+static real *Dinv_b = NULL;
+static real *rp = NULL;
+static real *rp2 = NULL;
+static real *rp3 = NULL;
 
 
 #if defined(TEST_MAT)
@@ -2734,8 +2754,10 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv,
                   const real * const b, real * const x, const TRIANGULARITY tri, const
                   unsigned int maxiter )
 {
-    unsigned int i, k, si = 0, ei = 0, iter;
+    unsigned int i, k, si, ei, iter;
 
+    si = 0;
+    ei = 0;
     iter = 0;
 
 #ifdef _OPENMP
@@ -2815,176 +2837,274 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv,
 }
 
 
-/* Solve triangular system LU*x = y using level scheduling
+/* Apply left-sided preconditioning while solver M^{-1}Ax = M^{-1}b
  *
- * workspace: data struct containing matrices, lower/upper triangular, stored in CSR
+ * workspace: data struct containing matrices, stored in CSR
  * control: data struct containing parameters
- * y: constants in linear system (RHS)
- * x: solution
+ * y: vector to which to apply preconditioning,
+ *  specific to internals of iterative solver being used
+ * x: preconditioned vector (output)
  * fresh_pre: parameter indicating if this is a newly computed (fresh) preconditioner
+ * side: used in determining how to apply preconditioner if the preconditioner is
+ *  factorized as M = M_{1}M_{2} (e.g., incomplete LU, A \approx LU)
  *
  * Assumptions:
  *   Matrices have non-zero diagonals
  *   Each row of a matrix has at least one non-zero (i.e., no rows with all zeros) */
 static void apply_preconditioner( const static_storage * const workspace, const control_params * const control,
-                                  const real * const y, real * const x, const int fresh_pre )
+                                  const real * const y, real * const x, const int fresh_pre,
+                                  const int side )
 {
     int i, si;
 
     /* no preconditioning */
     if ( control->cm_solver_pre_comp_type == NONE_PC )
     {
-        Vector_Copy( x, y, workspace->H->n );
+        if ( x != y )
+        {
+            Vector_Copy( x, y, workspace->H->n );
+        }
     }
     else
     {
-        switch ( control->cm_solver_pre_app_type )
+        switch ( side )
         {
-        case TRI_SOLVE_PA:
-            switch ( control->cm_solver_pre_comp_type )
+        case LEFT:
+            switch ( control->cm_solver_pre_app_type )
             {
-            case DIAG_PC:
-                diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n );
-                break;
-            case ICHOLT_PC:
-            case ILU_PAR_PC:
-            case ILUT_PAR_PC:
-                tri_solve( workspace->L, y, x, workspace->L->n, LOWER );
-                tri_solve( workspace->U, x, x, workspace->U->n, UPPER );
-                break;
-            case SAI_PC:
-                Sparse_MatVec_full( workspace->H_app_inv, y, x );
-                break;
-            default:
-                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
-                exit( INVALID_INPUT );
-                break;
-            }
-            break;
-        case TRI_SOLVE_LEVEL_SCHED_PA:
-            switch ( control->cm_solver_pre_comp_type )
-            {
-            case DIAG_PC:
-                diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n );
-                break;
-            case ICHOLT_PC:
-            case ILU_PAR_PC:
-            case ILUT_PAR_PC:
-                tri_solve_level_sched( workspace->L, y, x, workspace->L->n, LOWER, fresh_pre );
-                tri_solve_level_sched( workspace->U, x, x, workspace->U->n, UPPER, fresh_pre );
+            case TRI_SOLVE_PA:
+                switch ( control->cm_solver_pre_comp_type )
+                {
+                case DIAG_PC:
+                    diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n );
+                    break;
+                case ICHOLT_PC:
+                case ILU_PAR_PC:
+                case ILUT_PAR_PC:
+                    tri_solve( workspace->L, y, x, workspace->L->n, LOWER );
+                    break;
+                case SAI_PC:
+                    Sparse_MatVec_full( workspace->H_app_inv, y, x );
+                    break;
+                default:
+                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    exit( INVALID_INPUT );
+                    break;
+                }
                 break;
-            case SAI_PC:
-                Sparse_MatVec_full( workspace->H_app_inv, y, x );
+            case TRI_SOLVE_LEVEL_SCHED_PA:
+                switch ( control->cm_solver_pre_comp_type )
+                {
+                case DIAG_PC:
+                    diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n );
+                    break;
+                case ICHOLT_PC:
+                case ILU_PAR_PC:
+                case ILUT_PAR_PC:
+                    tri_solve_level_sched( workspace->L, y, x, workspace->L->n, LOWER, fresh_pre );
+                    break;
+                case SAI_PC:
+                    Sparse_MatVec_full( workspace->H_app_inv, y, x );
+                    break;
+                default:
+                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    exit( INVALID_INPUT );
+                    break;
+                }
                 break;
-            default:
-                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
-                exit( INVALID_INPUT );
+            case TRI_SOLVE_GC_PA:
+                switch ( control->cm_solver_pre_comp_type )
+                {
+                case DIAG_PC:
+                case SAI_PC:
+                    fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                    exit( INVALID_INPUT );
+                    break;
+                case ICHOLT_PC:
+                case ILU_PAR_PC:
+                case ILUT_PAR_PC:
+#ifdef _OPENMP
+                    #pragma omp single
+#endif
+                {
+                    memcpy( y_p, y, sizeof(real) * workspace->H->n );
+                }
+
+                permute_vector( y_p, workspace->H->n, FALSE, LOWER );
+                tri_solve_level_sched( workspace->L, y_p, x, workspace->L->n, LOWER, fresh_pre );
                 break;
-            }
-            break;
-        case TRI_SOLVE_GC_PA:
-            switch ( control->cm_solver_pre_comp_type )
-            {
-            case DIAG_PC:
-            case SAI_PC:
-                fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
-                exit( INVALID_INPUT );
+                default:
+                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    exit( INVALID_INPUT );
+                    break;
+                }
                 break;
-            case ICHOLT_PC:
-            case ILU_PAR_PC:
-            case ILUT_PAR_PC:
+            case JACOBI_ITER_PA:
+                switch ( control->cm_solver_pre_comp_type )
+                {
+                case DIAG_PC:
+                case SAI_PC:
+                    fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                    exit( INVALID_INPUT );
+                    break;
+                case ICHOLT_PC:
+                case ILU_PAR_PC:
+                case ILUT_PAR_PC:
 #ifdef _OPENMP
-                #pragma omp single
+                    #pragma omp single
 #endif
-            {
-                memcpy( y_p, y, sizeof(real) * workspace->H->n );
-            }
+                    {
+                        if ( Dinv_L == NULL )
+                        {
+                            Dinv_L = (real*) smalloc( sizeof(real) * workspace->L->n,
+                                                      "apply_preconditioner::Dinv_L" );
+                        }
+                    }
 
-            permute_vector( y_p, workspace->H->n, FALSE, LOWER );
-            tri_solve_level_sched( workspace->L, y_p, x, workspace->L->n, LOWER, fresh_pre );
-            tri_solve_level_sched( workspace->U, x, x, workspace->U->n, UPPER, fresh_pre );
-            permute_vector( x, workspace->H->n, TRUE, UPPER );
-            break;
+                        /* construct D^{-1}_L */
+                    if ( fresh_pre == TRUE )
+                    {
+#ifdef _OPENMP
+                        #pragma omp for schedule(static)
+#endif
+                        for ( i = 0; i < workspace->L->n; ++i )
+                        {
+                            si = workspace->L->start[i + 1] - 1;
+                            Dinv_L[i] = 1.0 / workspace->L->val[si];
+                        }
+                    }
+
+                    jacobi_iter( workspace->L, Dinv_L, y, x, LOWER, control->cm_solver_pre_app_jacobi_iters );
+                break;
+                default:
+                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    exit( INVALID_INPUT );
+                    break;
+                }
+                break;
             default:
                 fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
                 exit( INVALID_INPUT );
                 break;
+
             }
             break;
-        case JACOBI_ITER_PA:
-            switch ( control->cm_solver_pre_comp_type )
+
+        case RIGHT:
+            switch ( control->cm_solver_pre_app_type )
             {
-            case DIAG_PC:
-            case SAI_PC:
-                fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
-                exit( INVALID_INPUT );
+            case TRI_SOLVE_PA:
+                switch ( control->cm_solver_pre_comp_type )
+                {
+                case DIAG_PC:
+                case SAI_PC:
+                    if ( x != y )
+                    {
+                        Vector_Copy( x, y, workspace->H->n );
+                    }
+                    break;
+                case ICHOLT_PC:
+                case ILU_PAR_PC:
+                case ILUT_PAR_PC:
+                    tri_solve( workspace->U, y, x, workspace->U->n, UPPER );
+                    break;
+                default:
+                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    exit( INVALID_INPUT );
+                    break;
+                }
                 break;
-            case ICHOLT_PC:
-            case ILU_PAR_PC:
-            case ILUT_PAR_PC:
-#ifdef _OPENMP
-                #pragma omp single
-#endif
-            {
-                if ( Dinv_L == NULL )
+            case TRI_SOLVE_LEVEL_SCHED_PA:
+                switch ( control->cm_solver_pre_comp_type )
                 {
-                    Dinv_L = (real*) smalloc( sizeof(real) * workspace->L->n,
-                                              "apply_preconditioner::Dinv_L" );
+                case DIAG_PC:
+                case SAI_PC:
+                    if ( x != y )
+                    {
+                        Vector_Copy( x, y, workspace->H->n );
+                    }
+                    break;
+                case ICHOLT_PC:
+                case ILU_PAR_PC:
+                case ILUT_PAR_PC:
+                    tri_solve_level_sched( workspace->U, y, x, workspace->U->n, UPPER, fresh_pre );
+                    break;
+                default:
+                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    exit( INVALID_INPUT );
+                    break;
                 }
-            }
-
-                /* construct D^{-1}_L */
-            if ( fresh_pre == TRUE )
-            {
-#ifdef _OPENMP
-                #pragma omp for schedule(static)
-#endif
-                for ( i = 0; i < workspace->L->n; ++i )
+                break;
+            case TRI_SOLVE_GC_PA:
+                switch ( control->cm_solver_pre_comp_type )
                 {
-                    si = workspace->L->start[i + 1] - 1;
-                    Dinv_L[i] = 1. / workspace->L->val[si];
+                case DIAG_PC:
+                case SAI_PC:
+                    fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                    exit( INVALID_INPUT );
+                    break;
+                case ICHOLT_PC:
+                case ILU_PAR_PC:
+                case ILUT_PAR_PC:
+                tri_solve_level_sched( workspace->U, y, x, workspace->U->n, UPPER, fresh_pre );
+                permute_vector( x, workspace->H->n, TRUE, UPPER );
+                break;
+                default:
+                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    exit( INVALID_INPUT );
+                    break;
                 }
-            }
-
-            jacobi_iter( workspace->L, Dinv_L, y, x, LOWER, control->cm_solver_pre_app_jacobi_iters );
-
+                break;
+            case JACOBI_ITER_PA:
+                switch ( control->cm_solver_pre_comp_type )
+                {
+                case DIAG_PC:
+                case SAI_PC:
+                    fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                    exit( INVALID_INPUT );
+                    break;
+                case ICHOLT_PC:
+                case ILU_PAR_PC:
+                case ILUT_PAR_PC:
 #ifdef _OPENMP
-            #pragma omp single
+                #pragma omp single
 #endif
-            {
-                if ( Dinv_U == NULL )
                 {
-                    Dinv_U = (real*) smalloc( sizeof(real) * workspace->U->n,
-                                              "apply_preconditioner::Dinv_U" );
+                    if ( Dinv_U == NULL )
+                    {
+                        Dinv_U = (real*) smalloc( sizeof(real) * workspace->U->n,
+                                                  "apply_preconditioner::Dinv_U" );
+                    }
                 }
-            }
 
-                /* construct D^{-1}_U */
-            if ( fresh_pre == TRUE )
-            {
+                    /* construct D^{-1}_U */
+                if ( fresh_pre == TRUE )
+                {
 #ifdef _OPENMP
-                #pragma omp for schedule(static)
+                    #pragma omp for schedule(static)
 #endif
-                for ( i = 0; i < workspace->U->n; ++i )
-                {
-                    si = workspace->U->start[i];
-                    Dinv_U[i] = 1. / workspace->U->val[si];
+                    for ( i = 0; i < workspace->U->n; ++i )
+                    {
+                        si = workspace->U->start[i];
+                        Dinv_U[i] = 1. / workspace->U->val[si];
+                    }
                 }
-            }
 
-            jacobi_iter( workspace->U, Dinv_U, y, x, UPPER, control->cm_solver_pre_app_jacobi_iters );
-            break;
+                jacobi_iter( workspace->U, Dinv_U, y, x, UPPER, control->cm_solver_pre_app_jacobi_iters );
+                break;
+                default:
+                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    exit( INVALID_INPUT );
+                    break;
+                }
+                break;
             default:
                 fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
                 exit( INVALID_INPUT );
                 break;
+
             }
             break;
-        default:
-            fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
-            exit( INVALID_INPUT );
-            break;
-
         }
     }
 }
@@ -2996,17 +3116,22 @@ int GMRES( const static_storage * const workspace, const control_params * const
            const real tol, real * const x, const int fresh_pre )
 {
     int i, j, k, itr, N, g_j, g_itr;
-    real cc, tmp1, tmp2, temp, ret_temp, bnorm;
+    real cc, tmp1, tmp2, temp, bnorm;
     real t_start, t_ortho, t_pa, t_spmv, t_ts, t_vops;
 
     N = H->n;
     g_j = 0;
     g_itr = 0;
+    t_ortho = 0.0;
+    t_pa = 0.0;
+    t_spmv = 0.0;
+    t_ts = 0.0;
+    t_vops = 0.0;
 
 #ifdef _OPENMP
     #pragma omp parallel default(none) \
-    private(i, j, k, itr, bnorm, ret_temp, t_start) \
-    shared(N, cc, tmp1, tmp2, temp, g_itr, g_j, stderr) \
+    private(i, j, k, itr, bnorm, temp, t_start) \
+    shared(N, cc, tmp1, tmp2, g_itr, g_j, stderr) \
     reduction(+: t_ortho, t_pa, t_spmv, t_ts, t_vops)
 #endif
     {
@@ -3031,25 +3156,25 @@ int GMRES( const static_storage * const workspace, const control_params * const
             t_spmv += Get_Timing_Info( t_start );
 
             t_start = Get_Time( );
-            Vector_Sum( workspace->b_prc, 1., b, -1., workspace->b_prm, N );
+            Vector_Sum( workspace->b_prc, 1.0, b, -1.0, workspace->b_prm, N );
             t_vops += Get_Timing_Info( t_start );
 
             t_start = Get_Time( );
-            apply_preconditioner( workspace, control, workspace->b_prc, workspace->v[0],
-                                  itr == 0 ? fresh_pre : FALSE );
+            apply_preconditioner( workspace, control, workspace->b_prc, workspace->b_prm,
+                                  itr == 0 ? fresh_pre : FALSE, LEFT );
+            apply_preconditioner( workspace, control, workspace->b_prm, workspace->v[0],
+                                  itr == 0 ? fresh_pre : FALSE, RIGHT );
             t_pa += Get_Timing_Info( t_start );
 
             t_start = Get_Time( );
-            ret_temp = Norm( workspace->v[0], N );
+            temp = Norm( workspace->v[0], N );
 
 #ifdef _OPENMP
             #pragma omp single
 #endif
-            {
-                workspace->g[0] = ret_temp;
-            }
+            workspace->g[0] = temp;
 
-            Vector_Scale( workspace->v[0], 1. / ret_temp, workspace->v[0], N );
+            Vector_Scale( workspace->v[0], 1.0 / temp, workspace->v[0], N );
             t_vops += Get_Timing_Info( t_start );
 
             /* GMRES inner-loop */
@@ -3061,87 +3186,40 @@ int GMRES( const static_storage * const workspace, const control_params * const
                 t_spmv += Get_Timing_Info( t_start );
 
                 t_start = Get_Time( );
-                apply_preconditioner( workspace, control, workspace->b_prc, workspace->v[j + 1], FALSE );
+                apply_preconditioner( workspace, control, workspace->b_prc,
+                        workspace->b_prm, FALSE, LEFT );
+                apply_preconditioner( workspace, control, workspace->b_prm,
+                        workspace->v[j + 1], FALSE, RIGHT );
                 t_pa += Get_Timing_Info( t_start );
 
-//                if ( control->cm_solver_pre_comp_type == DIAG_PC )
-//                {
                 /* apply modified Gram-Schmidt to orthogonalize the new residual */
                 t_start = Get_Time( );
                 for ( i = 0; i <= j; i++ )
                 {
-                    ret_temp = Dot( workspace->v[i], workspace->v[j + 1], N );
+                    temp = Dot( workspace->v[i], workspace->v[j + 1], N );
 
 #ifdef _OPENMP
                     #pragma omp single
 #endif
-                    {
-                        workspace->h[i][j] = ret_temp;
-                    }
+                    workspace->h[i][j] = temp;
 
-                    Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N );
+                    Vector_Add( workspace->v[j + 1], -1.0 * temp, workspace->v[i], N );
 
                 }
                 t_vops += Get_Timing_Info( t_start );
-//                }
-//                else
-//                {
-//                    //TODO: investigate correctness of not explicitly orthogonalizing first few vectors
-//                    /* apply modified Gram-Schmidt to orthogonalize the new residual */
-//
-//
-//
-//                    {
-//                        t_start = Get_Time( );
-//                    }
-//
-//                    #pragma omp single
-//
-//                    {
-//                        for ( i = 0; i < j - 1; i++ )
-//                        {
-//                            workspace->h[i][j] = 0.0;
-//                        }
-//                    }
-//
-//                    for ( i = MAX(j - 1, 0); i <= j; i++ )
-//                    {
-//                        ret_temp = Dot( workspace->v[i], workspace->v[j + 1], N );
-//
-//                        #pragma omp single
-//
-//                        {
-//                            workspace->h[i][j] = ret_temp;
-//                        }
-//
-//                        Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N );
-//                    }
-//
-//
-//
-//                    {
-//                        t_vops += Get_Timing_Info( t_start );
-//                    }
-//                }
 
                 t_start = Get_Time( );
-                ret_temp = Norm( workspace->v[j + 1], N );
+                temp = Norm( workspace->v[j + 1], N );
 
 #ifdef _OPENMP
                 #pragma omp single
 #endif
-                {
-                    workspace->h[j + 1][j] = ret_temp;
-                }
+                workspace->h[j + 1][j] = temp;
 
-                Vector_Scale( workspace->v[j + 1],
-                              1.0 / workspace->h[j + 1][j], workspace->v[j + 1], N );
+                Vector_Scale( workspace->v[j + 1], 1.0 / temp, workspace->v[j + 1], N );
                 t_vops += Get_Timing_Info( t_start );
 
                 t_start = Get_Time( );
-//                    if ( control->cm_solver_pre_comp_type == NONE_PC ||
-//                            control->cm_solver_pre_comp_type == DIAG_PC )
-//                    {
                 /* Givens rotations on the upper-Hessenberg matrix to make it U */
 #ifdef _OPENMP
                 #pragma omp single
@@ -3156,50 +3234,23 @@ int GMRES( const static_storage * const workspace, const control_params * const
                             workspace->hs[j] = workspace->h[j + 1][j] / cc;
                         }
 
-                        tmp1 =  workspace->hc[i] * workspace->h[i][j] +
-                                workspace->hs[i] * workspace->h[i + 1][j];
-                        tmp2 = -workspace->hs[i] * workspace->h[i][j] +
-                               workspace->hc[i] * workspace->h[i + 1][j];
+                        tmp1 = workspace->hc[i] * workspace->h[i][j] +
+                            workspace->hs[i] * workspace->h[i + 1][j];
+                        tmp2 = -1.0 * workspace->hs[i] * workspace->h[i][j] +
+                            workspace->hc[i] * workspace->h[i + 1][j];
 
                         workspace->h[i][j] = tmp1;
                         workspace->h[i + 1][j] = tmp2;
                     }
-//                    }
-//                    else
-//                    {
-//                        //TODO: investigate correctness of not explicitly orthogonalizing first few vectors
-//                        /* Givens rotations on the upper-Hessenberg matrix to make it U */
-//                        for ( i = MAX(j - 1, 0); i <= j; i++ )
-//                        {
-//                            if ( i == j )
-//                            {
-//                                cc = SQRT( SQR(workspace->h[j][j]) + SQR(workspace->h[j + 1][j]) );
-//                                workspace->hc[j] = workspace->h[j][j] / cc;
-//                                workspace->hs[j] = workspace->h[j + 1][j] / cc;
-//                            }
-//
-//                            tmp1 =  workspace->hc[i] * workspace->h[i][j] +
-//                                    workspace->hs[i] * workspace->h[i + 1][j];
-//                            tmp2 = -workspace->hs[i] * workspace->h[i][j] +
-//                                   workspace->hc[i] * workspace->h[i + 1][j];
-//
-//                            workspace->h[i][j] = tmp1;
-//                            workspace->h[i + 1][j] = tmp2;
-//                        }
-//                    }
 
                     /* apply Givens rotations to the rhs as well */
-                    tmp1 =  workspace->hc[j] * workspace->g[j];
-                    tmp2 = -workspace->hs[j] * workspace->g[j];
+                    tmp1 = workspace->hc[j] * workspace->g[j];
+                    tmp2 = -1.0 * workspace->hs[j] * workspace->g[j];
                     workspace->g[j] = tmp1;
                     workspace->g[j + 1] = tmp2;
 
                 }
                 t_ortho += Get_Timing_Info( t_start );
-
-#ifdef _OPENMP
-                #pragma omp barrier
-#endif
             }
 
             /* solve Hy = g: H is now upper-triangular, do back-substitution */
@@ -3230,7 +3281,7 @@ int GMRES( const static_storage * const workspace, const control_params * const
                 Vector_Add( workspace->p, workspace->y[i], workspace->v[i], N );
             }
 
-            Vector_Add( x, 1., workspace->p, N );
+            Vector_Add( x, 1.0, workspace->p, N );
             t_vops += Get_Timing_Info( t_start );
 
             /* stopping condition */
@@ -3266,10 +3317,10 @@ int GMRES( const static_storage * const workspace, const control_params * const
     if ( g_itr >= control->cm_solver_max_iters )
     {
         fprintf( stderr, "[WARNING] GMRES convergence failed (%d outer iters)\n", g_itr );
-        return g_itr * (control->cm_solver_restart + 1) + g_j + 1;
+        return g_itr * control->cm_solver_restart + g_j;
     }
 
-    return g_itr * (control->cm_solver_restart + 1) + g_j + 1;
+    return g_itr * control->cm_solver_restart + g_j + 1;
 }
 
 
@@ -3279,21 +3330,27 @@ int GMRES_HouseHolder( const static_storage * const workspace,
                        real * const x, const int fresh_pre )
 {
     int i, j, k, itr, N, g_j, g_itr;
-    real cc, tmp1, tmp2, temp, bnorm, ret_temp;
+    real cc, tmp1, tmp2, temp, bnorm;
     real v[10000], z[control->cm_solver_restart + 2][10000], w[control->cm_solver_restart + 2];
     real u[control->cm_solver_restart + 2][10000];
     real t_start, t_ortho, t_pa, t_spmv, t_ts, t_vops;
 
     j = 0;
     N = H->n;
+    t_ortho = 0.0;
+    t_pa = 0.0;
+    t_spmv = 0.0;
+    t_ts = 0.0;
+    t_vops = 0.0;
 
 #ifdef _OPENMP
     #pragma omp parallel default(none) \
-    private(i, j, k, itr, bnorm, ret_temp, t_start) \
-    shared(v, z, w, u, tol, N, cc, tmp1, tmp2, temp, g_itr, g_j, stderr) \
+    private(i, j, k, itr, bnorm, temp, t_start) \
+    shared(v, z, w, u, tol, N, cc, tmp1, tmp2, g_itr, g_j, stderr) \
     reduction(+: t_ortho, t_pa, t_spmv, t_ts, t_vops)
 #endif
     {
+        j = 0;
         t_ortho = 0.0;
         t_pa = 0.0;
         t_spmv = 0.0;
@@ -3319,7 +3376,10 @@ int GMRES_HouseHolder( const static_storage * const workspace,
             t_vops += Get_Timing_Info( t_start );
 
             t_start = Get_Time( );
-            apply_preconditioner( workspace, control, workspace->b_prc, z[0], fresh_pre );
+            apply_preconditioner( workspace, control, workspace->b_prc,
+                    workspace->b_prm, fresh_pre, LEFT );
+            apply_preconditioner( workspace, control, workspace->b_prm,
+                    z[0], fresh_pre, RIGHT );
             t_pa += Get_Timing_Info( t_start );
 
             t_start = Get_Time( );
@@ -3353,7 +3413,10 @@ int GMRES_HouseHolder( const static_storage * const workspace,
                 t_spmv += Get_Timing_Info( t_start );
 
                 t_start = Get_Time( );
-                apply_preconditioner( workspace, control, workspace->b_prc, v, fresh_pre );
+                apply_preconditioner( workspace, control, workspace->b_prc,
+                        workspace->b_prm, fresh_pre, LEFT );
+                apply_preconditioner( workspace, control, workspace->b_prm,
+                        v, fresh_pre, RIGHT );
                 t_pa += Get_Timing_Info( t_start );
 
                 t_start = Get_Time( );
@@ -3367,11 +3430,11 @@ int GMRES_HouseHolder( const static_storage * const workspace,
                     /* compute the HouseHolder unit vector u_j+1 */
                     Vector_MakeZero( u[j + 1], j + 1 );
                     Vector_Copy( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) );
-                    ret_temp = Norm( v + (j + 1), N - (j + 1) );
+                    temp = Norm( v + (j + 1), N - (j + 1) );
 #ifdef _OPENMP
                     #pragma omp single
 #endif
-                    u[j + 1][j + 1] += ( v[j + 1] < 0.0 ? -1 : 1 ) * ret_temp;
+                    u[j + 1][j + 1] += ( v[j + 1] < 0.0 ? -1 : 1 ) * temp;
 
 #ifdef _OPENMP
                     #pragma omp barrier
@@ -3380,11 +3443,11 @@ int GMRES_HouseHolder( const static_storage * const workspace,
                     Vector_Scale( u[j + 1], 1 / Norm( u[j + 1], N ), u[j + 1], N );
 
                     /* overwrite v with P_m+1 * v */
-                    ret_temp = 2 * Dot( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ) * u[j + 1][j + 1];
+                    temp = 2 * Dot( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ) * u[j + 1][j + 1];
 #ifdef _OPENMP
                     #pragma omp single
 #endif
-                    v[j + 1] -= ret_temp;
+                    v[j + 1] -= temp;
 
 #ifdef _OPENMP
                     #pragma omp barrier
@@ -3523,6 +3586,9 @@ int CG( const static_storage * const workspace, const control_params * const con
     r = workspace->r;
     p = workspace->q;
     z = workspace->p;
+    t_pa = 0.0;
+    t_spmv = 0.0;
+    t_vops = 0.0;
 
 #ifdef _OPENMP
     #pragma omp parallel default(none) \
@@ -3549,7 +3615,8 @@ int CG( const static_storage * const workspace, const control_params * const con
         t_vops += Get_Timing_Info( t_start );
 
         t_start = Get_Time( );
-        apply_preconditioner( workspace, control, r, z, fresh_pre );
+        apply_preconditioner( workspace, control, r, d, fresh_pre, LEFT );
+        apply_preconditioner( workspace, control, d, z, fresh_pre, RIGHT );
         t_pa += Get_Timing_Info( t_start );
 
         t_start = Get_Time( );
@@ -3572,7 +3639,8 @@ int CG( const static_storage * const workspace, const control_params * const con
             t_vops += Get_Timing_Info( t_start );
 
             t_start = Get_Time( );
-            apply_preconditioner( workspace, control, r, z, FALSE );
+            apply_preconditioner( workspace, control, r, d, FALSE, LEFT );
+            apply_preconditioner( workspace, control, d, z, FALSE, RIGHT );
             t_pa += Get_Timing_Info( t_start );
 
             t_start = Get_Time( );
@@ -3620,6 +3688,9 @@ int SDM( const static_storage * const workspace, const control_params * const co
     real t_start, t_pa, t_spmv, t_vops;
 
     N = H->n;
+    t_pa = 0.0;
+    t_spmv = 0.0;
+    t_vops = 0.0;
 
 #ifdef _OPENMP
     #pragma omp parallel default(none) \
@@ -3645,7 +3716,8 @@ int SDM( const static_storage * const workspace, const control_params * const co
         t_vops += Get_Timing_Info( t_start );
 
         t_start = Get_Time( );
-        apply_preconditioner( workspace, control, workspace->r, workspace->d, fresh_pre );
+        apply_preconditioner( workspace, control, workspace->r, workspace->d, fresh_pre, LEFT );
+        apply_preconditioner( workspace, control, workspace->r, workspace->d, fresh_pre, RIGHT );
         t_pa += Get_Timing_Info( t_start );
 
         t_start = Get_Time( );
@@ -3677,7 +3749,8 @@ int SDM( const static_storage * const workspace, const control_params * const co
             t_vops += Get_Timing_Info( t_start );
 
             t_start = Get_Time( );
-            apply_preconditioner( workspace, control, workspace->r, workspace->d, FALSE );
+            apply_preconditioner( workspace, control, workspace->r, workspace->d, FALSE, LEFT );
+            apply_preconditioner( workspace, control, workspace->r, workspace->d, FALSE, RIGHT );
             t_pa += Get_Timing_Info( t_start );
         }
 
diff --git a/sPuReMD/src/lookup.c b/sPuReMD/src/lookup.c
index 7ec372f4392fea03e81219eb2780c64ab45b7e4c..c10931436b73d78feb0be46d0b905028ff6e64e7 100644
--- a/sPuReMD/src/lookup.c
+++ b/sPuReMD/src/lookup.c
@@ -249,7 +249,8 @@ void LR_Lookup( LR_lookup_table *t, real r, LR_data *y )
 }
 
 
-void Make_LR_Lookup_Table( reax_system *system, control_params *control )
+void Make_LR_Lookup_Table( reax_system *system, control_params *control,
+       static_storage *workspace )
 {
     int i, j, r;
     int num_atom_types;
@@ -286,11 +287,11 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control )
 
     /* allocate Long-Range LookUp Table space based on
        number of atom types in the ffield file */
-    LR = (LR_lookup_table**) smalloc( num_atom_types * sizeof(LR_lookup_table*),
+    workspace->LR = (LR_lookup_table**) smalloc( num_atom_types * sizeof(LR_lookup_table*),
            "Make_LR_Lookup_Table::LR" );
     for ( i = 0; i < num_atom_types; ++i )
     {
-        LR[i] = (LR_lookup_table*) smalloc( num_atom_types * sizeof(LR_lookup_table),
+        workspace->LR[i] = (LR_lookup_table*) smalloc( num_atom_types * sizeof(LR_lookup_table),
                 "Make_LR_Lookup_Table::LR[i]");
     }
 
@@ -316,61 +317,62 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control )
             {
                 if ( existing_types[j] )
                 {
-                    LR[i][j].xmin = 0;
-                    LR[i][j].xmax = control->r_cut;
-                    LR[i][j].n = control->tabulate + 1;
-                    LR[i][j].dx = dr;
-                    LR[i][j].inv_dx = control->tabulate / control->r_cut;
-                    LR[i][j].y = (LR_data*)
-                        smalloc( LR[i][j].n * sizeof(LR_data),
+                    workspace->LR[i][j].xmin = 0;
+                    workspace->LR[i][j].xmax = control->r_cut;
+                    workspace->LR[i][j].n = control->tabulate + 1;
+                    workspace->LR[i][j].dx = dr;
+                    workspace->LR[i][j].inv_dx = control->tabulate / control->r_cut;
+                    workspace->LR[i][j].y = (LR_data*)
+                        smalloc( workspace->LR[i][j].n * sizeof(LR_data),
                               "Make_LR_Lookup_Table::LR[i][j].y" );
-                    LR[i][j].H = (cubic_spline_coef*)
-                        smalloc( LR[i][j].n * sizeof(cubic_spline_coef),
+                    workspace->LR[i][j].H = (cubic_spline_coef*)
+                        smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef),
                               "Make_LR_Lookup_Table::LR[i][j].H" );
-                    LR[i][j].vdW = (cubic_spline_coef*)
-                        smalloc( LR[i][j].n * sizeof(cubic_spline_coef),
+                    workspace->LR[i][j].vdW = (cubic_spline_coef*)
+                        smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef),
                               "Make_LR_Lookup_Table::LR[i][j].vdW" );
-                    LR[i][j].CEvd = (cubic_spline_coef*)
-                        smalloc( LR[i][j].n * sizeof(cubic_spline_coef),
+                    workspace->LR[i][j].CEvd = (cubic_spline_coef*)
+                        smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef),
                               "Make_LR_Lookup_Table::LR[i][j].CEvd" );
-                    LR[i][j].ele = (cubic_spline_coef*)
-                        smalloc( LR[i][j].n * sizeof(cubic_spline_coef),
+                    workspace->LR[i][j].ele = (cubic_spline_coef*)
+                        smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef),
                               "Make_LR_Lookup_Table::LR[i][j].ele" );
-                    LR[i][j].CEclmb = (cubic_spline_coef*)
-                        smalloc( LR[i][j].n * sizeof(cubic_spline_coef),
+                    workspace->LR[i][j].CEclmb = (cubic_spline_coef*)
+                        smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef),
                               "Make_LR_Lookup_Table::LR[i][j].CEclmb" );
 
                     for ( r = 1; r <= control->tabulate; ++r )
                     {
-                        LR_vdW_Coulomb( system, control, i, j, r * dr, &(LR[i][j].y[r]) );
-                        h[r] = LR[i][j].dx;
-                        fh[r] = LR[i][j].y[r].H;
-                        fvdw[r] = LR[i][j].y[r].e_vdW;
-                        fCEvd[r] = LR[i][j].y[r].CEvd;
-                        fele[r] = LR[i][j].y[r].e_ele;
-                        fCEclmb[r] = LR[i][j].y[r].CEclmb;
+                        LR_vdW_Coulomb( system, control, i, j, r * dr,
+                                &(workspace->LR[i][j].y[r]) );
+                        h[r] = workspace->LR[i][j].dx;
+                        fh[r] = workspace->LR[i][j].y[r].H;
+                        fvdw[r] = workspace->LR[i][j].y[r].e_vdW;
+                        fCEvd[r] = workspace->LR[i][j].y[r].CEvd;
+                        fele[r] = workspace->LR[i][j].y[r].e_ele;
+                        fCEclmb[r] = workspace->LR[i][j].y[r].CEclmb;
 
                         if ( r == 1 )
                         {
-                            v0_vdw = LR[i][j].y[r].CEvd;
-                            v0_ele = LR[i][j].y[r].CEclmb;
+                            v0_vdw = workspace->LR[i][j].y[r].CEvd;
+                            v0_ele = workspace->LR[i][j].y[r].CEclmb;
                         }
                         else if ( r == control->tabulate )
                         {
-                            vlast_vdw = LR[i][j].y[r].CEvd;
-                            vlast_ele = LR[i][j].y[r].CEclmb;
+                            vlast_vdw = workspace->LR[i][j].y[r].CEvd;
+                            vlast_ele = workspace->LR[i][j].y[r].CEclmb;
                         }
                     }
 
                     Natural_Cubic_Spline( h, fh,
-                            &(LR[i][j].H[1]), control->tabulate + 1 );
+                            &(workspace->LR[i][j].H[1]), control->tabulate + 1 );
 
 //                    fprintf( stderr, "%-6s  %-6s  %-6s\n", "r", "h", "fh" );
 //                    for( r = 1; r <= control->tabulate; ++r )
 //                        fprintf( stderr, "%f  %f  %f\n", r * dr, h[r], fh[r] );
 
                     Complete_Cubic_Spline( h, fvdw, v0_vdw, vlast_vdw,
-                            &(LR[i][j].vdW[1]), control->tabulate + 1 );
+                            &(workspace->LR[i][j].vdW[1]), control->tabulate + 1 );
 
 //                    fprintf( stderr, "%-6s  %-6s  %-6s\n", "r", "h", "fvdw" );
 //                    for( r = 1; r <= control->tabulate; ++r )
@@ -378,7 +380,7 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control )
 //                    fprintf( stderr, "v0_vdw: %f, vlast_vdw: %f\n", v0_vdw, vlast_vdw );
 
                     Natural_Cubic_Spline( h, fCEvd,
-                            &(LR[i][j].CEvd[1]), control->tabulate + 1 );
+                            &(workspace->LR[i][j].CEvd[1]), control->tabulate + 1 );
 
 //                    fprintf( stderr, "%-6s  %-6s  %-6s\n", "r", "h", "fele" );
 //                    for( r = 1; r <= control->tabulate; ++r )
@@ -386,7 +388,7 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control )
 //                    fprintf( stderr, "v0_ele: %f, vlast_ele: %f\n", v0_ele, vlast_ele );
 
                     Complete_Cubic_Spline( h, fele, v0_ele, vlast_ele,
-                            &(LR[i][j].ele[1]), control->tabulate + 1 );
+                            &(workspace->LR[i][j].ele[1]), control->tabulate + 1 );
 
 //                    fprintf( stderr, "%-6s  %-6s  %-6s\n", "r", "h", "fele" );
 //                    for( r = 1; r <= control->tabulate; ++r )
@@ -394,7 +396,7 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control )
 //                    fprintf( stderr, "v0_ele: %f, vlast_ele: %f\n", v0_ele, vlast_ele );
 
                     Natural_Cubic_Spline( h, fCEclmb,
-                            &(LR[i][j].CEclmb[1]), control->tabulate + 1 );
+                            &(workspace->LR[i][j].CEclmb[1]), control->tabulate + 1 );
                 }
             }
         }
@@ -410,7 +412,7 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control )
      for( r = 1; r <= 100; ++r ) {
      rand_dist = (real)rand()/RAND_MAX * control->r_cut;
      LR_vdW_Coulomb( system, control, i, j, rand_dist, &y );
-     LR_Lookup( &(LR[i][j]), rand_dist, &y_spline );
+     LR_Lookup( &(workspace->LR[i][j]), rand_dist, &y_spline );
 
      evdw_abserr = FABS(y.e_vdW - y_spline.e_vdW);
      evdw_relerr = FABS(evdw_abserr / y.e_vdW);
@@ -457,7 +459,8 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control )
 }
 
 
-void Finalize_LR_Lookup_Table( reax_system *system, control_params *control )
+void Finalize_LR_Lookup_Table( reax_system *system, control_params *control,
+       static_storage *workspace )
 {
     int i, j;
     int num_atom_types;
@@ -482,20 +485,20 @@ void Finalize_LR_Lookup_Table( reax_system *system, control_params *control )
             {
                 if ( existing_types[j] )
                 {
-                    sfree( LR[i][j].y, "Finalize_LR_Lookup_Table::LR[i][j].y" );
-                    sfree( LR[i][j].H, "Finalize_LR_Lookup_Table::LR[i][j].H" );
-                    sfree( LR[i][j].vdW, "Finalize_LR_Lookup_Table::LR[i][j].vdW" );
-                    sfree( LR[i][j].CEvd, "Finalize_LR_Lookup_Table::LR[i][j].CEvd" );
-                    sfree( LR[i][j].ele, "Finalize_LR_Lookup_Table::LR[i][j].ele" );
-                    sfree( LR[i][j].CEclmb, "Finalize_LR_Lookup_Table::LR[i][j].CEclmb" );
+                    sfree( workspace->LR[i][j].y, "Finalize_LR_Lookup_Table::LR[i][j].y" );
+                    sfree( workspace->LR[i][j].H, "Finalize_LR_Lookup_Table::LR[i][j].H" );
+                    sfree( workspace->LR[i][j].vdW, "Finalize_LR_Lookup_Table::LR[i][j].vdW" );
+                    sfree( workspace->LR[i][j].CEvd, "Finalize_LR_Lookup_Table::LR[i][j].CEvd" );
+                    sfree( workspace->LR[i][j].ele, "Finalize_LR_Lookup_Table::LR[i][j].ele" );
+                    sfree( workspace->LR[i][j].CEclmb, "Finalize_LR_Lookup_Table::LR[i][j].CEclmb" );
                 }
             }
         }
 
-        sfree( LR[i], "Finalize_LR_Lookup_Table::LR[i]" );
+        sfree( workspace->LR[i], "Finalize_LR_Lookup_Table::LR[i]" );
     }
 
-    sfree( LR, "Finalize_LR_Lookup_Table::LR" );
+    sfree( workspace->LR, "Finalize_LR_Lookup_Table::LR" );
 }
 
 
diff --git a/sPuReMD/src/lookup.h b/sPuReMD/src/lookup.h
index 29720cfbc05b8522d62f209d42be3bd744b61b2c..6fbd6ffafb921fc41ff17be571e1fb7892f6dac1 100644
--- a/sPuReMD/src/lookup.h
+++ b/sPuReMD/src/lookup.h
@@ -35,9 +35,11 @@ int Lookup_Index_Of( real, lookup_table* );
 
 real Lookup( real, lookup_table* );
 
-void Make_LR_Lookup_Table( reax_system*, control_params* );
+void Make_LR_Lookup_Table( reax_system*, control_params*,
+       static_storage* );
 
-void Finalize_LR_Lookup_Table( reax_system *, control_params * );
+void Finalize_LR_Lookup_Table( reax_system*, control_params*,
+       static_storage* );
 
 
 #endif
diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h
index 38d1e35c5c7e26990d777315c491cb653779f7fd..e46d217c766685ed6c6d6153f83e2679e100b6b0 100644
--- a/sPuReMD/src/mytypes.h
+++ b/sPuReMD/src/mytypes.h
@@ -869,6 +869,61 @@ typedef struct
 } reallocate_data;
 
 
+typedef struct
+{
+    real H;
+    real e_vdW;
+    real CEvd;
+    real e_ele;
+    real CEclmb;
+} LR_data;
+
+
+typedef struct
+{
+    real a;
+    real b;
+    real c;
+    real d;
+} cubic_spline_coef;
+
+
+typedef struct
+{
+    real xmin;
+    real xmax;
+    int n;
+    real dx;
+    real inv_dx;
+    real a;
+
+    real m;
+    real c;
+
+    real *y;
+} lookup_table;
+
+
+typedef struct
+{
+    real xmin;
+    real xmax;
+    int n;
+    real dx;
+    real inv_dx;
+    real a;
+    real m;
+    real c;
+
+    LR_data *y;
+    cubic_spline_coef *H;
+    cubic_spline_coef *vdW;
+    cubic_spline_coef *CEvd;
+    cubic_spline_coef *ele;
+    cubic_spline_coef *CEclmb;
+} LR_lookup_table;
+
+
 typedef struct
 {
     /* bond order related storage */
@@ -948,6 +1003,8 @@ typedef struct
 
     reallocate_data realloc;
 
+    LR_lookup_table **LR;
+
 #ifdef TEST_FORCES
     rvec *f_ele;
     rvec *f_vdw;
@@ -1067,59 +1124,15 @@ typedef struct
 } output_controls;
 
 
-typedef struct
-{
-    real H;
-    real e_vdW;
-    real CEvd;
-    real e_ele;
-    real CEclmb;
-} LR_data;
-
-
-typedef struct
-{
-    real a;
-    real b;
-    real c;
-    real d;
-} cubic_spline_coef;
-
-
-typedef struct
-{
-    real xmin;
-    real xmax;
-    int n;
-    real dx;
-    real inv_dx;
-    real a;
-
-    real m;
-    real c;
-
-    real *y;
-} lookup_table;
-
+/* Function pointer definitions */
+typedef void (*interaction_function)(reax_system*, control_params*,
+        simulation_data*, static_storage*, reax_list**, output_controls*);
 
-typedef struct
-{
-    real xmin;
-    real xmax;
-    int n;
-    real dx;
-    real inv_dx;
-    real a;
-    real m;
-    real c;
+typedef void (*evolve_function)(reax_system*, control_params*,
+        simulation_data*, static_storage*,
+        reax_list**, output_controls*, interaction_function*);
 
-    LR_data *y;
-    cubic_spline_coef *H;
-    cubic_spline_coef *vdW;
-    cubic_spline_coef *CEvd;
-    cubic_spline_coef *ele;
-    cubic_spline_coef *CEclmb;
-} LR_lookup_table;
+typedef void (*callback_function)(reax_atom*, simulation_data*, reax_list*);
 
 
 /* Handle for working with an instance of the sPuReMD library */
@@ -1139,20 +1152,11 @@ typedef struct
     output_controls *out_control;
     /* TRUE if file I/O for simulation output enabled, FALSE otherwise */
     int output_enabled;
+    /* */
+    interaction_function interaction_functions[NO_OF_INTERACTIONS];
+    /* Callback for getting simulation state at the end of each time step */
+    callback_function callback;
 } spuremd_handle;
 
 
-/* Function pointer definitions */
-typedef void (*interaction_function)(reax_system*, control_params*,
-        simulation_data*, static_storage*, reax_list**, output_controls*);
-
-typedef void (*evolve_function)(reax_system*, control_params*,
-        simulation_data*, static_storage*,
-        reax_list**, output_controls*);
-
-
-/* Global variables */
-LR_lookup_table **LR;
-
-
 #endif
diff --git a/sPuReMD/src/print_utils.c b/sPuReMD/src/print_utils.c
index d37fe5ea32a8f1829c43d3a86fbe85978f3c6d09..30c071251bdfcfb9a55f147dbeab16b50994132b 100644
--- a/sPuReMD/src/print_utils.c
+++ b/sPuReMD/src/print_utils.c
@@ -604,6 +604,7 @@ void Output_Results( reax_system *system, control_params *control,
         data->E_Ang + data->E_Pen + data->E_Coa + data->E_HB +
         data->E_Tor + data->E_Con + data->E_vdW + data->E_Ele + data->E_Pol;
 
+
     data->E_Tot = data->E_Pot + E_CONV * data->E_Kin;
 
     /* output energies if it is the time */
@@ -723,6 +724,24 @@ void Output_Results( reax_system *system, control_params *control,
         //t_elapsed = Get_Timing_Info( t_start );
         //fprintf(stdout, "append_frame took %.6f seconds\n", t_elapsed );
     }
+
+    if ( IS_NAN_REAL(data->E_Pol) )
+    {
+        fprintf( stderr, "[ERROR] NaN detected for polarization energy. Terminating...\n" );
+        exit( NUMERIC_BREAKDOWN );
+    }
+
+    if ( IS_NAN_REAL(data->E_Pot) )
+    {
+        fprintf( stderr, "[ERROR] NaN detected for potential energy. Terminating...\n" );
+        exit( NUMERIC_BREAKDOWN );
+    }
+
+    if ( IS_NAN_REAL(data->E_Tot) )
+    {
+        fprintf( stderr, "[ERROR] NaN detected for total energy. Terminating...\n" );
+        exit( NUMERIC_BREAKDOWN );
+    }
 }
 
 
diff --git a/sPuReMD/src/spuremd.c b/sPuReMD/src/spuremd.c
index 5eca7537520ab593712d9bf8408a94a2b8dc9deb..f18e71d521d5e0195db3957bb29bfb760d4b94bb 100644
--- a/sPuReMD/src/spuremd.c
+++ b/sPuReMD/src/spuremd.c
@@ -21,8 +21,6 @@
 
 #include "spuremd.h"
 
-#include "mytypes.h"
-
 #include "analyze.h"
 #include "control.h"
 #include "ffield.h"
@@ -172,6 +170,7 @@ void* setup( const char * const geo_file, const char * const ffield_file,
            "Setup::spmd_handle->out_control" );
 
     spmd_handle->output_enabled = TRUE;
+    spmd_handle->callback = NULL;
 
     /* parse geometry file */
     Read_System( geo_file, ffield_file, control_file,
@@ -185,6 +184,25 @@ void* setup( const char * const geo_file, const char * const ffield_file,
 }
 
 
+int setup_callback( const void * const handle, const callback_function callback  )
+{
+    int ret;
+    spuremd_handle *spmd_handle;
+
+
+    ret = SPUREMD_FAILURE;
+
+    if ( handle != NULL && callback != NULL )
+    {
+        spmd_handle = (spuremd_handle*) handle;
+        spmd_handle->callback = callback;
+        ret = SPUREMD_SUCCESS;
+    }
+
+    return ret;
+}
+
+
 int simulate( const void * const handle )
 {
     int steps, ret;
@@ -199,20 +217,24 @@ int simulate( const void * const handle )
 
         Initialize( spmd_handle->system, spmd_handle->control, spmd_handle->data,
                 spmd_handle->workspace, &spmd_handle->lists,
-                spmd_handle->out_control, &Evolve,
+                spmd_handle->out_control, &Evolve, spmd_handle->interaction_functions,
                 spmd_handle->output_enabled );
 
         /* compute f_0 */
         //if( control.restart == 0 ) {
         Reset( spmd_handle->system, spmd_handle->control, spmd_handle->data,
                 spmd_handle->workspace, &spmd_handle->lists );
+
         Generate_Neighbor_Lists( spmd_handle->system, spmd_handle->control, spmd_handle->data,
                 spmd_handle->workspace, &spmd_handle->lists, spmd_handle->out_control );
 
         //fprintf( stderr, "total: %.2f secs\n", data.timing.nbrs);
         Compute_Forces( spmd_handle->system, spmd_handle->control, spmd_handle->data,
-                spmd_handle->workspace, &spmd_handle->lists, spmd_handle->out_control );
+                spmd_handle->workspace, &spmd_handle->lists, spmd_handle->out_control,
+                spmd_handle->interaction_functions );
+
         Compute_Kinetic_Energy( spmd_handle->system, spmd_handle->data );
+
         if ( spmd_handle->output_enabled == TRUE )
         {
             Output_Results( spmd_handle->system, spmd_handle->control, spmd_handle->data,
@@ -230,7 +252,9 @@ int simulate( const void * const handle )
             }
 
             Evolve( spmd_handle->system, spmd_handle->control, spmd_handle->data,
-                    spmd_handle->workspace, &spmd_handle->lists, spmd_handle->out_control );
+                    spmd_handle->workspace, &spmd_handle->lists, spmd_handle->out_control,
+                    spmd_handle->interaction_functions );
+
             Post_Evolve( spmd_handle->system, spmd_handle->control, spmd_handle->data,
                     spmd_handle->workspace, &spmd_handle->lists, spmd_handle->out_control );
 
@@ -250,6 +274,12 @@ int simulate( const void * const handle )
                 Write_Restart( spmd_handle->system, spmd_handle->control, spmd_handle->data,
                         spmd_handle->workspace, spmd_handle->out_control );
             }
+
+            if ( spmd_handle->callback != NULL )
+            {
+                spmd_handle->callback( spmd_handle->system->atoms, spmd_handle->data,
+                        spmd_handle->lists );
+            }
         }
 
         if ( spmd_handle->out_control->write_steps > 0 && spmd_handle->output_enabled == TRUE )
@@ -261,6 +291,7 @@ int simulate( const void * const handle )
 
         spmd_handle->data->timing.end = Get_Time( );
         spmd_handle->data->timing.elapsed = Get_Timing_Info( spmd_handle->data->timing.start );
+
         if ( spmd_handle->output_enabled == TRUE )
         {
             fprintf( spmd_handle->out_control->log, "total: %.2f secs\n", spmd_handle->data->timing.elapsed );
@@ -302,3 +333,20 @@ int cleanup( const void * const handle )
 
     return ret;
 }
+
+
+reax_atom* get_atoms( const void * const handle )
+{
+    spuremd_handle *spmd_handle;
+    reax_atom *atoms;
+
+    atoms = NULL;
+
+    if ( handle != NULL )
+    {
+        spmd_handle = (spuremd_handle*) handle;
+        atoms = spmd_handle->system->atoms;
+    }
+
+    return atoms;
+}
diff --git a/sPuReMD/src/spuremd.h b/sPuReMD/src/spuremd.h
index 7ed56288f8f26bce4505cbbf98babb42a9d4d658..ed2b98d3cc21ab0a68d5195d4e33ffe422685dfd 100644
--- a/sPuReMD/src/spuremd.h
+++ b/sPuReMD/src/spuremd.h
@@ -26,12 +26,19 @@
 #define SPUREMD_FAILURE (-1)
 
 
+#include "mytypes.h"
+
+
 void* setup( const char * const, const char * const,
         const char * const );
 
+int setup_callback( const void * const, const callback_function );
+
 int simulate( const void * const );
 
 int cleanup( const void * const );
 
+reax_atom* get_atoms( const void * const );
+
 
 #endif
diff --git a/sPuReMD/src/two_body_interactions.c b/sPuReMD/src/two_body_interactions.c
index e533d1869efdf5152853d65f5c192ddb856248b2..664d7c0f3088d1b11beceb6e6a2647c49fb46cb6 100644
--- a/sPuReMD/src/two_body_interactions.c
+++ b/sPuReMD/src/two_body_interactions.c
@@ -571,7 +571,7 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control,
                     self_coef = (i == j) ? 0.5 : 1.0;
                     tmin = MIN( type_i, type_j );
                     tmax = MAX( type_i, type_j );
-                    t = &( LR[tmin][tmax] );
+                    t = &( workspace->LR[tmin][tmax] );
 
                     /* Cubic Spline Interpolation */
                     r = (int)(r_ij * t->inv_dx);
diff --git a/sPuReMD/src/vector.h b/sPuReMD/src/vector.h
index 557057489b3a52a407836360046b6a9e80d9c8df..f150b84cc933bb4f1f23d400473546794ce4cd00 100644
--- a/sPuReMD/src/vector.h
+++ b/sPuReMD/src/vector.h
@@ -48,8 +48,6 @@ static inline int Vector_isZero( const real * const v, const unsigned int k )
     }
 
 #ifdef _OPENMP
-    #pragma omp barrier
-
     #pragma omp for simd reduction(&&: ret_omp) schedule(simd:static)
 #endif
     for ( i = 0; i < k; ++i )
@@ -60,10 +58,6 @@ static inline int Vector_isZero( const real * const v, const unsigned int k )
         }
     }
 
-#ifdef _OPENMP
-    #pragma omp barrier
-#endif
-
     return ret_omp;
 }
 
@@ -154,8 +148,6 @@ static inline real Dot( const real * const v1, const real * const v2,
     }
 
 #ifdef _OPENMP
-    #pragma omp barrier
-
     #pragma omp for simd reduction(+: ret2_omp) schedule(simd:static)
 #endif
     for ( i = 0; i < k; ++i )
@@ -163,10 +155,6 @@ static inline real Dot( const real * const v1, const real * const v2,
         ret2_omp += v1[i] * v2[i];
     }
 
-#ifdef _OPENMP
-    #pragma omp barrier
-#endif
-
     return ret2_omp;
 }
 
@@ -183,8 +171,6 @@ static inline real Norm( const real * const v1, const unsigned int k )
     }
 
 #ifdef _OPENMP
-    #pragma omp barrier
-
     #pragma omp for simd reduction(+: ret2_omp) schedule(simd:static)
 #endif
     for ( i = 0; i < k; ++i )
@@ -193,18 +179,12 @@ static inline real Norm( const real * const v1, const unsigned int k )
     }
 
 #ifdef _OPENMP
-    #pragma omp barrier
-
     #pragma omp single
 #endif
     {
         ret2_omp = SQRT( ret2_omp );
     }
 
-#ifdef _OPENMP
-    #pragma omp barrier
-#endif
-
     return ret2_omp;
 }
 
@@ -459,7 +439,8 @@ static inline void rvec_Cross( rvec ret, const rvec v1, const rvec v2 )
 #endif
     for ( i = 0; i < 3; ++i )
     {
-        ret[i] = v1[(i + 1) % 3] * v2[(i + 2) % 3] - v1[(i + 2) % 3] * v2[(i + 1) % 3];
+        ret[i] = v1[(i + 1) % 3] * v2[(i + 2) % 3]
+            - v1[(i + 2) % 3] * v2[(i + 1) % 3];
     }
 
 //    ret[0] = v1[1] * v2[2] - v1[2] * v2[1];