diff --git a/PG-PuReMD/src/cuda_environment.cu b/PG-PuReMD/src/cuda_environment.cu
index dbbaba9bfad47c8d794f6f593a98cbebdbaf72bd..24898ec57ea754e0658d63193b264ea78c406c14 100644
--- a/PG-PuReMD/src/cuda_environment.cu
+++ b/PG-PuReMD/src/cuda_environment.cu
@@ -1,25 +1,28 @@
-
 #include "cuda_environment.h"
-
 #include "cuda_utils.h"
 
 extern "C" void Setup_Cuda_Environment (int rank, int nprocs, int gpus_per_node)
 {
 
-    int deviceCount = 0;
-    cudaGetDeviceCount (&deviceCount);
+    int deviceCount;
+    cudaError_t flag;
+    
+    flag = cudaGetDeviceCount (&deviceCount);
+
+    if ( flag != cudaSuccess )
+    {
+        fprintf( stderr, "ERROR: no CUDA capable device(s) found. Terminating...\n" );
+        exit( CANNOT_INITIALIZE );
+    }
 
     //Calculate the # of GPUs per processor
     //and assign the GPU for each process
-
-    //hpcc changes
-    //if (gpus_per_node == 2) {
+    //TODO: handle condition where # CPU procs > # GPUs
     cudaSetDevice ( (rank % (deviceCount)) );
-    //cudaSetDevice( 1 );
+
+#if defined(__CUDA_DEBUG__)
     fprintf( stderr, "p:%d is using GPU: %d \n", rank, (rank % deviceCount));
-    //} else {
-    //    cudaSetDevice ( 0 );
-    //}
+#endif
 
     ///////////////////////////////////////////////
     ///////////////////////////////////////////////
@@ -36,9 +39,9 @@ extern "C" void Setup_Cuda_Environment (int rank, int nprocs, int gpus_per_node)
     ///////////////////////////////////////////////
     ///////////////////////////////////////////////
     ///////////////////////////////////////////////
-
 }
 
+
 extern "C" void Cleanup_Cuda_Environment ()
 {
     cudaDeviceReset ();
diff --git a/PG-PuReMD/src/parallelreax.c b/PG-PuReMD/src/parallelreax.c
index 2f4b19dff34ca4e52f8d5b6e1368247ffdbbdadf..40cc9c3b96eea09bbf1074485c368beb7af7f9c5 100644
--- a/PG-PuReMD/src/parallelreax.c
+++ b/PG-PuReMD/src/parallelreax.c
@@ -92,7 +92,7 @@ void Read_System( char *geo_file, char *ffield_file, char *control_file,
     else
     {
         fprintf( stderr, "unknown geo file format. terminating!\n" );
-        MPI_Abort( MPI_COMM_WORLD, UNKNOWN_OPTION );
+        MPI_Abort( MPI_COMM_WORLD, INVALID_GEO );
     }
 }
 
@@ -170,6 +170,12 @@ void init_blocks (reax_system *system)
 #endif
 
 
+static void usage(char* argv[])
+{
+    fprintf(stderr, "usage: ./%s geometry ffield control\n", argv[0]);
+}
+
+
 int main( int argc, char* argv[] )
 {
     reax_system *system;
@@ -183,6 +189,12 @@ int main( int argc, char* argv[] )
     real t_start = 0, t_elapsed;
     real t_begin, t_end;
 
+    if ( argc != 4 )
+    {
+        usage(argv);
+        exit( INVALID_INPUT );
+    }
+
 #ifdef HAVE_CUDA
 
     /* Remove this debug information later */
diff --git a/PuReMD/src/geo_tools.c b/PuReMD/src/geo_tools.c
index 30139912468b045f54110c84a92523ab2d39703f..c037840bb15f962717c17363a50c38603f99b31d 100644
--- a/PuReMD/src/geo_tools.c
+++ b/PuReMD/src/geo_tools.c
@@ -164,19 +164,17 @@ int Read_Box_Info( reax_system *system, FILE *geo, int geo_format )
 
     switch ( geo_format )
     {
-    case PDB:
-        cryst = "CRYST1";
-        break;
-    default:
-        cryst = "BOX";
+        case PDB:
+            cryst = "CRYST1";
+            break;
+        default:
+            cryst = "BOX";
     }
 
     /* locate the cryst line in the geo file, read it and
        initialize the big box */
-    while ( !feof( geo ) )
+    while ( fgets( line, MAX_LINE, geo ) )
     {
-        fgets( line, MAX_LINE, geo );
-
         if ( strncmp( line, cryst, 6 ) == 0 )
         {
             if ( geo_format == PDB )
@@ -193,6 +191,10 @@ int Read_Box_Info( reax_system *system, FILE *geo, int geo_format )
             return SUCCESS;
         }
     }
+    if ( ferror( geo ) )
+    {
+        return FAILURE;
+    }
 
     return FAILURE;
 }
@@ -310,15 +312,11 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control,
     c  = 0;
     c1 = 0;
     top = 0;
-    while ( !feof( pdb ) )
-    {
-        /* clear previous input line */
-        s[0] = 0;
-        for ( i = 0; i < c1; ++i )
-            tmp[i][0] = 0;
+    s[0] = 0;
 
+    while ( fgets( s, MAX_LINE, pdb ) )
+    {
         /* read new line and tokenize it */
-        fgets( s, MAX_LINE, pdb );
         strncpy( s1, s, MAX_LINE - 1 );
         c1 = Tokenize( s, &tmp );
 
@@ -474,6 +472,15 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control,
                 // fprintf( stderr, "\n" );
             }
         }
+
+        /* clear previous input line */
+        s[0] = 0;
+        for ( i = 0; i < c1; ++i )
+            tmp[i][0] = 0;
+    }
+    if ( ferror( pdb ) )
+    {
+        return FAILURE;
     }
 
     fclose( pdb );
diff --git a/PuReMD/src/parallelreax.c b/PuReMD/src/parallelreax.c
index 4a0420744a6c2aa8a79dbbb22b114890ac6fcb2c..de9eade3f86f23f5ce49f29f4d7641e6fc4f28ae 100644
--- a/PuReMD/src/parallelreax.c
+++ b/PuReMD/src/parallelreax.c
@@ -51,9 +51,13 @@ void Read_System( char *geo_file, char *ffield_file, char *control_file,
 
     /* geo file */
     if ( control->geo_format == CUSTOM )
+    {
         Read_Geo( geo_file, system, control, data, workspace, mpi_data );
+    }
     else if ( control->geo_format == PDB )
+    {
         Read_PDB( geo_file, system, control, data, workspace, mpi_data );
+    }
     else if ( control->geo_format == ASCII_RESTART )
     {
         Read_Restart( geo_file, system, control, data, workspace, mpi_data );
@@ -103,6 +107,11 @@ void Post_Evolve( reax_system* system, control_params* control,
     Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D );
 }
 
+static void usage(char* argv[])
+{
+    fprintf(stderr, "usage: ./%s geometry ffield control\n", argv[0]);
+}
+
 
 int main( int argc, char* argv[] )
 {
@@ -116,6 +125,12 @@ int main( int argc, char* argv[] )
     int i;
     real t_start = 0, t_elapsed;
 
+    if ( argc != 4 )
+    {
+        usage(argv);
+        exit( INVALID_INPUT );
+    }
+
     /* allocated main datastructures */
     system = (reax_system *)
              smalloc( sizeof(reax_system), "system", MPI_COMM_WORLD );
diff --git a/PuReMD/src/reax_defs.h b/PuReMD/src/reax_defs.h
index 3cc4b61d7aabb4ce5222ef60582c07a24408532e..a61ce245a5fc29c580158f4f425805dd16506c5a 100644
--- a/PuReMD/src/reax_defs.h
+++ b/PuReMD/src/reax_defs.h
@@ -102,24 +102,24 @@
 
 
 /******************* ENUMERATIONS *************************/
-enum geo_formats { CUSTOM, PDB, ASCII_RESTART, BINARY_RESTART, GF_N };
+enum geo_formats { CUSTOM = 0, PDB = 1, ASCII_RESTART = 2, BINARY_RESTART = 3, GF_N = 4 };
 
-enum restart_formats { WRITE_ASCII, WRITE_BINARY, RF_N };
+enum restart_formats { WRITE_ASCII = 0, WRITE_BINARY = 1, RF_N = 2 };
 
-enum ensembles { NVE, bNVT, nhNVT, sNPT, iNPT, NPT, ens_N };
+enum ensembles { NVE = 0, bNVT = 1, nhNVT = 2, sNPT = 3, iNPT = 4, NPT = 5, ens_N = 6 };
 
-enum lists { BONDS, OLD_BONDS, THREE_BODIES,
-             HBONDS, FAR_NBRS, DBOS, DDELTAS, LIST_N
+enum lists { BONDS = 0, OLD_BONDS = 1, THREE_BODIES = 2,
+             HBONDS = 3, FAR_NBRS = 4, DBOS = 5, DDELTAS = 6, LIST_N = 7
            };
 
-enum interactions { TYP_VOID, TYP_BOND, TYP_THREE_BODY,
-                    TYP_HBOND, TYP_FAR_NEIGHBOR, TYP_DBO, TYP_DDELTA, TYP_N
+enum interactions { TYP_VOID = 0, TYP_BOND = 1, TYP_THREE_BODY = 2,
+                    TYP_HBOND = 3, TYP_FAR_NEIGHBOR = 4, TYP_DBO = 5, TYP_DDELTA = 6, TYP_N = 7
                   };
 
-enum message_tags { INIT, UPDATE, BNDRY, UPDATE_BNDRY,
-                    EXC_VEC1, EXC_VEC2, DIST_RVEC2, COLL_RVEC2,
-                    DIST_RVECS, COLL_RVECS, INIT_DESCS, ATOM_LINES,
-                    BOND_LINES, ANGLE_LINES, RESTART_ATOMS, TAGS_N
+enum message_tags { INIT = 0, UPDATE = 1, BNDRY = 2, UPDATE_BNDRY = 3,
+                    EXC_VEC1 = 4, EXC_VEC2 = 5, DIST_RVEC2 = 6, COLL_RVEC2 = 7,
+                    DIST_RVECS = 8, COLL_RVECS = 9, INIT_DESCS = 10, ATOM_LINES = 11,
+                    BOND_LINES = 12, ANGLE_LINES = 13, RESTART_ATOMS = 14, TAGS_N = 15
                   };
 
 enum errors { FILE_NOT_FOUND = -10, UNKNOWN_ATOM_TYPE = -11,
@@ -128,7 +128,7 @@ enum errors { FILE_NOT_FOUND = -10, UNKNOWN_ATOM_TYPE = -11,
               INVALID_INPUT = -16, INVALID_GEO = -17
             };
 
-enum exchanges { NONE, NEAR_EXCH, FULL_EXCH };
+enum exchanges { NONE = 0, NEAR_EXCH = 1, FULL_EXCH = 2 };
 
 enum gcell_types { NO_NBRS = 0, NEAR_ONLY = 1, HBOND_ONLY = 2, FAR_ONLY = 4,
                    NEAR_HBOND = 3, NEAR_FAR = 5, HBOND_FAR = 6, FULL_NBRS = 7,
@@ -139,9 +139,9 @@ enum atoms { C_ATOM = 0, H_ATOM = 1, O_ATOM = 2, N_ATOM = 3,
              S_ATOM = 4, SI_ATOM = 5, GE_ATOM = 6, X_ATOM = 7
            };
 
-enum traj_methods { REG_TRAJ, MPI_TRAJ, TF_N };
+enum traj_methods { REG_TRAJ = 0, MPI_TRAJ = 1, TF_N = 2 };
 
-enum molecules { UNKNOWN, WATER };
+enum molecules { UNKNOWN = 0, WATER = 1 };
 
 
 #endif
diff --git a/PuReMD/src/tool_box.c b/PuReMD/src/tool_box.c
index 24c8c799bdd59f7acc7869264724b0e6e4e99ab4..02d9e3d9d6d083f3873b61d932f31b78964eb66e 100644
--- a/PuReMD/src/tool_box.c
+++ b/PuReMD/src/tool_box.c
@@ -165,9 +165,9 @@ inline int is_Inside_Box( simulation_box *box, rvec p )
     if ( p[0] < box->min[0] || p[0] >= box->max[0] ||
             p[1] < box->min[1] || p[1] >= box->max[1] ||
             p[2] < box->min[2] || p[2] >= box->max[2] )
-        return 0;
+        return FALSE;
 
-    return 1;
+    return TRUE;
 }
 
 
@@ -182,9 +182,9 @@ inline int iown_midpoint( simulation_box *box, rvec p1, rvec p2 )
     if ( midp[0] < box->min[0] || midp[0] >= box->max[0] ||
             midp[1] < box->min[1] || midp[1] >= box->max[1] ||
             midp[2] < box->min[2] || midp[2] >= box->max[2] )
-        return 0;
+        return FALSE;
 
-    return 1;
+    return TRUE;
 }
 
 
@@ -284,7 +284,7 @@ int is_Valid_Serial( storage *workspace, int serial )
     //  MPI_Abort( MPI_COMM_WORLD, INVALID_INPUT );
     // }
 
-    return SUCCESS;
+    return TRUE;
 }
 
 
diff --git a/environ/param.gpu.water b/environ/param.gpu.water
index edd2784aa4f0a7a25df38e415d070f9cc392083c..7b3d00e95f1eba256b006f40283943d2bcbcbd57 100644
--- a/environ/param.gpu.water
+++ b/environ/param.gpu.water
@@ -14,12 +14,13 @@ bond_graph_cutoff       0.3                     ! bond strength cutoff for bond
 thb_cutoff              0.001                   ! cutoff value for three body interactions
 hbond_cutoff            7.50                    ! cutoff distance for hydrogen bond interactions
 
-qeq_solver_type         2                       ! method used to solve charge equilibration phase (QEq)
+qeq_solver_type         0                       ! method used to solve charge equilibration phase (QEq)
 qeq_solver_q_err        1e-6                    ! relative residual norm threshold used in solver
-pre_comp_type           2                       ! method used to compute QEq preconditioner, if applicable
+pre_comp_type           0                       ! method used to compute QEq preconditioner, if applicable
 pre_comp_refactor       100                     ! nsteps to recompute preconditioner
 pre_comp_droptol        0.0                     ! threshold tolerance for dropping values in preconditioner computation, if applicable
-pre_comp_sweeps         3                       ! sweeps to compute preconditioner (ILU_PAR/ICHOL_PAR)
+pre_comp_sweeps         3                       ! sweeps to compute preconditioner (ILU_PAR)
+pre_app_type    	1                       ! method used to apply QEq preconditioner
 pre_app_jacobi_iters    50                      ! number of Jacobi iterations used for applying QEq precondition, if applicable
 
 temp_init               0.0                     ! desired initial temperature of the simulated system
@@ -34,7 +35,7 @@ p_mass                  5000.00                 ! in fs, pressure inertia parame
 compress                0.008134                ! in ps^2 * A / amu ( 4.5X10^(-5) bar^(-1) )
 press_mode              0                       ! 0: internal + external pressure, 1: ext only, 2: int only
 
-geo_format              1                       ! 0: xyz, 1: pdb, 2: bgf
+geo_format              1                       ! 0: custom, 1: pdb, 2: bgf
 write_freq              0                       ! write trajectory after so many steps
 traj_compress           0                       ! 0: no compression  1: uses zlib to compress trajectory output
 traj_format             0                       ! 0: our own format (below options apply to this only), 1: xyz, 2: bgf, 3: pdb
diff --git a/sPuReMD/Makefile.am b/sPuReMD/Makefile.am
index 5315c55e00e655b3be1629a9037e9ac1d6c604db..22bfb4986956b78797bc6ceccbb4bbd64aabed72 100644
--- a/sPuReMD/Makefile.am
+++ b/sPuReMD/Makefile.am
@@ -1,10 +1,10 @@
 AM_CFLAGS += -Wall -O3 -funroll-loops -fstrict-aliasing
 
 bin_PROGRAMS = bin/spuremd
-bin_spuremd_SOURCES = src/grid.c src/list.c src/lookup.c src/print_utils.c \
-		  src/reset_utils.c src/restart.c src/random.c src/traj.c \
-		  src/vector.c src/allocate.c src/analyze.c src/box.c src/system_props.c \
-		  src/param.c src/pdb_tools.c src/neighbors.c src/GMRES.c src/QEq.c src/bond_orders.c \
+bin_spuremd_SOURCES = src/ffield.c src/grid.c src/list.c src/lookup.c src/print_utils.c \
+		  src/reset_utils.c src/restart.c src/random.c src/tool_box.c src/traj.c \
+		  src/vector.c src/allocate.c src/analyze.c src/box.c src/system_props.c src/control.c \
+		  src/geo_tools.c src/neighbors.c src/GMRES.c src/QEq.c src/bond_orders.c \
 		  src/single_body_interactions.c src/two_body_interactions.c \
 		  src/three_body_interactions.c src/four_body_interactions.c src/forces.c \
 		  src/integrate.c src/init_md.c src/testmd.c 
diff --git a/sPuReMD/src/GMRES.c b/sPuReMD/src/GMRES.c
index fae32bb8f66fe1d110503e1d71d0bfe3f1cb3365..c686ce887919d686c09665b140d9e5bb8ce174fe 100644
--- a/sPuReMD/src/GMRES.c
+++ b/sPuReMD/src/GMRES.c
@@ -19,13 +19,12 @@
   <http://www.gnu.org/licenses/>.
   ----------------------------------------------------------------------*/
 
-#include "allocate.h"
 #include "GMRES.h"
+#include "allocate.h"
 #include "list.h"
+#include "tool_box.h"
 #include "vector.h"
 
-#include <omp.h>
-
 
 typedef enum
 {
@@ -40,7 +39,7 @@ typedef enum
  *   x: vector
  *   b: vector (result) */
 static void Sparse_MatVec( const sparse_matrix * const A,
-                           const real * const x, real * const b )
+        const real * const x, real * const b )
 {
     int i, j, k, n, si, ei;
     real H;
@@ -66,7 +65,7 @@ static void Sparse_MatVec( const sparse_matrix * const A,
             {
                 if ( (b_local = (real*) malloc( omp_get_num_threads() * n * sizeof(real))) == NULL )
 		{
-                    exit( INSUFFICIENT_SPACE );
+                    exit( INSUFFICIENT_MEMORY );
 		}
             }
 
@@ -83,8 +82,8 @@ static void Sparse_MatVec( const sparse_matrix * const A,
 
             for ( k = si; k < ei; ++k )
             {
-                j = A->entries[k].j;
-                H = A->entries[k].val;
+                j = A->j[k];
+                H = A->val[k];
 #ifdef _OPENMP
                 b_local[tid * n + j] += H * x[i];
                 b_local[tid * n + i] += H * x[j];
@@ -96,9 +95,9 @@ static void Sparse_MatVec( const sparse_matrix * const A,
 
             // the diagonal entry is the last one in
 #ifdef _OPENMP
-            b_local[tid * n + i] += A->entries[k].val * x[i];
+            b_local[tid * n + i] += A->val[k] * x[i];
 #else
-            b[i] += A->entries[k].val * x[i];
+            b[i] += A->val[k] * x[i];
 #endif
         }
 #ifdef _OPENMP
@@ -126,8 +125,8 @@ static void Sparse_MatVec( const sparse_matrix * const A,
  *   b: vector (result)
  *   D^{-1}b (Dinv_b): precomputed vector-vector product */
 static void Sparse_MatVec_Vector_Add( const sparse_matrix * const R,
-                            const TRIANGULARITY tri, const real * const Dinv,
-                            const real * const x, real * const b, const real * const Dinv_b)
+        const TRIANGULARITY tri, const real * const Dinv,
+        const real * const x, real * const b, const real * const Dinv_b)
 {
     int i, k, si = 0, ei = 0;
 #ifdef _OPENMP
@@ -151,7 +150,7 @@ static void Sparse_MatVec_Vector_Add( const sparse_matrix * const R,
             {
                 if ( (b_local = (real*) malloc( omp_get_num_threads() * R->n * sizeof(real))) == NULL )
                 {
-                    exit( INSUFFICIENT_SPACE );
+                    exit( INSUFFICIENT_MEMORY );
                 }
 	    }
 
@@ -178,9 +177,9 @@ static void Sparse_MatVec_Vector_Add( const sparse_matrix * const R,
             for ( k = si; k < ei; ++k )
             {
 #ifdef _OPENMP
-                b_local[tid * R->n + i] += R->entries[k].val * x[R->entries[k].j];
+                b_local[tid * R->n + i] += R->val[k] * x[R->j[k]];
 #else
-                b[i] += R->entries[k].val * x[R->entries[k].j];
+                b[i] += R->val[k] * x[R->j[k]];
 #endif
             }
 #ifdef _OPENMP
@@ -205,48 +204,211 @@ static void Sparse_MatVec_Vector_Add( const sparse_matrix * const R,
 }
 
 
-/* solve sparse lower triangular linear system using forward substitution */
-static void Forward_Subs( const sparse_matrix * const L, const real * const b, real * const y )
+static void diag_pre_app( const real * const Hdia_inv, const real * const y,
+        real * const x, const int N )
+{
+    unsigned int i;
+
+    #pragma omp parallel for schedule(guided) \
+        default(none) private(i) shared(stderr)
+    for ( i = 0; i < N; ++i )
+    {
+        x[i] = y[i] * Hdia_inv[i];
+    }
+}
+
+
+/* Solve triangular system LU*x = y using level scheduling
+ *
+ * LU: lower/upper triangular, stored in CSR
+ * y: constants in linear system (RHS)
+ * x: solution
+ * tri: triangularity of LU (lower/upper)
+ * 
+ * Assumptions:
+ *   LU has non-zero diagonals
+ *   Each row of LU has at least one non-zero (i.e., no rows with all zeros) */
+static void tri_solve( const sparse_matrix * const LU, const real * const y,
+        real * const x, const TRIANGULARITY tri )
 {
     int i, pj, j, si, ei;
     real val;
 
-    for ( i = 0; i < L->n; ++i )
+    if ( tri == LOWER )
+    {
+        for ( i = 0; i < LU->n; ++i )
+        {
+            x[i] = y[i];
+            si = LU->start[i];
+            ei = LU->start[i + 1];
+            for ( pj = si; pj < ei - 1; ++pj )
+            {
+                j = LU->j[pj];
+                val = LU->val[pj];
+                x[i] -= val * x[j];
+            }
+            x[i] /= LU->val[pj];
+        }
+    }
+    else
     {
-        y[i] = b[i];
-        si = L->start[i];
-        ei = L->start[i + 1];
-        for ( pj = si; pj < ei - 1; ++pj )
+        for ( i = LU->n - 1; i >= 0; --i )
         {
-            // TODO: remove assignments? compiler optimizes away?
-            j = L->entries[pj].j;
-            val = L->entries[pj].val;
-            y[i] -= val * y[j];
+            x[i] = y[i];
+            si = LU->start[i];
+            ei = LU->start[i + 1];
+            for ( pj = si + 1; pj < ei; ++pj )
+            {
+                j = LU->j[pj];
+                val = LU->val[pj];
+                x[i] -= val * x[j];
+            }
+            x[i] /= LU->val[si];
         }
-        y[i] /= L->entries[pj].val;
     }
 }
 
 
-/* solve sparse upper triangular linear system using backward substitution */
-static void Backward_Subs( const sparse_matrix * const U, const real * const y, real * const x )
+/* Solve triangular system LU*x = y using level scheduling
+ *
+ * LU: lower/upper triangular, stored in CSR
+ * y: constants in linear system (RHS)
+ * x: solution
+ * tri: triangularity of LU (lower/upper)
+ * find_levels: perform level search if positive, otherwise reuse existing levels
+ * 
+ * Assumptions:
+ *   LU has non-zero diagonals
+ *   Each row of LU has at least one non-zero (i.e., no rows with all zeros) */
+static void tri_solve_level_sched( const sparse_matrix * const LU, const real * const y,
+        real * const x, const TRIANGULARITY tri, int find_levels )
 {
-    int i, pj, j, si, ei;
-    real val;
+    int i, j, pj, local_row, local_level, levels;
+    static int levels_L = 1, levels_U = 1;
+    static unsigned int *row_levels_L = NULL, *level_rows_L = NULL, *level_rows_cnt_L = NULL;
+    static unsigned int *row_levels_U = NULL, *level_rows_U = NULL, *level_rows_cnt_U = NULL;
+    unsigned int *row_levels, *level_rows, *level_rows_cnt;
 
-    for ( i = U->n - 1; i >= 0; --i )
+    if ( tri == LOWER )
+    {
+        row_levels = row_levels_L;
+        level_rows = level_rows_L;
+        level_rows_cnt = level_rows_cnt_L;
+        levels = levels_L;
+    }
+    else
+    {
+        row_levels = row_levels_U;
+        level_rows = level_rows_U;
+        level_rows_cnt = level_rows_cnt_U;
+        levels = levels_U;
+    }
+
+    if ( row_levels == NULL || level_rows == NULL || level_rows_cnt == NULL )
+    {
+        if ( (row_levels = (unsigned int*) malloc(LU->n * sizeof(unsigned int))) == NULL
+            || (level_rows = (unsigned int*) malloc(LU->n * LU->n * sizeof(unsigned int))) == NULL
+            || (level_rows_cnt = (unsigned int*) malloc(LU->n * sizeof(unsigned int))) == NULL )
+        {
+            fprintf( stderr, "Not enough space for triangular solve via level scheduling. Terminating...\n" );
+            exit( INSUFFICIENT_MEMORY );
+        }
+    }
+
+    /* find levels (row dependencies in substitutions) */
+    if ( find_levels )
+    {
+        memset( row_levels, 0, LU->n * sizeof( unsigned int) );
+        memset( level_rows_cnt, 0, LU->n * sizeof( unsigned int) );
+
+        if ( tri == LOWER )
+        {
+            for ( i = 0; i < LU->n; ++i )
+            {
+                local_level = 0;
+                for ( pj = LU->start[i]; pj < LU->start[i + 1] - 1; ++pj )
+                {
+                    local_level = MAX( local_level, row_levels[LU->j[pj]] + 1 );
+                }
+        
+                levels = MAX( levels, local_level + 1 );
+                row_levels[i] = local_level;
+                level_rows[local_level * LU->n + level_rows_cnt[local_level]] = i;
+                ++level_rows_cnt[local_level];
+            }
+        }
+        else
+        {
+            for ( i = LU->n - 1; i >= 0; --i )
+            {
+                local_level = 0;
+                for ( pj = LU->start[i] + 1; pj < LU->start[i + 1]; ++pj )
+                {
+                    local_level = MAX( local_level, row_levels[LU->j[pj]] + 1 );
+                }
+        
+                levels = MAX( levels, local_level + 1 );
+                row_levels[i] = local_level;
+                level_rows[local_level * LU->n + level_rows_cnt[local_level]] = i;
+                ++level_rows_cnt[local_level];
+            }
+        }
+    }
+
+    /* perform substitutions by level */
+    if ( tri == LOWER )
+    {
+        for ( i = 0; i < levels; ++i )
+        {
+            #pragma omp parallel for schedule(guided) \
+                default(none) private(j, pj, local_row) shared(stderr, i, level_rows_cnt, level_rows)
+            for ( j = 0; j < level_rows_cnt[i]; ++j )
+            {
+                local_row = level_rows[i * LU->n + j];
+                x[local_row] = y[local_row];
+                for ( pj = LU->start[local_row]; pj < LU->start[local_row + 1] - 1; ++pj )
+                {
+                    x[local_row] -= LU->val[pj] * x[LU->j[pj]];
+    
+                }
+                x[local_row] /= LU->val[pj];
+            }
+        }
+    }
+    else
     {
-        x[i] = y[i];
-        si = U->start[i];
-        ei = U->start[i + 1];
-        for ( pj = si + 1; pj < ei; ++pj )
+        for ( i = 0; i < levels; ++i )
         {
-            // TODO: remove assignments? compiler optimizes away?
-            j = U->entries[pj].j;
-            val = U->entries[pj].val;
-            x[i] -= val * x[j];
+            #pragma omp parallel for schedule(guided) \
+                default(none) private(j, pj, local_row) shared(i, level_rows_cnt, level_rows)
+            for ( j = 0; j < level_rows_cnt[i]; ++j )
+            {
+                local_row = level_rows[i * LU->n + j];
+                x[local_row] = y[local_row];
+                for ( pj = LU->start[local_row] + 1; pj < LU->start[local_row + 1]; ++pj )
+                {
+                    x[local_row] -= LU->val[pj] * x[LU->j[pj]];
+    
+                }
+                x[local_row] /= LU->val[LU->start[local_row]];
+            }
         }
-        x[i] /= U->entries[si].val;
+    }
+
+    /* save level info for re-use if performing repeated triangular solves via preconditioning */
+    if ( tri == LOWER )
+    {
+        row_levels_L = row_levels;
+        level_rows_L = level_rows;
+        level_rows_cnt_L = level_rows_cnt;
+        levels_L = levels;
+    }
+    else
+    {
+        row_levels_U = row_levels;
+        level_rows_U = level_rows;
+        level_rows_cnt_U = level_rows_cnt;
+        levels_U = levels;
     }
 }
 
@@ -262,9 +424,9 @@ static void Backward_Subs( const sparse_matrix * const U, const real * const y,
  *
  * Note: Newmann series arises from series expansion of the inverse of
  * the coefficient matrix in the triangular system */
-static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri,
-                         const real * const Dinv, const unsigned int n,
-                         const real * const b, real * const x, const unsigned int maxiter )
+static void jacobi_iter( const sparse_matrix * const R, const real * const Dinv,
+        const real * const b, real * const x, const TRIANGULARITY tri,
+        const unsigned int maxiter )
 {
     unsigned int i, k, si = 0, ei = 0, iter = 0;
 #ifdef _OPENMP
@@ -286,26 +448,26 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri,
              * overhead per Sparse_MatVec call*/
             if ( Dinv_b == NULL )
             {
-                if ( (Dinv_b = (real*) malloc(sizeof(real) * n)) == NULL )
+                if ( (Dinv_b = (real*) malloc(sizeof(real) * R->n)) == NULL )
                 {
                     fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" );
-                    exit(INSUFFICIENT_SPACE);
+                    exit( INSUFFICIENT_MEMORY );
                 }
             }
             if ( rp == NULL )
             {
-                if ( (rp = (real*) malloc(sizeof(real) * n)) == NULL )
+                if ( (rp = (real*) malloc(sizeof(real) * R->n)) == NULL )
                 {
                     fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" );
-                    exit(INSUFFICIENT_SPACE);
+                    exit( INSUFFICIENT_MEMORY );
                 }
             }
             if ( rp2 == NULL )
             {
-                    if ( (rp2 = (real*) malloc(sizeof(real) * n)) == NULL )
+                    if ( (rp2 = (real*) malloc(sizeof(real) * R->n)) == NULL )
                 {
                     fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" );
-                    exit(INSUFFICIENT_SPACE);
+                    exit( INSUFFICIENT_MEMORY );
                 }
             }
 #ifdef _OPENMP
@@ -314,19 +476,19 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri,
                 if ( (b_local = (real*) malloc( omp_get_num_threads() * R->n * sizeof(real))) == NULL )
                 {
                     fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" );
-                    exit( INSUFFICIENT_SPACE );
+                    exit( INSUFFICIENT_MEMORY );
                 }
 	    }
 #endif
     
-            Vector_MakeZero( rp, n );
+            Vector_MakeZero( rp, R->n );
 	}
     
         #pragma omp barrier
     
         /* precompute and cache, as invariant in loop below */
         #pragma omp for schedule(guided)
-        for ( i = 0; i < n; ++i )
+        for ( i = 0; i < R->n; ++i )
         {
             Dinv_b[i] = Dinv[i] * b[i];
         }
@@ -355,7 +517,7 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri,
                     si = R->start[i];
                     ei = R->start[i + 1] - 1;
                 }
-                else if (tri == UPPER)
+                else
                 {
     
                     si = R->start[i] + 1;
@@ -365,9 +527,9 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri,
                 for ( k = si; k < ei; ++k )
                 {
 #ifdef _OPENMP
-                    b_local[tid * R->n + i] += R->entries[k].val * rp[R->entries[k].j];
+                    b_local[tid * R->n + i] += R->val[k] * rp[R->j[k]];
 #else
-                    rp2[i] += R->entries[k].val * rp[R->entries[k].j];
+                    rp2[i] += R->val[k] * rp[R->j[k]];
 #endif
                 }
 #ifdef _OPENMP
@@ -405,42 +567,198 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri,
         while ( iter < maxiter );
     }
 
-    Vector_Copy( x, rp, n );
+    Vector_Copy( x, rp, R->n );
 }
 
 
-/* generalized minimual residual iterative solver for sparse linear systems,
- * diagonal preconditioner */
-int GMRES( static_storage *workspace, sparse_matrix *H,
-           real *b, real tol, real *x, FILE *fout, real *time, real *spmv_time )
+/* Solve triangular system LU*x = y using level scheduling
+ *
+ * workspace: lower/upper triangular, stored in CSR
+ * y: constants in linear system (RHS)
+ * x: solution
+ * tri: triangularity of preconditoning factor LU (lower/upper), if using ILU-based
+ * pc_type: preconditioner computation method used
+ * pa_type: preconditioner application method to use
+ * extra: parameter for some application methods, if applicable
+ * 
+ * Assumptions:
+ *   LU has non-zero diagonals
+ *   Each row of LU has at least one non-zero (i.e., no rows with all zeros) */
+void apply_preconditioner( const static_storage * const workspace, real * const y,
+        real * const x, const TRIANGULARITY tri, int pc_type, int pa_type, int extra )
 {
-    int i, j, k, itr, N;
-    real cc, tmp1, tmp2, temp, bnorm;
-    struct timeval start, stop;
+    int i, si;
+    sparse_matrix *LU;
+    static real *Dinv_L = NULL, *Dinv_U = NULL;
+
+    if ( tri == LOWER )
+    {
+        LU = workspace->L;
+    }
+    else
+    {
+        LU = workspace->U;
+    }
+
+    if ( ( pc_type != DIAG_PC || tri != LOWER ) && LU == NULL )
+    {
+        return;
+    }
+
+    switch ( pa_type )
+    {
+        case NONE_PA:
+            break;
+        case TRI_SOLVE_PA:
+            switch ( pc_type )
+            {
+                case DIAG_PC:
+                    diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n );
+                    break;
+                case ICHOLT_PC:
+                case ILU_PAR_PC:
+                case ILUT_PAR_PC:
+                    tri_solve( LU, y, x, tri );
+                    break;
+                default:
+                    break;
+            }
+            break;
+        case TRI_SOLVE_LEVEL_SCHED_PA:
+            switch ( pc_type )
+            {
+                case DIAG_PC:
+                    diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n );
+                    break;
+                case ICHOLT_PC:
+                case ILU_PAR_PC:
+                case ILUT_PAR_PC:
+                    tri_solve_level_sched( LU, y, x, tri, extra );
+                    break;
+                default:
+                    break;
+            }
+            break;
+        case JACOBI_ITER_PA:
+            switch ( pc_type )
+            {
+                case DIAG_PC:
+                    fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                    exit( INVALID_INPUT );
+                    break;
+                case ICHOLT_PC:
+                case ILU_PAR_PC:
+                case ILUT_PAR_PC:
+                    if ( tri == LOWER )
+                    {
+                        if ( Dinv_L == NULL )
+                        {
+                            if ( (Dinv_L = (real*) malloc(sizeof(real) * LU->n)) == NULL )
+                            {
+                                fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" );
+                                exit( INSUFFICIENT_MEMORY );
+                            }
+                        }
+
+                        /* construct D^{-1} */
+                        for ( i = 0; i < LU->n; ++i )
+                        {
+                            si = LU->start[i + 1] - 1;
+                            Dinv_L[i] = 1. / LU->val[si];
+                        }
+
+                        jacobi_iter( LU, Dinv_L, y, x, tri, extra );
+                    }
+                    else
+                    {
+                        if ( Dinv_U == NULL )
+                        {
+                            if ( (Dinv_U = (real*) malloc(sizeof(real) * LU->n)) == NULL )
+                            {
+                                fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" );
+                                exit( INSUFFICIENT_MEMORY );
+                            }
+                        }
+
+                        /* construct D^{-1}_U */
+                        for ( i = 0; i < LU->n; ++i )
+                        {
+                            si = LU->start[i];
+                            Dinv_U[i] = 1. / LU->val[si];
+                        }
+
+                        jacobi_iter( LU, Dinv_U, y, x, tri, extra );
+                    }
+                    break;
+                default:
+                    break;
+            }
+            break;
+        default:
+            fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+            exit( INVALID_INPUT );
+            break;
+
+    }
+
+    return;
+}
+
+
+/* generalized minimual residual iterative solver for sparse linear systems */
+int GMRES( const static_storage * const workspace, const control_params * const control,
+        const sparse_matrix * const H, const real * const b, real tol, real * const x,
+        const FILE * const fout, real * const time, real * const spmv_time )
+{
+    int i, j, k, itr, N, si;
+    real cc, tmp1, tmp2, temp, bnorm, time_start;
 
     N = H->n;
     bnorm = Norm( b, N );
-    /* apply the diagonal pre-conditioner to rhs */
-    gettimeofday( &start, NULL );
-    for ( i = 0; i < N; ++i )
-        workspace->b_prc[i] = b[i] * workspace->Hdia_inv[i];
-    gettimeofday( &stop, NULL );
-    *time += (stop.tv_sec + stop.tv_usec / 1000000.0)
-             - (start.tv_sec + start.tv_usec / 1000000.0);
+
+    if ( control->pre_comp_type == DIAG_PC )
+    {
+        /* apply precondioner to RHS */
+        apply_preconditioner( workspace, workspace->b, workspace->b_prc, LOWER,
+                control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters );
+    }
 
     /* GMRES outer-loop */
     for ( itr = 0; itr < MAX_ITR; ++itr )
     {
         /* calculate r0 */
-        gettimeofday( &start, NULL );
+        time_start = Get_Time( );
         Sparse_MatVec( H, x, workspace->b_prm );
-        gettimeofday( &stop, NULL );
-        *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0)
-                 - (start.tv_sec + start.tv_usec / 1000000.0);
-        for ( i = 0; i < N; ++i )
-            workspace->b_prm[i] *= workspace->Hdia_inv[i]; /* pre-conditioner */
+        *spmv_time += Get_Timing_Info( time_start );
+
+        if ( control->pre_comp_type == DIAG_PC )
+        {
+            time_start = Get_Time( );
+            apply_preconditioner( workspace, workspace->b_prm, workspace->b_prm, LOWER,
+                    control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters );
+            *time += Get_Timing_Info( time_start );
+        }
+
+
+        if ( control->pre_comp_type == DIAG_PC )
+        {
+            Vector_Sum( workspace->v[0], 1., workspace->b_prc, -1., workspace->b_prm, N );
+        }
+        else
+        {
+            Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N );
+        }
+
+        if ( control->pre_comp_type != DIAG_PC )
+        {
+            time_start = Get_Time( );
+            apply_preconditioner( workspace, workspace->v[0], workspace->v[0], LOWER,
+                    control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters );
+            apply_preconditioner( workspace, workspace->v[0], workspace->v[0], UPPER,
+                    control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters );
+            *time += Get_Timing_Info( time_start );
+        }
 
-        Vector_Sum(workspace->v[0], 1., workspace->b_prc, -1., workspace->b_prm, N);
         workspace->g[0] = Norm( workspace->v[0], N );
         Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N );
         //fprintf( stderr, "res: %.15e\n", workspace->g[0] );
@@ -449,26 +767,28 @@ int GMRES( static_storage *workspace, sparse_matrix *H,
         for ( j = 0; j < RESTART && fabs(workspace->g[j]) / bnorm > tol; j++ )
         {
             /* matvec */
-            gettimeofday( &start, NULL );
+            time_start = Get_Time( );
             Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] );
-            gettimeofday( &stop, NULL );
-            *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0)
-                     - (start.tv_sec + start.tv_usec / 1000000.0);
-            /*pre-conditioner*/
-            gettimeofday( &start, NULL );
-            for ( k = 0; k < N; ++k )
-                workspace->v[j + 1][k] *= workspace->Hdia_inv[k];
-            gettimeofday( &stop, NULL );
-            *time += (stop.tv_sec + stop.tv_usec / 1000000.0)
-                     - (start.tv_sec + start.tv_usec / 1000000.0);
-            //fprintf( stderr, "%d-%d: matvec done.\n", itr, j );
+            *spmv_time += Get_Timing_Info( time_start );
+
+            time_start = Get_Time( );
+            apply_preconditioner( workspace, workspace->v[j + 1], workspace->v[j + 1], LOWER,
+                    control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters );
+            apply_preconditioner( workspace, workspace->v[j + 1], workspace->v[j + 1], UPPER,
+                    control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters );
+            *time += Get_Timing_Info( time_start );
 
             /* apply modified Gram-Schmidt to orthogonalize the new residual */
-            for ( i = 0; i <= j; i++ )
+            for ( i = 0; i < j - 1; i++ )
+            {
+                workspace->h[i][j] = 0;
+            }
+
+            //for( i = 0; i <= j; i++ ) {
+            for ( i = MAX(j - 1, 0); i <= j; i++ )
             {
                 workspace->h[i][j] = Dot( workspace->v[i], workspace->v[j + 1], N );
-                Vector_Add( workspace->v[j + 1],
-                            -workspace->h[i][j], workspace->v[i], N );
+                Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N );
             }
 
             workspace->h[j + 1][j] = Norm( workspace->v[j + 1], N );
@@ -476,9 +796,8 @@ int GMRES( static_storage *workspace, sparse_matrix *H,
                           1. / workspace->h[j + 1][j], workspace->v[j + 1], N );
             // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j );
 
-
             /* Givens rotations on the upper-Hessenberg matrix to make it U */
-            for ( i = 0; i <= j; i++ )
+            for ( i = MAX(j - 1, 0); i <= j; i++ )
             {
                 if ( i == j )
                 {
@@ -502,32 +821,41 @@ int GMRES( static_storage *workspace, sparse_matrix *H,
             workspace->g[j] = tmp1;
             workspace->g[j + 1] = tmp2;
 
-            // fprintf( stderr, "h: " );
-            // for( i = 0; i <= j+1; ++i )
-            //  fprintf( stderr, "%.6f ", workspace->h[i][j] );
-            // fprintf( stderr, "\n" );
+            //fprintf( stderr, "h: " );
+            //for( i = 0; i <= j+1; ++i )
+            //fprintf( stderr, "%.6f ", workspace->h[i][j] );
+            //fprintf( stderr, "\n" );
             //fprintf( stderr, "res: %.15e\n", workspace->g[j+1] );
         }
 
 
-        /* solve Hy = g.
-           H is now upper-triangular, do back-substitution */
+        /* TODO: solve using Jacobi iteration? */
+        /* solve Hy = g: H is now upper-triangular, do back-substitution */
         for ( i = j - 1; i >= 0; i-- )
         {
             temp = workspace->g[i];
             for ( k = j - 1; k > i; k-- )
+            {
                 temp -= workspace->h[i][k] * workspace->y[k];
+            }
 
             workspace->y[i] = temp / workspace->h[i][i];
         }
 
         /* update x = x_0 + Vy */
+        Vector_MakeZero( workspace->p, N );
         for ( i = 0; i < j; i++ )
-            Vector_Add( x, workspace->y[i], workspace->v[i], N );
+        {
+            Vector_Add( workspace->p, workspace->y[i], workspace->v[i], N );
+        }
+
+        Vector_Add( x, 1., workspace->p, N );
 
         /* stopping condition */
         if ( fabs(workspace->g[j]) / bnorm <= tol )
+        {
             break;
+        }
     }
 
     // Sparse_MatVec( H, x, workspace->b_prm );
@@ -553,9 +881,9 @@ int GMRES( static_storage *workspace, sparse_matrix *H,
 }
 
 
-
-int GMRES_HouseHolder( static_storage *workspace, sparse_matrix *H,
-                       real *b, real tol, real *x, FILE *fout)
+int GMRES_HouseHolder( const static_storage * const workspace, const control_params * const control,
+        const sparse_matrix * const H, const real * const b, real tol, real * const x,
+        const FILE * const fout, real * const time, real * const spmv_time )
 {
     int  i, j, k, itr, N;
     real cc, tmp1, tmp2, temp, bnorm;
@@ -743,330 +1071,7 @@ int GMRES_HouseHolder( static_storage *workspace, sparse_matrix *H,
 }
 
 
-/* generalized minimual residual iterative solver for sparse linear systems,
- * with preconditioner using factors LU \approx H
- * and forward / backward substitution */
-int PGMRES( static_storage *workspace, sparse_matrix *H, real *b, real tol,
-            sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout, real *time, real *spmv_time )
-{
-    int i, j, k, itr, N;
-    real cc, tmp1, tmp2, temp, bnorm;
-    struct timeval start, stop;
-
-    N = H->n;
-    bnorm = Norm( b, N );
-
-    /* GMRES outer-loop */
-    for ( itr = 0; itr < MAX_ITR; ++itr )
-    {
-        /* calculate r0 */
-        gettimeofday( &start, NULL );
-        Sparse_MatVec( H, x, workspace->b_prm );
-        gettimeofday( &stop, NULL );
-        *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0)
-                 - (start.tv_sec + start.tv_usec / 1000000.0);
-        Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N );
-        gettimeofday( &start, NULL );
-        Forward_Subs( L, workspace->v[0], workspace->v[0] );
-        Backward_Subs( U, workspace->v[0], workspace->v[0] );
-        gettimeofday( &stop, NULL );
-        *time += (stop.tv_sec + stop.tv_usec / 1000000.0)
-                 - (start.tv_sec + start.tv_usec / 1000000.0);
-        workspace->g[0] = Norm( workspace->v[0], N );
-        Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N );
-        //fprintf( stderr, "res: %.15e\n", workspace->g[0] );
-
-        /* GMRES inner-loop */
-        for ( j = 0; j < RESTART && fabs(workspace->g[j]) / bnorm > tol; j++ )
-        {
-            /* matvec */
-            gettimeofday( &start, NULL );
-            Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] );
-            gettimeofday( &stop, NULL );
-            *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0)
-                     - (start.tv_sec + start.tv_usec / 1000000.0);
-            gettimeofday( &start, NULL );
-            Forward_Subs( L, workspace->v[j + 1], workspace->v[j + 1] );
-            Backward_Subs( U, workspace->v[j + 1], workspace->v[j + 1] );
-            gettimeofday( &stop, NULL );
-            *time += (stop.tv_sec + stop.tv_usec / 1000000.0)
-                     - (start.tv_sec + start.tv_usec / 1000000.0);
-
-            /* apply modified Gram-Schmidt to orthogonalize the new residual */
-            for ( i = 0; i < j - 1; i++ ) workspace->h[i][j] = 0;
-
-            //for( i = 0; i <= j; i++ ) {
-            for ( i = MAX(j - 1, 0); i <= j; i++ )
-            {
-                workspace->h[i][j] = Dot( workspace->v[i], workspace->v[j + 1], N );
-                Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N );
-            }
-
-            workspace->h[j + 1][j] = Norm( workspace->v[j + 1], N );
-            Vector_Scale( workspace->v[j + 1],
-                          1. / workspace->h[j + 1][j], workspace->v[j + 1], N );
-            // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j );
-
-            /* Givens rotations on the upper-Hessenberg matrix to make it U */
-            for ( i = MAX(j - 1, 0); i <= j; i++ )
-            {
-                if ( i == j )
-                {
-                    cc = SQRT( SQR(workspace->h[j][j]) + SQR(workspace->h[j + 1][j]) );
-                    workspace->hc[j] = workspace->h[j][j] / cc;
-                    workspace->hs[j] = workspace->h[j + 1][j] / cc;
-                }
-
-                tmp1 =  workspace->hc[i] * workspace->h[i][j] +
-                        workspace->hs[i] * workspace->h[i + 1][j];
-                tmp2 = -workspace->hs[i] * workspace->h[i][j] +
-                       workspace->hc[i] * workspace->h[i + 1][j];
-
-                workspace->h[i][j] = tmp1;
-                workspace->h[i + 1][j] = tmp2;
-            }
-
-            /* apply Givens rotations to the rhs as well */
-            tmp1 =  workspace->hc[j] * workspace->g[j];
-            tmp2 = -workspace->hs[j] * workspace->g[j];
-            workspace->g[j] = tmp1;
-            workspace->g[j + 1] = tmp2;
-
-            //fprintf( stderr, "h: " );
-            //for( i = 0; i <= j+1; ++i )
-            //fprintf( stderr, "%.6f ", workspace->h[i][j] );
-            //fprintf( stderr, "\n" );
-            //fprintf( stderr, "res: %.15e\n", workspace->g[j+1] );
-        }
-
-
-        /* solve Hy = g: H is now upper-triangular, do back-substitution */
-        for ( i = j - 1; i >= 0; i-- )
-        {
-            temp = workspace->g[i];
-            for ( k = j - 1; k > i; k-- )
-                temp -= workspace->h[i][k] * workspace->y[k];
-
-            workspace->y[i] = temp / workspace->h[i][i];
-        }
-
-        /* update x = x_0 + Vy */
-        Vector_MakeZero( workspace->p, N );
-        for ( i = 0; i < j; i++ )
-            Vector_Add( workspace->p, workspace->y[i], workspace->v[i], N );
-        //Backward_Subs( U, workspace->p, workspace->p );
-        //Forward_Subs( L, workspace->p, workspace->p );
-        Vector_Add( x, 1., workspace->p, N );
-
-        /* stopping condition */
-        if ( fabs(workspace->g[j]) / bnorm <= tol )
-            break;
-    }
-
-    // Sparse_MatVec( H, x, workspace->b_prm );
-    // for( i = 0; i < N; ++i )
-    // workspace->b_prm[i] *= workspace->Hdia_inv[i];
-    // fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" );
-    // for( i = 0; i < N; ++i )
-    // fprintf( fout, "%10.5f%15.12f%15.12f\n",
-    // workspace->b_prc[i], workspace->b_prm[i], x[i] );*/
-
-    // fprintf(fout,"GMRES outer:%d, inner:%d iters - residual norm: %25.20f\n",
-    //          itr, j, fabs( workspace->g[j] ) / bnorm );
-    // data->timing.matvec += itr * RESTART + j;
-
-    if ( itr >= MAX_ITR )
-    {
-        fprintf( stderr, "GMRES convergence failed\n" );
-        // return -1;
-        return itr * (RESTART + 1) + j + 1;
-    }
-
-    return itr * (RESTART + 1) + j + 1;
-}
-
-
-/* generalized minimual residual iterative solver for sparse linear systems,
- * with preconditioner using factors LU \approx H
- * and Jacobi iteration for approximate factor application */
-int PGMRES_Jacobi( static_storage *workspace, sparse_matrix *H, real *b, real tol,
-                   sparse_matrix *L, sparse_matrix *U, real *x, unsigned int iters,
-		   FILE *fout, real *time, real *spmv_time )
-{
-    int i, j, k, itr, N, si;
-    real cc, tmp1, tmp2, temp, bnorm;
-    real *Dinv_L, *Dinv_U;
-    struct timeval start, stop;
-
-    N = H->n;
-    bnorm = Norm( b, N );
-
-    /* Compute Jacobi iteration matrices from
-     * truncated Newmann series: x_{k+1} = Gx_k + D^{-1}b
-     * where:
-     *   G = I - D^{-1}R
-     *   R = triangular matrix
-     *   D = diagonal matrix, diagonals from R */
-    if ( (Dinv_L = (real*) malloc(sizeof(real) * N)) == NULL
-            || (Dinv_U = (real*) malloc(sizeof(real) * N)) == NULL )
-    {
-        fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" );
-        exit(INSUFFICIENT_SPACE);
-    }
-
-    /* construct D^{-1}_L and D^{-1}_U */
-    for ( i = 0; i < N; ++i )
-    {
-        si = L->start[i + 1] - 1;
-        Dinv_L[i] = 1. / L->entries[si].val;
-
-        si = U->start[i];
-        Dinv_U[i] = 1. / U->entries[si].val;
-    }
-
-    /* GMRES outer-loop */
-    for ( itr = 0; itr < MAX_ITR; ++itr )
-    {
-        /* calculate r0 */
-        gettimeofday( &start, NULL );
-        Sparse_MatVec( H, x, workspace->b_prm );
-        gettimeofday( &stop, NULL );
-        *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0)
-                 - (start.tv_sec + start.tv_usec / 1000000.0);
-        Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N );
-        gettimeofday( &start, NULL );
-        Jacobi_Iter( L, LOWER, Dinv_L, N, workspace->v[0], workspace->v[0], iters );
-        Jacobi_Iter( U, UPPER, Dinv_U, N, workspace->v[0], workspace->v[0], iters );
-        gettimeofday( &stop, NULL );
-        *time += (stop.tv_sec + stop.tv_usec / 1000000.0)
-                 - (start.tv_sec + start.tv_usec / 1000000.0);
-        workspace->g[0] = Norm( workspace->v[0], N );
-        Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N );
-        //fprintf( stderr, "res: %.15e\n", workspace->g[0] );
-
-        /* GMRES inner-loop */
-        for ( j = 0; j < RESTART && fabs(workspace->g[j]) / bnorm > tol; j++ )
-        {
-            /* matvec */
-            gettimeofday( &start, NULL );
-            Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] );
-            gettimeofday( &stop, NULL );
-            *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0)
-                     - (start.tv_sec + start.tv_usec / 1000000.0);
-            gettimeofday( &start, NULL );
-            Jacobi_Iter( L, LOWER, Dinv_L, N, workspace->v[j + 1], workspace->v[j + 1], iters );
-            Jacobi_Iter( U, UPPER, Dinv_U, N, workspace->v[j + 1], workspace->v[j + 1], iters );
-            gettimeofday( &stop, NULL );
-            *time += (stop.tv_sec + stop.tv_usec / 1000000.0)
-                     - (start.tv_sec + start.tv_usec / 1000000.0);
-
-            /* apply modified Gram-Schmidt to orthogonalize the new residual */
-            for ( i = 0; i < j - 1; i++ )
-            {
-                workspace->h[i][j] = 0;
-            }
-
-            //for( i = 0; i <= j; i++ ) {
-            for ( i = MAX(j - 1, 0); i <= j; i++ )
-            {
-                workspace->h[i][j] = Dot( workspace->v[i], workspace->v[j + 1], N );
-                Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N );
-            }
-
-            workspace->h[j + 1][j] = Norm( workspace->v[j + 1], N );
-            Vector_Scale( workspace->v[j + 1],
-                          1. / workspace->h[j + 1][j], workspace->v[j + 1], N );
-            // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j );
-
-            /* Givens rotations on the upper-Hessenberg matrix to make it U */
-            for ( i = MAX(j - 1, 0); i <= j; i++ )
-            {
-                if ( i == j )
-                {
-                    cc = SQRT( SQR(workspace->h[j][j]) + SQR(workspace->h[j + 1][j]) );
-                    workspace->hc[j] = workspace->h[j][j] / cc;
-                    workspace->hs[j] = workspace->h[j + 1][j] / cc;
-                }
-
-                tmp1 =  workspace->hc[i] * workspace->h[i][j] +
-                        workspace->hs[i] * workspace->h[i + 1][j];
-                tmp2 = -workspace->hs[i] * workspace->h[i][j] +
-                       workspace->hc[i] * workspace->h[i + 1][j];
-
-                workspace->h[i][j] = tmp1;
-                workspace->h[i + 1][j] = tmp2;
-            }
-
-            /* apply Givens rotations to the rhs as well */
-            tmp1 =  workspace->hc[j] * workspace->g[j];
-            tmp2 = -workspace->hs[j] * workspace->g[j];
-            workspace->g[j] = tmp1;
-            workspace->g[j + 1] = tmp2;
-
-            //fprintf( stderr, "h: " );
-            //for( i = 0; i <= j+1; ++i )
-            //fprintf( stderr, "%.6f ", workspace->h[i][j] );
-            //fprintf( stderr, "\n" );
-            //fprintf( stderr, "res: %.15e\n", workspace->g[j+1] );
-        }
-
-
-        /* TODO: solve using Jacobi iteration? */
-        /* solve Hy = g: H is now upper-triangular, do back-substitution */
-        for ( i = j - 1; i >= 0; i-- )
-        {
-            temp = workspace->g[i];
-            for ( k = j - 1; k > i; k-- )
-            {
-                temp -= workspace->h[i][k] * workspace->y[k];
-            }
-
-            workspace->y[i] = temp / workspace->h[i][i];
-        }
-
-        /* update x = x_0 + Vy */
-        Vector_MakeZero( workspace->p, N );
-        for ( i = 0; i < j; i++ )
-        {
-            Vector_Add( workspace->p, workspace->y[i], workspace->v[i], N );
-        }
-        //Backward_Subs( U, workspace->p, workspace->p );
-        //Forward_Subs( L, workspace->p, workspace->p );
-        Vector_Add( x, 1., workspace->p, N );
-
-        /* stopping condition */
-        if ( fabs(workspace->g[j]) / bnorm <= tol )
-        {
-            break;
-        }
-    }
-
-    // Sparse_MatVec( H, x, workspace->b_prm );
-    // for( i = 0; i < N; ++i )
-    // workspace->b_prm[i] *= workspace->Hdia_inv[i];
-    // fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" );
-    // for( i = 0; i < N; ++i )
-    // fprintf( fout, "%10.5f%15.12f%15.12f\n",
-    // workspace->b_prc[i], workspace->b_prm[i], x[i] );*/
-
-    // fprintf(fout,"GMRES outer:%d, inner:%d iters - residual norm: %25.20f\n",
-    //          itr, j, fabs( workspace->g[j] ) / bnorm );
-    // data->timing.matvec += itr * RESTART + j;
-
-    free( Dinv_U );
-    free( Dinv_L );
-
-    if ( itr >= MAX_ITR )
-    {
-        fprintf( stderr, "GMRES convergence failed\n" );
-        // return -1;
-        return itr * (RESTART + 1) + j + 1;
-    }
-
-    return itr * (RESTART + 1) + j + 1;
-}
-
-
+/* Preconditioned Conjugate Gradient */
 int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol,
          sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout )
 {
@@ -1084,8 +1089,8 @@ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol,
     //Print_Soln( workspace, x, q, b, N );
     //fprintf( stderr, "res: %.15e\n", r_norm );
 
-    Forward_Subs( L, workspace->r, workspace->d );
-    Backward_Subs( U, workspace->d, workspace->p );
+    tri_solve( L, workspace->r, workspace->d, LOWER );
+    tri_solve( U, workspace->d, workspace->p, UPPER );
     sig_new = Dot( workspace->r, workspace->p, N );
     sig0 = sig_new;
 
@@ -1103,8 +1108,8 @@ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol,
         r_norm = Norm(workspace->r, N);
         //fprintf( stderr, "res: %.15e\n", r_norm );
 
-        Forward_Subs( L, workspace->r, workspace->d );
-        Backward_Subs( U, workspace->d, workspace->d );
+        tri_solve( L, workspace->r, workspace->d, LOWER );
+        tri_solve( U, workspace->d, workspace->d, UPPER );
         sig_old = sig_new;
         sig_new = Dot( workspace->r, workspace->d, N );
         beta = sig_new / sig_old;
@@ -1122,6 +1127,7 @@ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol,
 }
 
 
+/* Conjugate Gradient */
 int CG( static_storage *workspace, sparse_matrix *H,
         real *b, real tol, real *x, FILE *fout )
 {
@@ -1136,7 +1142,9 @@ int CG( static_storage *workspace, sparse_matrix *H,
     Sparse_MatVec( H, x, workspace->q );
     Vector_Sum( workspace->r , 1.,  b, -1., workspace->q, N );
     for ( j = 0; j < N; ++j )
+    {
         workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j];
+    }
 
     sig_new = Dot( workspace->r, workspace->d, N );
     sig0 = sig_new;
@@ -1179,7 +1187,6 @@ int CG( static_storage *workspace, sparse_matrix *H,
 }
 
 
-
 /* Steepest Descent */
 int SDM( static_storage *workspace, sparse_matrix *H,
          real *b, real tol, real *x, FILE *fout )
@@ -1195,7 +1202,9 @@ int SDM( static_storage *workspace, sparse_matrix *H,
     Sparse_MatVec( H, x, workspace->q );
     Vector_Sum( workspace->r , 1.,  b, -1., workspace->q, N );
     for ( j = 0; j < N; ++j )
+    {
         workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j];
+    }
 
     sig = Dot( workspace->r, workspace->d, N );
     sig0 = sig;
@@ -1211,7 +1220,9 @@ int SDM( static_storage *workspace, sparse_matrix *H,
         Vector_Add( x, alpha, workspace->d, N );
         Vector_Add( workspace->r, -alpha, workspace->q, N );
         for ( j = 0; j < N; ++j )
+        {
             workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j];
+        }
 
         //fprintf( stderr, "d_norm:%24.15e, q_norm:%24.15e, tmp:%24.15e\n",
         //     Norm(workspace->d,N), Norm(workspace->q,N), tmp );
@@ -1229,6 +1240,15 @@ int SDM( static_storage *workspace, sparse_matrix *H,
 }
 
 
+/* Estimate the stability of a 2-side preconditioning scheme
+ * using the factorization A \approx LU. Specifically, estimate the 1-norm of A^{-1}
+ * using the 1-norm of (LU)^{-1}e, with e = [1 1 ... 1]^T through 2 triangular solves:
+ *   1) Ly = e
+ *   2) Ux = y where y = Ux
+ * That is, we seek to solve e = LUx for unknown x 
+ *
+ * Reference: Incomplete LU Preconditioning with the Multilevel Fast Multipole Algorithm
+ *   for Electromagnetic Scattering, SIAM J. Sci. Computing, 2007 */
 real condest( const sparse_matrix * const L, const sparse_matrix * const U )
 {
     unsigned int i, N;
@@ -1239,7 +1259,7 @@ real condest( const sparse_matrix * const L, const sparse_matrix * const U )
     if ( (e = (real*) malloc(sizeof(real) * N)) == NULL )
     {
         fprintf( stderr, "Not enough memory for condest. Terminating.\n" );
-        exit(INSUFFICIENT_SPACE);
+        exit( INSUFFICIENT_MEMORY );
     }
 
     for ( i = 0; i < N; ++i )
@@ -1247,9 +1267,10 @@ real condest( const sparse_matrix * const L, const sparse_matrix * const U )
         e[i] = 1.;
     }
 
-    Forward_Subs( L, e, e );
-    Backward_Subs( U, e, e );
+    tri_solve( L, e, e, LOWER );
+    tri_solve( U, e, e, UPPER );
 
+    /* compute 1-norm of vector e */
     c = fabs(e[0]);
     for ( i = 1; i < N; ++i)
     {
diff --git a/sPuReMD/src/GMRES.h b/sPuReMD/src/GMRES.h
index 2acf61f2c2df46a175b4bccf31d229c147611755..d6d1c4844a9ea2870edffbdf1d65a5da358b362f 100644
--- a/sPuReMD/src/GMRES.h
+++ b/sPuReMD/src/GMRES.h
@@ -24,20 +24,13 @@
 
 #include "mytypes.h"
 
-int GMRES( static_storage*, sparse_matrix*,
-           real*, real, real*, FILE*, real*, real* );
+int GMRES( const static_storage * const, const control_params * const,
+        const sparse_matrix * const, const real * const, real, real * const,
+        const FILE * const, real * const, real * const );
 
-int GMRES_HouseHolder( static_storage*, sparse_matrix*,
-                       real*, real, real*, FILE* );
-
-int PGMRES( static_storage*, sparse_matrix*, real*, real,
-            sparse_matrix*, sparse_matrix*, real*, FILE*, real*, real* );
-
-int PGMRES_Jacobi( static_storage*, sparse_matrix*, real*, real,
-                   sparse_matrix*, sparse_matrix*, real*, unsigned int, FILE*, real*, real* );
-
-int PCG( static_storage*, sparse_matrix*, real*, real,
-         sparse_matrix*, sparse_matrix*, real*, FILE* );
+int GMRES_HouseHolder( const static_storage * const, const control_params * const,
+        const sparse_matrix * const, const real * const, real, real * const,
+        const FILE * const, real * const, real * const );
 
 int CG( static_storage*, sparse_matrix*,
         real*, real, real*, FILE* );
diff --git a/sPuReMD/src/QEq.c b/sPuReMD/src/QEq.c
index 8306ac1db912641a66dfedfe40ae691e6748812a..6e6cc539531ac4243a2c50a0affe9e556aeef225 100644
--- a/sPuReMD/src/QEq.c
+++ b/sPuReMD/src/QEq.c
@@ -24,30 +24,63 @@
 #include "GMRES.h"
 #include "list.h"
 #include "print_utils.h"
+#include "tool_box.h"
 #if defined(HAVE_SUPERLU_MT)
 #include "slu_mt_ddefs.h"
 #endif
 
+
 static int compare_matrix_entry(const void *v1, const void *v2)
 {
     /* larger element has larger column index */
-    return ((sparse_matrix_entry *)v1)->j - ((sparse_matrix_entry *)v2)->j;
+    return *(int *)v1 - *(int *)v2;
 }
 
 
 static void Sort_Matrix_Rows( sparse_matrix * const A )
 {
-    int i, si, ei;
+    int i, j, k, si, ei, *temp_j;
+    real *temp_val;
+
+    if ( ( temp_j = (int*) malloc( A->n * sizeof(int)) ) == NULL
+            || ( temp_val = (real*) malloc( A->n * sizeof(real)) ) == NULL )
+    {
+	fprintf( stderr, "Not enough space for matrix row sort. Terminating...\n" );
+	exit( INSUFFICIENT_MEMORY );
+    }
 
     /* sort each row of A using column indices */
     for ( i = 0; i < A->n; ++i )
     {
         si = A->start[i];
         ei = A->start[i + 1];
-        /* polymorphic sort in standard C library */
-        qsort( &(A->entries[si]), ei - si,
-               sizeof(sparse_matrix_entry), compare_matrix_entry );
+        memcpy( temp_j, A->j + si, sizeof(int) * (ei - si) );
+        memcpy( temp_val, A->val + si, sizeof(real) * (ei - si) );
+
+        //TODO: consider implementing single custom one-pass sort instead of using qsort + manual sort
+        /* polymorphic sort in standard C library using column indices */
+        qsort( temp_j, ei - si, sizeof(int), compare_matrix_entry );
+
+        /* manually sort vals */
+        for ( j = 0; j < (ei - si); ++j )
+        {
+            for ( k = 0; k < (ei - si); ++k )
+            {
+                if ( A->j[si + j] == temp_j[k] )
+                {
+                    A->val[si + k] = temp_val[j];
+                    break;
+                }
+
+            }
+        }
+        
+        /* copy sorted column indices */
+        memcpy( A->j + si, temp_j, sizeof(int) * (ei - si) );
     }
+
+    free( temp_val );
+    free( temp_j );
 }
 
 
@@ -59,7 +92,7 @@ static void Transpose( const sparse_matrix const *A, sparse_matrix const *A_t )
     if ( (A_t_top = (unsigned int*) calloc( A->n + 1, sizeof(unsigned int))) == NULL )
     {
 	fprintf( stderr, "Not enough space for matrix tranpose. Terminating...\n" );
-	exit( INSUFFICIENT_SPACE );
+	exit( INSUFFICIENT_MEMORY );
     }
 
     memset( A_t->start, 0, (A->n + 1) * sizeof(unsigned int) );
@@ -69,7 +102,7 @@ static void Transpose( const sparse_matrix const *A, sparse_matrix const *A_t )
     {
         for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj )
         {
-                ++A_t->start[A->entries[pj].j + 1];
+                ++A_t->start[A->j[pj] + 1];
         }
     }
 
@@ -84,9 +117,9 @@ static void Transpose( const sparse_matrix const *A, sparse_matrix const *A_t )
     {
         for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj )
         {
-            j = A->entries[pj].j;
-            A_t->entries[A_t_top[j]].j = i;
-            A_t->entries[A_t_top[j]].val = A->entries[pj].val;
+            j = A->j[pj];
+            A_t->j[A_t_top[j]] = i;
+            A_t->val[A_t_top[j]] = A->val[pj];
             ++A_t_top[j];
         }
     }
@@ -103,52 +136,116 @@ static void Transpose_I( sparse_matrix * const A )
     if ( Allocate_Matrix( &A_t, A->n, A->m ) == 0 )
     {
         fprintf( stderr, "not enough memory for transposing matrices. terminating.\n" );
-        exit(INSUFFICIENT_SPACE);
+        exit( INSUFFICIENT_MEMORY );
     }
 
     Transpose( A, A_t );
 
     memcpy( A->start, A_t->start, sizeof(int) * (A_t->n + 1) );
-    memcpy( A->entries, A_t->entries, sizeof(sparse_matrix_entry) * (A_t->start[A_t->n]) );
+    memcpy( A->j, A_t->j, sizeof(int) * (A_t->start[A_t->n]) );
+    memcpy( A->val, A_t->val, sizeof(real) * (A_t->start[A_t->n]) );
 
     Deallocate_Matrix( A_t );
 }
 
 
-static void Calculate_Droptol( const sparse_matrix * const A, real * const droptol, real dtol )
+static void Calculate_Droptol( const sparse_matrix * const A, real * const droptol,
+        const real dtol )
 {
     int i, j, k;
     real val;
+#ifdef _OPENMP
+    static real *droptol_local;
+    unsigned int tid;
+#endif
 
-    /* init droptol to 0 */
-    for ( i = 0; i < A->n; ++i )
-        droptol[i] = 0;
-
-    /* calculate sqaure of the norm of each row */
-    for ( i = 0; i < A->n; ++i )
+    #pragma omp parallel default(none) private(i, j, k, val, tid), shared(droptol_local)
     {
-        for ( k = A->start[i]; k < A->start[i + 1] - 1; ++k )
+#ifdef _OPENMP
+        tid = omp_get_thread_num();
+
+        #pragma omp master
+        {
+            /* keep b_local for program duration to avoid allocate/free
+             * overhead per Sparse_MatVec call*/
+            if ( droptol_local == NULL )
+            {
+                if ( (droptol_local = (real*) malloc( omp_get_num_threads() * A->n * sizeof(real))) == NULL )
+                {
+                    exit( INSUFFICIENT_MEMORY );
+                }
+	    }
+	}
+
+        #pragma omp barrier
+#endif
+
+        /* init droptol to 0 */
+        for ( i = 0; i < A->n; ++i )
+        {
+#ifdef _OPENMP
+            droptol_local[tid * A->n + i] = 0.0;
+#else
+            droptol[i] = 0.0;
+#endif
+        }
+
+        #pragma omp barrier
+
+        /* calculate sqaure of the norm of each row */
+        #pragma omp parallel for schedule(guided) \
+            default(none) private(i, j, k, val, tid) shared(droptol_local)
+        for ( i = 0; i < A->n; ++i )
         {
-            j = A->entries[k].j;
-            val = A->entries[k].val;
+            for ( k = A->start[i]; k < A->start[i + 1] - 1; ++k )
+            {
+                j = A->j[k];
+                val = A->val[k];
+
+#ifdef _OPENMP
+                droptol_local[tid * A->n + i] += val * val;
+                droptol_local[tid * A->n + j] += val * val;
+#else
+                droptol[i] += val * val;
+                droptol[j] += val * val;
+#endif
+            }
 
+            val = A->val[k]; // diagonal entry
+#ifdef _OPENMP
+            droptol_local[tid * A->n + i] += val * val;
+#else
             droptol[i] += val * val;
-            droptol[j] += val * val;
+#endif
         }
 
-        val = A->entries[k].val; // diagonal entry
-        droptol[i] += val * val;
-    }
+        #pragma omp barrier
 
-    /* calculate local droptol for each row */
-    //fprintf( stderr, "droptol: " );
-    for ( i = 0; i < A->n; ++i )
-    {
-        //fprintf( stderr, "%f-->", droptol[i] );
-        droptol[i] = SQRT( droptol[i] ) * dtol;
-        //fprintf( stderr, "%f  ", droptol[i] );
+#ifdef _OPENMP
+        #pragma omp parallel for schedule(guided) default(none) private(i, k) shared(droptol_local)
+        for ( i = 0; i < A->n; ++i )
+        {
+            droptol[i] = 0.0;
+            for ( k = 0; k < omp_get_num_threads(); ++k )
+            {
+                droptol[i] += droptol_local[k * A->n + i];
+            }
+        }
+#endif
+
+        #pragma omp barrier
+
+        /* calculate local droptol for each row */
+        //fprintf( stderr, "droptol: " );
+        #pragma omp parallel for schedule(guided) default(none) private(i)
+        for ( i = 0; i < A->n; ++i )
+        {
+            //fprintf( stderr, "%f-->", droptol[i] );
+            droptol[i] = SQRT( droptol[i] ) * dtol;
+            //fprintf( stderr, "%f  ", droptol[i] );
+        }
+        //fprintf( stderr, "\n" );
     }
-    //fprintf( stderr, "\n" );
 }
 
 
@@ -160,17 +257,21 @@ static int Estimate_LU_Fill( const sparse_matrix * const A, const real * const d
 
     fillin = 0;
 
-    //fprintf( stderr, "n: %d\n", A->n );
+    #pragma omp parallel for schedule(guided) \
+        default(none) private(i, j, pj, val) reduction(+: fillin)
     for ( i = 0; i < A->n; ++i )
+    {
         for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj )
         {
-            j = A->entries[pj].j;
-            val = A->entries[pj].val;
-            //fprintf( stderr, "i: %d, j: %d", i, j );
+            j = A->j[pj];
+            val = A->val[pj];
 
-            if ( fabs(val) > droptol[i] )
+            if ( FABS(val) > droptol[i] )
+            {
                 ++fillin;
+            }
         }
+    }
 
     return fillin + A->n;
 }
@@ -266,12 +367,12 @@ static real SuperLU_Factorize( const sparse_matrix * const A,
        || ( (Utop = (unsigned int*) malloc( (A->n + 1) * sizeof(unsigned int))) == NULL ) )
     {
 	fprintf( stderr, "Not enough space for SuperLU factorization. Terminating...\n" );
-	exit( INSUFFICIENT_SPACE );
+	exit( INSUFFICIENT_MEMORY );
     }
     if ( Allocate_Matrix( &A_t, A->n, A->m ) == 0 )
     {
         fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
-        exit(INSUFFICIENT_SPACE);
+        exit( INSUFFICIENT_MEMORY );
     }
 
     /* Set up the sparse matrix data structure for A. */
@@ -474,13 +575,13 @@ static real SuperLU_Factorize( const sparse_matrix * const A,
 #endif
 
 
-/* Diagonal preconditioner */
-static real diagonal_pre( const reax_system * const system, real * const Hdia_inv )
+/* Diagonal (Jacobi) preconditioner computation */
+static real diag_pre_comp( const reax_system * const system, real * const Hdia_inv )
 {
     unsigned int i;
-    struct timeval start, stop;
+    real start;
 
-    gettimeofday( &start, NULL );
+    start = Get_Time( );
 
     #pragma omp parallel for schedule(guided) \
         default(none) private(i)
@@ -489,23 +590,22 @@ static real diagonal_pre( const reax_system * const system, real * const Hdia_in
         Hdia_inv[i] = 1.0 / system->reaxprm.sbp[system->atoms[i].type].eta;
     }
 
-    gettimeofday( &stop, NULL );
-    return (stop.tv_sec + stop.tv_usec / 1000000.0)
-        - (start.tv_sec + start.tv_usec / 1000000.0);
+    return Get_Timing_Info( start );
 }
 
 
-/* Incomplete Cholesky factorization with thresholding */
+/* Incomplete Cholesky factorization with dual thresholding */
 static real ICHOLT( const sparse_matrix * const A, const real * const droptol,
             sparse_matrix * const L, sparse_matrix * const U )
 {
-    sparse_matrix_entry tmp[1000];
+    //TODO: either add compilation parameter or dynamically allocate
+    int tmp_j[1000];
+    real tmp_val[1000];
     int i, j, pj, k1, k2, tmptop, Ltop;
-    real val;
+    real val, start;
     int *Utop;
-    struct timeval start, stop;
 
-    gettimeofday( &start, NULL );
+    start = Get_Time( );
     
     Utop = (int*) malloc((A->n + 1) * sizeof(int));
 
@@ -513,10 +613,14 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol,
     Ltop = 0;
     tmptop = 0;
     for ( i = 0; i <= A->n; ++i )
+    {
         L->start[i] = U->start[i] = 0;
+    }
 
     for ( i = 0; i < A->n; ++i )
+    {
         Utop[i] = 0;
+    }
 
     //fprintf( stderr, "n: %d\n", A->n );
     for ( i = 0; i < A->n; ++i )
@@ -526,30 +630,36 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol,
 
         for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj )
         {
-            j = A->entries[pj].j;
-            val = A->entries[pj].val;
+            j = A->j[pj];
+            val = A->val[pj];
             //fprintf( stderr, "i: %d, j: %d", i, j );
 
-            if ( fabs(val) > droptol[i] )
+            if ( FABS(val) > droptol[i] )
             {
                 k1 = 0;
                 k2 = L->start[j];
                 while ( k1 < tmptop && k2 < L->start[j + 1] )
                 {
-                    if ( tmp[k1].j < L->entries[k2].j )
+                    if ( tmp_j[k1] < L->j[k2] )
+                    {
                         ++k1;
-                    else if ( tmp[k1].j > L->entries[k2].j )
+                    }
+                    else if ( tmp_j[k1] > L->j[k2] )
+                    {
                         ++k2;
+                    }
                     else
-                        val -= (tmp[k1++].val * L->entries[k2++].val);
+                    {
+                        val -= (tmp_val[k1++] * L->val[k2++]);
+                    }
                 }
 
                 // L matrix is lower triangular,
                 // so right before the start of next row comes jth diagonal
-                val /= L->entries[L->start[j + 1] - 1].val;
+                val /= L->val[L->start[j + 1] - 1];
 
-                tmp[tmptop].j = j;
-                tmp[tmptop].val = val;
+                tmp_j[tmptop] = j;
+                tmp_val[tmptop] = val;
                 ++tmptop;
             }
             //fprintf( stderr, " -- done\n" );
@@ -557,18 +667,20 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol,
 
         // compute the ith diagonal in L
         // sanity check
-        if ( A->entries[pj].j != i )
+        if ( A->j[pj] != i )
         {
             fprintf( stderr, "i=%d, badly built A matrix!\n", i );
-            exit(999);
+            exit( NUMERIC_BREAKDOWN );
         }
 
-        val = A->entries[pj].val;
+        val = A->val[pj];
         for ( k1 = 0; k1 < tmptop; ++k1 )
-            val -= (tmp[k1].val * tmp[k1].val);
+        {
+            val -= (tmp_val[k1] * tmp_val[k1]);
+        }
 
-        tmp[tmptop].j = i;
-        tmp[tmptop].val = SQRT(val);
+        tmp_j[tmptop] = i;
+        tmp_val[tmptop] = SQRT(val);
 
         // apply the dropping rule once again
         //fprintf( stderr, "row%d: tmptop: %d\n", i, tmptop );
@@ -577,17 +689,19 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol,
         //fprintf( stderr, "\n" );
         //fprintf( stderr, "row(%d): droptol=%.4f\n", i+1, droptol[i] );
         for ( k1 = 0; k1 < tmptop; ++k1 )
-            if ( fabs(tmp[k1].val) > droptol[i] / tmp[tmptop].val )
+        {
+            if ( FABS(tmp_val[k1]) > droptol[i] / tmp_val[tmptop] )
             {
-                L->entries[Ltop].j = tmp[k1].j;
-                L->entries[Ltop].val = tmp[k1].val;
-                U->start[tmp[k1].j + 1]++;
+                L->j[Ltop] = tmp_j[k1];
+                L->val[Ltop] = tmp_val[k1];
+                U->start[tmp_j[k1] + 1]++;
                 ++Ltop;
                 //fprintf( stderr, "%d(%.4f)  ", tmp[k1].j+1, tmp[k1].val );
             }
+        }
         // keep the diagonal in any case
-        L->entries[Ltop].j = tmp[k1].j;
-        L->entries[Ltop].val = tmp[k1].val;
+        L->j[Ltop] = tmp_j[k1];
+        L->val[Ltop] = tmp_val[k1];
         ++Ltop;
         //fprintf( stderr, "%d(%.4f)\n", tmp[k1].j+1,  tmp[k1].val );
     }
@@ -604,9 +718,9 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol,
     {
         for ( pj = L->start[i]; pj < L->start[i + 1]; ++pj )
         {
-            j = L->entries[pj].j;
-            U->entries[Utop[j]].j = i;
-            U->entries[Utop[j]].val = L->entries[pj].val;
+            j = L->j[pj];
+            U->j[Utop[j]] = i;
+            U->val[Utop[j]] = L->val[pj];
             Utop[j]++;
         }
     }
@@ -615,9 +729,7 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol,
 
     free(Utop);
 
-    gettimeofday( &stop, NULL );
-    return (stop.tv_sec + stop.tv_usec / 1000000.0)
-        - (start.tv_sec + start.tv_usec / 1000000.0);
+    return Get_Timing_Info( start );
 }
 
 
@@ -631,26 +743,29 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
                        sparse_matrix * const U_t, sparse_matrix * const U )
 {
     unsigned int i, j, k, pj, x = 0, y = 0, ei_x, ei_y;
-    real *D, *D_inv, sum;
+    real *D, *D_inv, sum, start;
     sparse_matrix *DAD;
     int *Utop;
-    struct timeval start, stop;
 
-    gettimeofday( &start, NULL );
+    start = Get_Time( );
 
     if ( Allocate_Matrix( &DAD, A->n, A->m ) == 0 )
     {
         fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
-        exit(INSUFFICIENT_SPACE);
+        exit( INSUFFICIENT_MEMORY );
     }
 
-    D = (real*) malloc(A->n * sizeof(real));
-    D_inv = (real*) malloc(A->n * sizeof(real));
-    Utop = (int*) malloc((A->n + 1) * sizeof(int));
+    if( ( D = (real*) malloc(A->n * sizeof(real)) ) == NULL ||
+            ( D_inv = (real*) malloc(A->n * sizeof(real)) ) == NULL ||
+            ( Utop = (int*) malloc((A->n + 1) * sizeof(int)) ) == NULL )
+    {
+        fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
+        exit( INSUFFICIENT_MEMORY );
+    }
 
     for ( i = 0; i < A->n; ++i )
     {
-        D_inv[i] = SQRT( A->entries[A->start[i + 1] - 1].val );
+        D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] );
         D[i] = 1. / D_inv[i];
     }
 
@@ -668,19 +783,19 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
         /* non-diagonals */
         for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj )
         {
-            DAD->entries[pj].j = A->entries[pj].j;
-            DAD->entries[pj].val =
-                A->entries[pj].val * D[i] * D[A->entries[pj].j];
+            DAD->j[pj] = A->j[pj];
+            DAD->val[pj] = A->val[pj] * D[i] * D[A->j[pj]];
         }
         /* diagonal */
-        DAD->entries[pj].j = A->entries[pj].j;
-        DAD->entries[pj].val = 1.;
+        DAD->j[pj] = A->j[pj];
+        DAD->val[pj] = 1.;
     }
 
     /* initial guesses for U^T,
      * assume: A and DAD symmetric and stored lower triangular */
     memcpy( U_t->start, DAD->start, sizeof(int) * (DAD->n + 1) );
-    memcpy( U_t->entries, DAD->entries, sizeof(sparse_matrix_entry) * (DAD->m) );
+    memcpy( U_t->j, DAD->j, sizeof(int) * (DAD->m) );
+    memcpy( U_t->val, DAD->val, sizeof(real) * (DAD->m) );
 
     for ( i = 0; i < sweeps; ++i )
     {
@@ -704,21 +819,21 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
                 }
             }
             /* column bounds of current nonzero */
-            y = U_t->start[U_t->entries[j].j];
-            ei_y = U_t->start[U_t->entries[j].j + 1];
+            y = U_t->start[U_t->j[j]];
+            ei_y = U_t->start[U_t->j[j] + 1];
 
             /* sparse dot product: dot( U^T(i,1:j-1), U^T(j,1:j-1) ) */
-            while ( U_t->entries[x].j < U_t->entries[j].j &&
-                    U_t->entries[y].j < U_t->entries[j].j &&
+            while ( U_t->j[x] < U_t->j[j] &&
+                    U_t->j[y] < U_t->j[j] &&
                     x < ei_x && y < ei_y )
             {
-                if ( U_t->entries[x].j == U_t->entries[y].j )
+                if ( U_t->j[x] == U_t->j[y] )
                 {
-                    sum += (U_t->entries[x].val * U_t->entries[y].val);
+                    sum += (U_t->val[x] * U_t->val[y]);
                     ++x;
                     ++y;
                 }
-                else if ( U_t->entries[x].j < U_t->entries[y].j )
+                else if ( U_t->j[x] < U_t->j[y] )
                 {
                     ++x;
                 }
@@ -728,10 +843,10 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
                 }
             }
 
-            sum = DAD->entries[j].val - sum;
+            sum = DAD->val[j] - sum;
 
             /* diagonal entries */
-            if ( (k - 1) == U_t->entries[j].j )
+            if ( (k - 1) == U_t->j[j] )
             {
                 /* sanity check */
                 if ( sum < ZERO )
@@ -745,12 +860,12 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
                     exit(NUMERIC_BREAKDOWN);
                 }
 
-                U_t->entries[j].val = SQRT( sum );
+                U_t->val[j] = SQRT( sum );
             }
             /* non-diagonal entries */
             else
             {
-                U_t->entries[j].val = sum / U_t->entries[ei_y - 1].val;
+                U_t->val[j] = sum / U_t->val[ei_y - 1];
             }
         }
     }
@@ -762,7 +877,7 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     {
         for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj )
         {
-            U_t->entries[pj].val *= D_inv[i];
+            U_t->val[pj] *= D_inv[i];
         }
     }
 
@@ -777,7 +892,7 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
          * store in U->start */
         for ( pj = U_t->start[i]; pj < U_t->start[i + 1]; ++pj )
         {
-            U->start[U_t->entries[pj].j + 1]++;
+            U->start[U_t->j[pj] + 1]++;
         }
     }
     /* set correct row pointer in U */
@@ -791,9 +906,9 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
         /* for each nonzero (column-wise) in U^T */
         for ( pj = U_t->start[i]; pj < U_t->start[i + 1]; ++pj )
         {
-            j = U_t->entries[pj].j;
-            U->entries[Utop[j]].j = i;
-            U->entries[Utop[j]].val = U_t->entries[pj].val;
+            j = U_t->j[pj];
+            U->j[Utop[j]] = i;
+            U->val[Utop[j]] = U_t->val[pj];
             /* Utop contains pointer within rows of U
              * (columns of U^T) to add next nonzero, so increment */
             Utop[j]++;
@@ -809,9 +924,7 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     free(D);
     free(Utop);
 
-    gettimeofday( &stop, NULL );
-    return (stop.tv_sec + stop.tv_usec / 1000000.0)
-        - (start.tv_sec + start.tv_usec / 1000000.0);
+    return Get_Timing_Info( start );
 }
 
 
@@ -822,34 +935,36 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
  * Fine-Grained Parallel Incomplete LU Factorization
  * SIAM J. Sci. Comp.
  * 
- * A: symmetric, half-format (lower triangular)
- * sweeps: number of ILUL_PAR loops over non-zeros
- * L / U: factorized matrices (A \approx LU) */
+ * A: symmetric, half-stored (lower triangular), CSR format
+ * sweeps: number of loops over non-zeros for computation
+ * L / U: factorized triangular matrices (A \approx LU), CSR format */
 static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
-                       sparse_matrix * const L, sparse_matrix * const U )
+        sparse_matrix * const L, sparse_matrix * const U )
 {
     unsigned int i, j, k, pj, x, y, ei_x, ei_y;
-    real *D, *D_inv, sum;
+    real *D, *D_inv, sum, start;
     sparse_matrix *DAD;
-    struct timeval start, stop;
 
-    gettimeofday( &start, NULL );
+    start = Get_Time( );
 
     if ( Allocate_Matrix( &DAD, A->n, A->m ) == 0 )
     {
         fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
-        exit(INSUFFICIENT_SPACE);
+        exit( INSUFFICIENT_MEMORY );
     }
 
-    /*TODO: check malloc return status*/
-    D = (real*) malloc(A->n * sizeof(real));
-    D_inv = (real*) malloc(A->n * sizeof(real));
+    if( ( D = (real*) malloc(A->n * sizeof(real)) ) == NULL ||
+            ( D_inv = (real*) malloc(A->n * sizeof(real)) ) == NULL )
+    {
+        fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
+        exit( INSUFFICIENT_MEMORY );
+    }
 
     #pragma omp parallel for schedule(guided) \
         default(none) shared(D, D_inv) private(i)
     for ( i = 0; i < A->n; ++i )
     {
-        D_inv[i] = SQRT( A->entries[A->start[i + 1] - 1].val );
+        D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] );
         D[i] = 1.0 / D_inv[i];
     }
 
@@ -863,29 +978,30 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
         /* non-diagonals */
         for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj )
         {
-            DAD->entries[pj].j = A->entries[pj].j;
-            DAD->entries[pj].val =
-                D[i] * A->entries[pj].val * D[A->entries[pj].j];
+            DAD->j[pj] = A->j[pj];
+            DAD->val[pj] = D[i] * A->val[pj] * D[A->j[pj]];
         }
         /* diagonal */
-        DAD->entries[pj].j = A->entries[pj].j;
-        DAD->entries[pj].val = 1.0;
+        DAD->j[pj] = A->j[pj];
+        DAD->val[pj] = 1.0;
     }
 
     /* initial guesses for L and U,
      * assume: A and DAD symmetric and stored lower triangular */
     memcpy( L->start, DAD->start, sizeof(int) * (DAD->n + 1) );
-    memcpy( L->entries, DAD->entries, sizeof(sparse_matrix_entry) * (DAD->start[DAD->n]) );
+    memcpy( L->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) );
+    memcpy( L->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) );
     /* store U^T in CSR for row-wise access and tranpose later */
     memcpy( U->start, DAD->start, sizeof(int) * (DAD->n + 1) );
-    memcpy( U->entries, DAD->entries, sizeof(sparse_matrix_entry) * (DAD->start[DAD->n]) );
+    memcpy( U->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) );
+    memcpy( U->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) );
 
     /* L has unit diagonal, by convention */
     #pragma omp parallel for schedule(guided) \
         default(none) private(i)
     for ( i = 0; i < A->n; ++i )
     {
-        L->entries[L->start[i + 1] - 1].val = 1.0;
+        L->val[L->start[i + 1] - 1] = 1.0;
     }
 
     for ( i = 0; i < sweeps; ++i )
@@ -910,22 +1026,22 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
                 }
             }
             /* determine column bounds of current nonzero */
-            y = DAD->start[DAD->entries[j].j];
-            ei_y = DAD->start[DAD->entries[j].j + 1];
+            y = DAD->start[DAD->j[j]];
+            ei_y = DAD->start[DAD->j[j] + 1];
 
             /* sparse dot product:
              *   dot( L(i,1:j-1), U(1:j-1,j) ) */
-            while ( L->entries[x].j < L->entries[j].j &&
-                    L->entries[y].j < L->entries[j].j &&
+            while ( L->j[x] < L->j[j] &&
+                    L->j[y] < L->j[j] &&
                     x < ei_x && y < ei_y )
             {
-                if ( L->entries[x].j == L->entries[y].j )
+                if ( L->j[x] == L->j[y] )
                 {
-                    sum += (L->entries[x].val * U->entries[y].val);
+                    sum += (L->val[x] * U->val[y]);
                     ++x;
                     ++y;
                 }
-                else if ( L->entries[x].j < L->entries[y].j )
+                else if ( L->j[x] < L->j[y] )
                 {
                     ++x;
                 }
@@ -937,7 +1053,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
 
             if( j != ei_x - 1 )
             {
-                L->entries[j].val = ( DAD->entries[j].val - sum ) / U->entries[ei_y - 1].val;
+                L->val[j] = ( DAD->val[j] - sum ) / U->val[ei_y - 1];
             }
         }
 
@@ -960,22 +1076,22 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
                 }
             }
             /* determine column bounds of current nonzero */
-            y = DAD->start[DAD->entries[j].j];
-            ei_y = DAD->start[DAD->entries[j].j + 1];
+            y = DAD->start[DAD->j[j]];
+            ei_y = DAD->start[DAD->j[j] + 1];
 
             /* sparse dot product:
              *   dot( L(i,1:i-1), U(1:i-1,j) ) */
-            while ( U->entries[x].j < U->entries[j].j &&
-                    U->entries[y].j < U->entries[j].j &&
+            while ( U->j[x] < U->j[j] &&
+                    U->j[y] < U->j[j] &&
                     x < ei_x && y < ei_y )
             {
-                if ( U->entries[x].j == U->entries[y].j )
+                if ( U->j[x] == U->j[y] )
                 {
-                    sum += (L->entries[y].val * U->entries[x].val);
+                    sum += (L->val[y] * U->val[x]);
                     ++x;
                     ++y;
                 }
-                else if ( U->entries[x].j < U->entries[y].j )
+                else if ( U->j[x] < U->j[y] )
                 {
                     ++x;
                 }
@@ -985,7 +1101,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
                 }
             }
 
-            U->entries[j].val = DAD->entries[j].val - sum;
+            U->val[j] = DAD->val[j] - sum;
         }
     }
 
@@ -998,9 +1114,9 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     {
         for ( pj = DAD->start[i]; pj < DAD->start[i + 1]; ++pj )
         {
-            L->entries[pj].val = D_inv[i] * L->entries[pj].val;
+            L->val[pj] = D_inv[i] * L->val[pj];
             /* currently storing U^T, so use row index instead of column index */
-            U->entries[pj].val = U->entries[pj].val * D_inv[i];
+            U->val[pj] = U->val[pj] * D_inv[i];
         }
     }
 
@@ -1015,9 +1131,261 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     free( D_inv );
     free( D );
 
-    gettimeofday( &stop, NULL );
-    return (stop.tv_sec + stop.tv_usec / 1000000.0)
-        - (start.tv_sec + start.tv_usec / 1000000.0);
+    return Get_Timing_Info( start );
+}
+
+
+/* Fine-grained (parallel) incomplete LU factorization with thresholding
+ * 
+ * Reference:
+ * Edmond Chow and Aftab Patel
+ * Fine-Grained Parallel Incomplete LU Factorization
+ * SIAM J. Sci. Comp.
+ * 
+ * A: symmetric, half-stored (lower triangular), CSR format
+ * droptol: row-wise tolerances used for dropping
+ * sweeps: number of loops over non-zeros for computation
+ * L / U: factorized triangular matrices (A \approx LU), CSR format */
+static real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
+        const unsigned int sweeps, sparse_matrix * const L, sparse_matrix * const U )
+{
+    unsigned int i, j, k, pj, x, y, ei_x, ei_y, Ltop, Utop;
+    real *D, *D_inv, sum, start;
+    sparse_matrix *DAD, *L_temp, *U_temp;
+
+    start = Get_Time( );
+
+    if ( Allocate_Matrix( &DAD, A->n, A->m ) == 0 ||
+            Allocate_Matrix( &L_temp, A->n, A->m ) == 0 ||
+            Allocate_Matrix( &U_temp, A->n, A->m ) == 0 )
+    {
+        fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
+        exit( INSUFFICIENT_MEMORY );
+    }
+
+    if( ( D = (real*) malloc(A->n * sizeof(real)) ) == NULL ||
+            ( D_inv = (real*) malloc(A->n * sizeof(real)) ) == NULL )
+    {
+        fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
+        exit( INSUFFICIENT_MEMORY );
+    }
+
+    #pragma omp parallel for schedule(guided) \
+        default(none) shared(D, D_inv) private(i)
+    for ( i = 0; i < A->n; ++i )
+    {
+        D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] );
+        D[i] = 1.0 / D_inv[i];
+    }
+
+    /* to get convergence, A must have unit diagonal, so apply
+     * transformation DAD, where D = D(1./sqrt(D(A))) */
+    memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) );
+    #pragma omp parallel for schedule(guided) \
+        default(none) shared(DAD, D) private(i, pj)
+    for ( i = 0; i < A->n; ++i )
+    {
+        /* non-diagonals */
+        for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj )
+        {
+            DAD->j[pj] = A->j[pj];
+            DAD->val[pj] = D[i] * A->val[pj] * D[A->j[pj]];
+        }
+        /* diagonal */
+        DAD->j[pj] = A->j[pj];
+        DAD->val[pj] = 1.0;
+    }
+
+    /* initial guesses for L and U,
+     * assume: A and DAD symmetric and stored lower triangular */
+    memcpy( L_temp->start, DAD->start, sizeof(int) * (DAD->n + 1) );
+    memcpy( L_temp->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) );
+    memcpy( L_temp->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) );
+    /* store U^T in CSR for row-wise access and tranpose later */
+    memcpy( U_temp->start, DAD->start, sizeof(int) * (DAD->n + 1) );
+    memcpy( U_temp->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) );
+    memcpy( U_temp->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) );
+
+    /* L has unit diagonal, by convention */
+    #pragma omp parallel for schedule(guided) \
+        default(none) private(i) shared(L_temp)
+    for ( i = 0; i < A->n; ++i )
+    {
+        L_temp->val[L_temp->start[i + 1] - 1] = 1.0;
+    }
+
+    for ( i = 0; i < sweeps; ++i )
+    {
+        /* for each nonzero in L */
+        #pragma omp parallel for schedule(guided) \
+            default(none) shared(DAD, L_temp, U_temp) private(j, k, x, y, ei_x, ei_y, sum)
+        for ( j = 0; j < DAD->start[DAD->n]; ++j )
+        {
+            sum = ZERO;
+
+            /* determine row bounds of current nonzero */
+            x = 0;
+            ei_x = 0;
+            for ( k = 1; k <= DAD->n; ++k )
+            {
+                if ( DAD->start[k] > j )
+                {
+                    x = DAD->start[k - 1];
+                    ei_x = DAD->start[k];
+                    break;
+                }
+            }
+            /* determine column bounds of current nonzero */
+            y = DAD->start[DAD->j[j]];
+            ei_y = DAD->start[DAD->j[j] + 1];
+
+            /* sparse dot product:
+             *   dot( L(i,1:j-1), U(1:j-1,j) ) */
+            while ( L_temp->j[x] < L_temp->j[j] &&
+                    L_temp->j[y] < L_temp->j[j] &&
+                    x < ei_x && y < ei_y )
+            {
+                if ( L_temp->j[x] == L_temp->j[y] )
+                {
+                    sum += (L_temp->val[x] * U_temp->val[y]);
+                    ++x;
+                    ++y;
+                }
+                else if ( L_temp->j[x] < L_temp->j[y] )
+                {
+                    ++x;
+                }
+                else
+                {
+                    ++y;
+                }
+            }
+
+            if( j != ei_x - 1 )
+            {
+                L_temp->val[j] = ( DAD->val[j] - sum ) / U_temp->val[ei_y - 1];
+            }
+        }
+
+        #pragma omp parallel for schedule(guided) \
+            default(none) shared(DAD, L_temp, U_temp) private(j, k, x, y, ei_x, ei_y, sum)
+        for ( j = 0; j < DAD->start[DAD->n]; ++j )
+        {
+            sum = ZERO;
+
+            /* determine row bounds of current nonzero */
+            x = 0;
+            ei_x = 0;
+            for ( k = 1; k <= DAD->n; ++k )
+            {
+                if ( DAD->start[k] > j )
+                {
+                    x = DAD->start[k - 1];
+                    ei_x = DAD->start[k];
+                    break;
+                }
+            }
+            /* determine column bounds of current nonzero */
+            y = DAD->start[DAD->j[j]];
+            ei_y = DAD->start[DAD->j[j] + 1];
+
+            /* sparse dot product:
+             *   dot( L(i,1:i-1), U(1:i-1,j) ) */
+            while ( U_temp->j[x] < U_temp->j[j] &&
+                    U_temp->j[y] < U_temp->j[j] &&
+                    x < ei_x && y < ei_y )
+            {
+                if ( U_temp->j[x] == U_temp->j[y] )
+                {
+                    sum += (L_temp->val[y] * U_temp->val[x]);
+                    ++x;
+                    ++y;
+                }
+                else if ( U_temp->j[x] < U_temp->j[y] )
+                {
+                    ++x;
+                }
+                else
+                {
+                    ++y;
+                }
+            }
+
+            U_temp->val[j] = DAD->val[j] - sum;
+        }
+    }
+
+    /* apply inverse transformation:
+     * since DAD \approx LU, then
+     * D^{-1}DADD^{-1} = A \approx D^{-1}LUD^{-1} */
+    #pragma omp parallel for schedule(guided) \
+        default(none) shared(DAD, L_temp, U_temp, D_inv) private(i, pj)
+    for ( i = 0; i < DAD->n; ++i )
+    {
+        for ( pj = DAD->start[i]; pj < DAD->start[i + 1]; ++pj )
+        {
+            L_temp->val[pj] = D_inv[i] * L_temp->val[pj];
+            /* currently storing U^T, so use row index instead of column index */
+            U_temp->val[pj] = U_temp->val[pj] * D_inv[i];
+        }
+    }
+
+    /* apply the dropping rule */
+    Ltop = 0;
+    Utop = 0;
+    for ( i = 0; i < DAD->n; ++i )
+    {
+        L->start[i] = Ltop;
+        U->start[i] = Utop;
+
+        for ( pj = L_temp->start[i]; pj < L_temp->start[i + 1] - 1; ++pj )
+        {
+            if ( FABS( L_temp->val[pj] ) > FABS( droptol[i] / L_temp->val[L_temp->start[i + 1] - 1] ) )
+            {
+                L->j[Ltop] = L_temp->j[pj];
+                L->val[Ltop] = L_temp->val[pj];
+                ++Ltop;
+            }
+        }
+
+        /* diagonal */
+        L->j[Ltop] = L_temp->j[pj];
+        L->val[Ltop] = L_temp->val[pj];
+        ++Ltop;
+
+        for ( pj = U_temp->start[i]; pj < U_temp->start[i + 1] - 1; ++pj )
+        {
+            if ( FABS( U_temp->val[pj] ) > FABS( droptol[i] / U_temp->val[U_temp->start[i + 1] - 1] ) )
+            {
+                U->j[Utop] = U_temp->j[pj];
+                U->val[Utop] = U_temp->val[pj];
+                ++Utop;
+            }
+        }
+
+        /* diagonal */
+        U->j[Utop] = U_temp->j[pj];
+        U->val[Utop] = U_temp->val[pj];
+        ++Utop;
+    }
+
+    L->start[i] = Ltop;
+    U->start[i] = Utop;
+
+    Transpose_I( U );
+
+#if defined(DEBUG_FOCUS)
+    fprintf( stderr, "nnz(L): %d\n", L->start[L->n] );
+    fprintf( stderr, "nnz(U): %d\n", U->start[U->n] );
+#endif
+
+    Deallocate_Matrix( U_temp );
+    Deallocate_Matrix( L_temp );
+    Deallocate_Matrix( DAD );
+    free( D_inv );
+    free( D );
+
+    return Get_Timing_Info( start );
 }
 
 
@@ -1045,9 +1413,13 @@ static void Init_MatVec( const reax_system * const system, const control_params
 	    case DIAG_PC:
                 if ( workspace->Hdia_inv == NULL )
                 {
-                    workspace->Hdia_inv = (real *) calloc( system->N, sizeof( real ) );
+                    if ( ( workspace->Hdia_inv = (real *) calloc( system->N, sizeof( real ) ) ) == NULL )
+                    {
+                        fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
+                        exit( INSUFFICIENT_MEMORY );
+                    }
                 }
-                data->timing.pre_comp += diagonal_pre( system, workspace->Hdia_inv );
+                data->timing.pre_comp += diag_pre_comp( system, workspace->Hdia_inv );
 		break;
 	    case ICHOLT_PC:
                 Calculate_Droptol( workspace->H, workspace->droptol, control->pre_comp_droptol );
@@ -1061,7 +1433,7 @@ static void Init_MatVec( const reax_system * const system, const control_params
                             Allocate_Matrix( &(workspace->U), far_nbrs->n, fillin ) == 0 )
                     {
                         fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
-                        exit(INSUFFICIENT_SPACE);
+                        exit( INSUFFICIENT_MEMORY );
                     }
 #if defined(DEBUG)
                     fprintf( stderr, "fillin = %d\n", fillin );
@@ -1073,7 +1445,7 @@ static void Init_MatVec( const reax_system * const system, const control_params
                 data->timing.pre_comp += ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U );
 //                data->timing.pre_comp += ICHOLT( workspace->H_sp, workspace->droptol, workspace->L, workspace->U );
 		break;
-	    case ICHOL_PAR_PC:
+	    case ILU_PAR_PC:
                 if ( workspace->L == NULL )
                 {
                     /* factors have sparsity pattern as H */
@@ -1081,7 +1453,7 @@ static void Init_MatVec( const reax_system * const system, const control_params
                             Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 )
                     {
                         fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
-                        exit(INSUFFICIENT_SPACE);
+                        exit( INSUFFICIENT_MEMORY );
                     }
                 }
 
@@ -1097,7 +1469,7 @@ static void Init_MatVec( const reax_system * const system, const control_params
 //                     Allocate_Matrix( &U_test, 3, 6 ) == 0 )
 //                {
 //                    fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
-//                    exit(INSUFFICIENT_SPACE);
+//                    exit( INSUFFICIENT_MEMORY );
 //                }
 //
 //                //3x3, SPD, store lower half
@@ -1127,6 +1499,26 @@ static void Init_MatVec( const reax_system * const system, const control_params
 //                Print_Sparse_Matrix2( U_test, "U_SLU.out" );
 //                exit( 0 );
 		break;
+	    case ILUT_PAR_PC:
+                Calculate_Droptol( workspace->H, workspace->droptol, control->pre_comp_droptol );
+#if defined(DEBUG_FOCUS)
+                fprintf( stderr, "drop tolerances calculated\n" );
+#endif
+
+                if ( workspace->L == NULL )
+                {
+                    /* TODO: safest storage estimate is ILU(0) (same as lower triangular portion of H), could improve later */
+                    if ( Allocate_Matrix( &(workspace->L), workspace->H->n, workspace->H->m ) == 0 ||
+                            Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 )
+                    {
+                        fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
+                        exit( INSUFFICIENT_MEMORY );
+                    }
+                }
+
+                data->timing.pre_comp += ILUT_PAR( workspace->H, workspace->droptol, control->pre_comp_sweeps,
+                        workspace->L, workspace->U );
+                break;
 	    case ILU_SUPERLU_MT_PC:
                 if ( workspace->L == NULL )
                 {
@@ -1135,11 +1527,15 @@ static void Init_MatVec( const reax_system * const system, const control_params
                             Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 )
                     {
                         fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
-                        exit(INSUFFICIENT_SPACE);
+                        exit( INSUFFICIENT_MEMORY );
                     }
                 }
-
+#if defined(HAVE_SUPERLU_MT)
                 data->timing.pre_comp += SuperLU_Factorize( workspace->H, workspace->L, workspace->U );
+#else
+                fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" );
+                exit( INVALID_INPUT );
+#endif
 		break;
 	    default:
                 fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" );
@@ -1238,52 +1634,40 @@ void QEq( reax_system * const system, control_params * const control, simulation
     switch ( control->qeq_solver_type )
     {
         case GMRES_S:
-            matvecs = GMRES( workspace, workspace->H,
-                             workspace->b_s, control->qeq_solver_q_err, workspace->s[0], out_control->log,
-                             &(data->timing.pre_app), &(data->timing.spmv) );
-            matvecs += GMRES( workspace, workspace->H,
-                              workspace->b_t, control->qeq_solver_q_err, workspace->t[0], out_control->log, 
-                              &(data->timing.pre_app), &(data->timing.spmv) );
+            matvecs = GMRES( workspace, control, workspace->H, workspace->b_s, control->qeq_solver_q_err,
+                    workspace->s[0], out_control->log, &(data->timing.pre_app), &(data->timing.spmv) );
+            matvecs += GMRES( workspace, control, workspace->H, workspace->b_t, control->qeq_solver_q_err,
+                    workspace->t[0], out_control->log, &(data->timing.pre_app), &(data->timing.spmv) );
             break;
         case GMRES_H_S:
-            matvecs = GMRES_HouseHolder( workspace, workspace->H,
-                                         workspace->b_s, control->qeq_solver_q_err, workspace->s[0], out_control->log );
-            matvecs += GMRES_HouseHolder( workspace, workspace->H,
-                                          workspace->b_t, control->qeq_solver_q_err, workspace->t[0], out_control->log );
-            break;
-        case PGMRES_S:
-            matvecs = PGMRES( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err,
-                              workspace->L, workspace->U, workspace->s[0], out_control->log,
-                              &(data->timing.pre_app), &(data->timing.spmv) );
-            matvecs += PGMRES( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err,
-                               workspace->L, workspace->U, workspace->t[0], out_control->log,
-                               &(data->timing.pre_app), &(data->timing.spmv) );
-            break;
-        case PGMRES_J_S:
-            matvecs = PGMRES_Jacobi( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err,
-                                     workspace->L, workspace->U, workspace->s[0], control->pre_app_jacobi_iters,
-				     out_control->log, &(data->timing.pre_app), &(data->timing.spmv) );
-            matvecs += PGMRES_Jacobi( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err,
-                                      workspace->L, workspace->U, workspace->t[0], control->pre_app_jacobi_iters,
-				      out_control->log, &(data->timing.pre_app), &(data->timing.spmv) );
+            matvecs = GMRES_HouseHolder( workspace, control, workspace->H, workspace->b_s, control->qeq_solver_q_err,
+                    workspace->s[0], out_control->log, &(data->timing.pre_app), &(data->timing.spmv) );
+            matvecs += GMRES_HouseHolder( workspace, control, workspace->H, workspace->b_t, control->qeq_solver_q_err,
+                    workspace->t[0], out_control->log, &(data->timing.pre_app), &(data->timing.spmv) );
             break;
         case CG_S:
-            matvecs = CG( workspace, workspace->H,
-                          workspace->b_s, control->qeq_solver_q_err, workspace->s[0], out_control->log ) + 1;
-            matvecs += CG( workspace, workspace->H,
-                           workspace->b_t, control->qeq_solver_q_err, workspace->t[0], out_control->log ) + 1;
-            break;
-        case PCG_S:
-            matvecs = PCG( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err,
-                         workspace->L, workspace->U, workspace->s[0], out_control->log ) + 1;
-            matvecs += PCG( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err,
-                            workspace->L, workspace->U, workspace->t[0], out_control->log ) + 1;
+            matvecs = CG( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err,
+                    workspace->s[0], out_control->log ) + 1;
+            matvecs += CG( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err,
+                    workspace->t[0], out_control->log ) + 1;
+//            matvecs = CG( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err,
+//                    workspace->L, workspace->U, workspace->s[0], control->pre_app_type,
+//                    control->pre_app_jacobi_iters, out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ) + 1;
+//            matvecs += CG( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err,
+//                    workspace->L, workspace->U, workspace->t[0], control->pre_app_type,
+//                    control->pre_app_jacobi_iters, out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ) + 1;
             break;
         case SDM_S:
-            matvecs = SDM( workspace, workspace->H,
-                           workspace->b_s, control->qeq_solver_q_err, workspace->s[0], out_control->log ) + 1;
-            matvecs += SDM( workspace, workspace->H,
-                            workspace->b_t, control->qeq_solver_q_err, workspace->t[0], out_control->log ) + 1;
+            matvecs = SDM( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err,
+                    workspace->s[0], out_control->log ) + 1;
+            matvecs += SDM( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err,
+                    workspace->t[0], out_control->log ) + 1;
+//            matvecs = SDM( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err,
+//                    workspace->L, workspace->U, workspace->s[0], control->pre_app_type,
+//                    control->pre_app_jacobi_iters, out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ) + 1;
+//            matvecs += SDM( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err,
+//                    workspace->L, workspace->U, workspace->t[0], control->pre_app_type,
+//                    control->pre_app_jacobi_iters, out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ) + 1;
             break;
 	default:
             fprintf( stderr, "Unrecognized QEq solver selection. Terminating...\n" );
diff --git a/sPuReMD/src/allocate.c b/sPuReMD/src/allocate.c
index 56bacd6d70db16bdf0d5701c589aa11a782eb5d3..b4b6f90fe290669eb32ddf860e47fa1289976ebe 100644
--- a/sPuReMD/src/allocate.c
+++ b/sPuReMD/src/allocate.c
@@ -21,6 +21,38 @@
 
 #include "allocate.h"
 #include "list.h"
+#include "tool_box.h"
+
+/* allocate space for atoms */
+int PreAllocate_Space( reax_system *system, control_params *control,
+        static_storage *workspace )
+{
+    int i;
+
+    system->atoms = (reax_atom*) scalloc( system->N,
+            sizeof(reax_atom), "atoms" );
+    workspace->orig_id = (int*) scalloc( system->N,
+            sizeof(int), "orid_id" );
+
+    /* space for keeping restriction info, if any */
+    if ( control->restrict_bonds )
+    {
+        workspace->restricted = (int*) scalloc( system->N,
+                sizeof(int), "restricted_atoms" );
+
+        workspace->restricted_list = (int**) scalloc( system->N,
+                sizeof(int*), "restricted_list" );
+
+        for ( i = 0; i < system->N; ++i )
+        {
+            workspace->restricted_list[i] = (int*) scalloc( MAX_RESTRICT,
+                    sizeof(int), "restricted_list[i]" );
+        }
+    }
+
+    return SUCCESS;
+}
+
 
 void Reallocate_Neighbor_List( list *far_nbrs, int n, int num_intrs )
 {
@@ -28,7 +60,7 @@ void Reallocate_Neighbor_List( list *far_nbrs, int n, int num_intrs )
     if (!Make_List( n, num_intrs, TYP_FAR_NEIGHBOR, far_nbrs ))
     {
         fprintf(stderr, "Problem in initializing far nbrs list. Terminating!\n");
-        exit( INIT_ERR );
+        exit( CANNOT_INITIALIZE );
     }
 
 #if defined(DEBUG_FOCUS)
@@ -55,16 +87,11 @@ int Allocate_Matrix( sparse_matrix **pH, int n, int m )
     {
         return 0;
     }
-    if ( (H->entries =
-                (sparse_matrix_entry*) malloc(sizeof(sparse_matrix_entry) * m)) == NULL )
+    if ( (H->j = (int*) malloc(sizeof(int) * m)) == NULL
+        || (H->val = (real*) malloc(sizeof(real) * m)) == NULL )
     {
         return 0;
     }
-//  if( ((H->j = (int*) malloc(sizeof(int)*m)) == NULL) ||
-//      ((H->val = (real*) malloc(sizeof(real)*m)) == NULL) )
-//  {
-//    return 0;
-//  }
 
     return 1;
 }
@@ -73,7 +100,8 @@ int Allocate_Matrix( sparse_matrix **pH, int n, int m )
 void Deallocate_Matrix( sparse_matrix *H )
 {
     free(H->start);
-    free(H->entries);
+    free(H->j);
+    free(H->val);
     free(H);
 }
 
@@ -111,7 +139,7 @@ int Allocate_HBond_List( int n, int num_h, int *h_index, int *hb_top,
     if ( !Make_List(num_h, num_hbonds, TYP_HBOND, hbonds ) )
     {
         fprintf( stderr, "not enough space for hbonds list. terminating!\n" );
-        exit( INIT_ERR );
+        exit( CANNOT_INITIALIZE );
     }
 
     for ( i = 0; i < n; ++i )
@@ -171,7 +199,7 @@ int Allocate_Bond_List( int n, int *bond_top, list *bonds )
     if ( !Make_List(n, num_bonds, TYP_BOND, bonds ) )
     {
         fprintf( stderr, "not enough space for bonds list. terminating!\n" );
-        exit( INIT_ERR );
+        exit( CANNOT_INITIALIZE );
     }
 
     Set_Start_Index( 0, 0, bonds );
@@ -274,7 +302,7 @@ void Reallocate( reax_system *system, static_storage *workspace, list **lists,
                          TYP_THREE_BODY, (*lists) + THREE_BODIES ) )
         {
             fprintf( stderr, "Problem in initializing angles list. Terminating!\n" );
-            exit( INIT_ERR );
+            exit( CANNOT_INITIALIZE );
         }
         realloc->num_3body = -1;
 #if defined(DEBUG_FOCUS)
diff --git a/sPuReMD/src/allocate.h b/sPuReMD/src/allocate.h
index 2581b3bab0976fe65e2ea14b80cee1a8267ee2a9..e8fdc4d70b96a5c39de5294187121644935e9630 100644
--- a/sPuReMD/src/allocate.h
+++ b/sPuReMD/src/allocate.h
@@ -24,6 +24,8 @@
 
 #include "mytypes.h"
 
+int PreAllocate_Space( reax_system*, control_params*, static_storage* );
+
 void Reallocate( reax_system*, static_storage*, list**, int );
 
 int Allocate_Matrix( sparse_matrix**, int, int );
diff --git a/sPuReMD/src/box.c b/sPuReMD/src/box.c
index c9bd50c4a14392759f0eb8935748b0b64a46ee63..6c2fda0ba797a4472333aceaa7ebc9e19ecad8df 100644
--- a/sPuReMD/src/box.c
+++ b/sPuReMD/src/box.c
@@ -20,89 +20,11 @@
   ----------------------------------------------------------------------*/
 
 #include "box.h"
+#include "tool_box.h"
 #include "vector.h"
 
 
-void Init_Box_From_CRYST(real a, real b, real c,
-                         real alpha, real beta, real gamma,
-                         simulation_box* box )
-{
-    double c_alpha, c_beta, c_gamma, s_gamma, zi;
-
-    c_alpha = cos(DEG2RAD(alpha));
-    c_beta  = cos(DEG2RAD(beta));
-    c_gamma = cos(DEG2RAD(gamma));
-    s_gamma = sin(DEG2RAD(gamma));
-
-    zi = (c_alpha - c_beta * c_gamma) / s_gamma;
-
-    box->box[0][0] = a;
-    box->box[0][1] = 0.0;
-    box->box[0][2] = 0.0;
-
-    box->box[1][0] = b * c_gamma;
-    box->box[1][1] = b * s_gamma;
-    box->box[1][2] = 0.0;
-
-    box->box[2][0] = c * c_beta;
-    box->box[2][1] = c * zi;
-    box->box[2][2] = c * SQRT(1.0 - SQR(c_beta) - SQR(zi));
-
-    Make_Consistent( box );
-
-#if defined(DEBUG_FOCUS)
-    fprintf( stderr, "box is %8.2f x %8.2f x %8.2f\n",
-             box->box[0][0], box->box[1][1], box->box[2][2] );
-#endif
-}
-
-
-void Update_Box( rtensor box_tensor, simulation_box* box )
-{
-    int i, j;
-
-    for (i = 0; i < 3; i++)
-        for (j = 0; j < 3; j++)
-            box->box[i][j] = box_tensor[i][j];
-
-    Make_Consistent( box );
-}
-
-
-void Update_Box_Isotropic( simulation_box *box, real mu )
-{
-    /*box->box[0][0] =
-      POW( V_new / ( box->side_prop[1] * box->side_prop[2] ), 1.0/3.0 );
-    box->box[1][1] = box->box[0][0] * box->side_prop[1];
-    box->box[2][2] = box->box[0][0] * box->side_prop[2];
-    */
-    rtensor_Copy( box->old_box, box->box );
-    box->box[0][0] *= mu;
-    box->box[1][1] *= mu;
-    box->box[2][2] *= mu;
-
-    box->volume = box->box[0][0] * box->box[1][1] * box->box[2][2];
-    Make_Consistent(box/*, periodic*/);
-}
-
-
-void Update_Box_SemiIsotropic( simulation_box *box, rvec mu )
-{
-    /*box->box[0][0] =
-      POW( V_new / ( box->side_prop[1] * box->side_prop[2] ), 1.0/3.0 );
-    box->box[1][1] = box->box[0][0] * box->side_prop[1];
-    box->box[2][2] = box->box[0][0] * box->side_prop[2]; */
-    rtensor_Copy( box->old_box, box->box );
-    box->box[0][0] *= mu[0];
-    box->box[1][1] *= mu[1];
-    box->box[2][2] *= mu[2];
-
-    box->volume = box->box[0][0] * box->box[1][1] * box->box[2][2];
-    Make_Consistent(box);
-}
-
-
-void Make_Consistent(simulation_box* box)
+void Make_Consistent( simulation_box* box )
 {
     real one_vol;
 
@@ -196,7 +118,6 @@ void Make_Consistent(simulation_box* box)
 //       fprintf(stderr,"\n");
 //     }
 
-
     box->g[0][0] = box->box[0][0] * box->box[0][0] +
                    box->box[0][1] * box->box[0][1] +
                    box->box[0][2] * box->box[0][2];
@@ -228,44 +149,85 @@ void Make_Consistent(simulation_box* box)
 }
 
 
-void Transform( rvec x1, simulation_box *box, char flag, rvec x2 )
+/* setup the simulation box */
+void Setup_Box( real a, real b, real c, real alpha, real beta, real gamma,
+        simulation_box* box )
 {
-    int i, j;
-    real tmp;
-
-    //  printf(">x1: (%lf, %lf, %lf)\n",x1[0],x1[1],x1[2]);
+    double c_alpha, c_beta, c_gamma, s_gamma, zi;
 
-    if (flag > 0)
-    {
-        for (i = 0; i < 3; i++)
-        {
-            tmp = 0.0;
-            for (j = 0; j < 3; j++)
-                tmp += box->trans[i][j] * x1[j];
-            x2[i] = tmp;
-        }
-    }
-    else
+    if ( IS_NAN_REAL(a) || IS_NAN_REAL(b) || IS_NAN_REAL(c)
+            || IS_NAN_REAL(alpha) || IS_NAN_REAL(beta) || IS_NAN_REAL(gamma) )
     {
-        for (i = 0; i < 3; i++)
-        {
-            tmp = 0.0;
-            for (j = 0; j < 3; j++)
-                tmp += box->trans_inv[i][j] * x1[j];
-            x2[i] = tmp;
-        }
+        fprintf( stderr, "Invalid simulation box boundaries for big box (NaN). Terminating...\n" );
+        exit( INVALID_INPUT );
     }
-    //  printf(">x2: (%lf, %lf, %lf)\n", x2[0], x2[1], x2[2]);
+
+    c_alpha = COS(DEG2RAD(alpha));
+    c_beta  = COS(DEG2RAD(beta));
+    c_gamma = COS(DEG2RAD(gamma));
+    s_gamma = SIN(DEG2RAD(gamma));
+    zi = (c_alpha - c_beta * c_gamma) / s_gamma;
+
+    box->box[0][0] = a;
+    box->box[0][1] = 0.0;
+    box->box[0][2] = 0.0;
+    box->box[1][0] = b * c_gamma;
+    box->box[1][1] = b * s_gamma;
+    box->box[1][2] = 0.0;
+    box->box[2][0] = c * c_beta;
+    box->box[2][1] = c * zi;
+    box->box[2][2] = c * SQRT(1.0 - SQR(c_beta) - SQR(zi));
+#if defined(DEBUG)
+    fprintf( stderr, "box is %8.2f x %8.2f x %8.2f\n",
+             box->box[0][0], box->box[1][1], box->box[2][2] );
+#endif
+
+    Make_Consistent( box );
+}
+
+
+void Update_Box( rtensor box_tensor, simulation_box* box )
+{
+    int i, j;
+
+    for (i = 0; i < 3; i++)
+        for (j = 0; j < 3; j++)
+            box->box[i][j] = box_tensor[i][j];
+
+    Make_Consistent( box );
 }
 
 
-void Transform_to_UnitBox( rvec x1, simulation_box *box, char flag, rvec x2 )
+void Update_Box_Isotropic( simulation_box *box, real mu )
 {
-    Transform( x1, box, flag, x2 );
+    /*box->box[0][0] =
+      POW( V_new / ( box->side_prop[1] * box->side_prop[2] ), 1.0/3.0 );
+    box->box[1][1] = box->box[0][0] * box->side_prop[1];
+    box->box[2][2] = box->box[0][0] * box->side_prop[2];
+    */
+    rtensor_Copy( box->old_box, box->box );
+    box->box[0][0] *= mu;
+    box->box[1][1] *= mu;
+    box->box[2][2] *= mu;
 
-    x2[0] /= box->box_norms[0];
-    x2[1] /= box->box_norms[1];
-    x2[2] /= box->box_norms[2];
+    box->volume = box->box[0][0] * box->box[1][1] * box->box[2][2];
+    Make_Consistent(box/*, periodic*/);
+}
+
+
+void Update_Box_SemiIsotropic( simulation_box *box, rvec mu )
+{
+    /*box->box[0][0] =
+      POW( V_new / ( box->side_prop[1] * box->side_prop[2] ), 1.0/3.0 );
+    box->box[1][1] = box->box[0][0] * box->side_prop[1];
+    box->box[2][2] = box->box[0][0] * box->side_prop[2]; */
+    rtensor_Copy( box->old_box, box->box );
+    box->box[0][0] *= mu[0];
+    box->box[1][1] *= mu[1];
+    box->box[2][2] *= mu[2];
+
+    box->volume = box->box[0][0] * box->box[1][1] * box->box[2][2];
+    Make_Consistent(box);
 }
 
 
@@ -278,7 +240,7 @@ void Inc_on_T3( rvec x, rvec dx, simulation_box *box )
     {
         tmp = x[i] + dx[i];
         if ( tmp <= -box->box_norms[i] || tmp >= box->box_norms[i] )
-            tmp = fmod( tmp, box->box_norms[i] );
+            tmp = FMOD( tmp, box->box_norms[i] );
 
         if ( tmp < 0 ) tmp += box->box_norms[i];
         x[i] = tmp;
@@ -369,7 +331,6 @@ real Metric_Product( rvec x1, rvec x2, simulation_box* box )
 }
 
 
-
 int Are_Far_Neighbors( rvec x1, rvec x2, simulation_box *box,
                        real cutoff, far_neighbor_data *data )
 {
@@ -417,7 +378,6 @@ int Are_Far_Neighbors( rvec x1, rvec x2, simulation_box *box,
 }
 
 
-
 /* Determines if the distance between x1 and x2 is < vlist_cut.
    If so, this neighborhood is added to the list of far neighbors.
    Periodic boundary conditions do not apply. */
@@ -617,7 +577,7 @@ void Get_Periodic_Far_Neighbors_Small_Box( rvec x1, rvec x2, simulation_box *box
 }*/
 
 
-void Print_Box_Information( simulation_box* box, FILE *out )
+void Print_Box( simulation_box* box, FILE *out )
 {
     int i, j;
 
diff --git a/sPuReMD/src/box.h b/sPuReMD/src/box.h
index de3e33debea62d57bd15731e97eadfdf623d96de..4902a3404bc60be6dc18a4f4185cf7da9f25d5a9 100644
--- a/sPuReMD/src/box.h
+++ b/sPuReMD/src/box.h
@@ -24,9 +24,7 @@
 
 #include "mytypes.h"
 
-/* Initializes box from CRYST1 line of PDB */
-void Init_Box_From_CRYST(real, real, real, real, real, real,
-                         simulation_box*/*, int*/);
+void Setup_Box( real, real, real, real, real, real, simulation_box* );
 
 /* Initializes box from box rtensor */
 void Update_Box(rtensor, simulation_box* /*, int*/);
@@ -37,12 +35,6 @@ void Update_Box_SemiIsotropic( simulation_box*, rvec /*, int*/ );
    metric and other quantities from box rtensor */
 void Make_Consistent(simulation_box*/*, int*/ );
 
-/* Applies transformation to and from
-   Cartesian to Triclinic coordinates based on flag */
-/* Use -1 flag for Cartesian -> Triclinic and +1 for otherway */
-void Transform( rvec, simulation_box*, char, rvec );
-void Transform_to_UnitBox( rvec, simulation_box*, char, rvec );
-
 int Are_Far_Neighbors( rvec, rvec, simulation_box*, real, far_neighbor_data* );
 void Get_NonPeriodic_Far_Neighbors( rvec, rvec, simulation_box*,
                                     control_params*, far_neighbor_data*, int* );
@@ -66,6 +58,6 @@ void Inc_on_T3( rvec, rvec, simulation_box* );
 
 real Metric_Product( rvec, rvec, simulation_box* );
 
-void Print_Box_Information( simulation_box*, FILE* );
+void Print_Box( simulation_box*, FILE* );
 
 #endif
diff --git a/sPuReMD/src/control.c b/sPuReMD/src/control.c
new file mode 100644
index 0000000000000000000000000000000000000000..62311c5934d5da95ee7e5473f799c5ec506e0c20
--- /dev/null
+++ b/sPuReMD/src/control.c
@@ -0,0 +1,550 @@
+/*----------------------------------------------------------------------
+  SerialReax - Reax Force Field Simulator
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, haktulga@cs.purdue.edu
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#include <ctype.h>
+
+#include "control.h"
+#include "traj.h"
+#include "tool_box.h"
+
+
+char Read_Control_File( FILE* fp, reax_system *system, control_params* control,
+        output_controls *out_control )
+{
+    char *s, **tmp;
+    int c, i;
+    real val;
+    int ival;
+
+    /* assign default values */
+    strcpy( control->sim_name, "default.sim" );
+
+    control->restart = 0;
+    out_control->restart_format = WRITE_BINARY;
+    out_control->restart_freq = 0;
+    strcpy( control->restart_from, "default.res" );
+    out_control->restart_freq = 0;
+    control->random_vel = 0;
+
+    control->reposition_atoms = 0;
+
+    control->ensemble = NVE;
+    control->nsteps = 0;
+    control->dt = 0.25;
+
+    control->geo_format = PDB;
+    control->restrict_bonds = 0;
+
+    control->periodic_boundaries = 1;
+    control->periodic_images[0] = 0;
+    control->periodic_images[1] = 0;
+    control->periodic_images[2] = 0;
+
+    control->reneighbor = 1;
+    control->vlist_cut = 0;
+    control->nbr_cut = 4.;
+    control->r_cut = 10;
+    control->max_far_nbrs = 1000;
+    control->bo_cut = 0.01;
+    control->thb_cut = 0.001;
+    control->hb_cut = 7.50;
+
+    control->tabulate = 0;
+
+    control->qeq_solver_type = GMRES_S;
+    control->qeq_solver_q_err = 0.000001;
+    control->pre_comp_type = ICHOLT_PC;
+    control->pre_comp_sweeps = 3;
+    control->pre_comp_refactor = 100;
+    control->pre_comp_droptol = 0.01;
+    control->pre_app_type = TRI_SOLVE_PA;
+    control->pre_app_jacobi_iters = 50;
+
+    control->T_init = 0.;
+    control->T_final = 300.;
+    control->Tau_T = 1.0;
+    control->T_mode = 0.;
+    control->T_rate = 1.;
+    control->T_freq = 1.;
+
+    control->P[0] = 0.000101325;
+    control->P[1] = 0.000101325;
+    control->P[2] = 0.000101325;
+    control->Tau_P[0]  = 500.0;
+    control->Tau_P[1]  = 500.0;
+    control->Tau_P[2]  = 500.0;
+    control->Tau_PT = 500.0;
+    control->compressibility = 1.0;
+    control->press_mode = 0;
+
+    control->remove_CoM_vel = 25;
+
+    out_control->debug_level = 0;
+    out_control->energy_update_freq = 10;
+
+    out_control->write_steps = 100;
+    out_control->traj_compress = 0;
+    out_control->write = fprintf;
+    out_control->traj_format = 0;
+    out_control->write_header =
+        (int (*)( reax_system*, control_params*,
+                  static_storage*, void* )) Write_Custom_Header;
+    out_control->append_traj_frame =
+        (int (*)( reax_system*, control_params*, simulation_data*,
+                  static_storage*, list **, void* )) Append_Custom_Frame;
+
+    strcpy( out_control->traj_title, "default_title" );
+    out_control->atom_format = 0;
+    out_control->bond_info = 0;
+    out_control->angle_info = 0;
+
+    control->molec_anal = NO_ANALYSIS;
+    control->freq_molec_anal = 0;
+    control->bg_cut = 0.3;
+    control->num_ignored = 0;
+    memset( control->ignore, 0, sizeof(int)*MAX_ATOM_TYPES );
+
+    control->dipole_anal = 0;
+    control->freq_dipole_anal = 0;
+
+    control->diffusion_coef = 0;
+    control->freq_diffusion_coef = 0;
+    control->restrict_type = 0;
+
+    /* memory allocations */
+    s = (char*) malloc(sizeof(char) * MAX_LINE);
+    tmp = (char**) malloc(sizeof(char*)*MAX_TOKENS);
+    for (i = 0; i < MAX_TOKENS; i++)
+        tmp[i] = (char*) malloc(sizeof(char) * MAX_LINE);
+
+    /* read control parameters file */
+    while (fgets(s, MAX_LINE, fp))
+    {
+        c = Tokenize(s, &tmp);
+
+        if ( strcmp(tmp[0], "simulation_name") == 0 )
+        {
+            strcpy( control->sim_name, tmp[1] );
+        }
+        //else if( strcmp(tmp[0], "restart") == 0 ) {
+        //  ival = atoi(tmp[1]);
+        //  control->restart = ival;
+        //}
+        else if ( strcmp(tmp[0], "restart_format") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            out_control->restart_format = ival;
+        }
+        else if ( strcmp(tmp[0], "restart_freq") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            out_control->restart_freq = ival;
+        }
+        else if ( strcmp(tmp[0], "random_vel") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            control->random_vel = ival;
+        }
+        else if ( strcmp(tmp[0], "reposition_atoms") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            control->reposition_atoms = ival;
+        }
+        else if ( strcmp(tmp[0], "ensemble_type") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            control->ensemble = ival;
+        }
+        else if ( strcmp(tmp[0], "nsteps") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            control->nsteps = ival;
+        }
+        else if ( strcmp(tmp[0], "dt") == 0 )
+        {
+            val = atof(tmp[1]);
+            control->dt = val * 1.e-3;  // convert dt from fs to ps!
+        }
+        else if ( strcmp(tmp[0], "periodic_boundaries") == 0 )
+        {
+            ival = atoi( tmp[1] );
+            control->periodic_boundaries = ival;
+        }
+        else if ( strcmp(tmp[0], "periodic_images") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            control->periodic_images[0] = ival;
+            ival = atoi(tmp[2]);
+            control->periodic_images[1] = ival;
+            ival = atoi(tmp[3]);
+            control->periodic_images[2] = ival;
+        }
+        else if ( strcmp(tmp[0], "geo_format") == 0 )
+        {
+            ival = atoi( tmp[1] );
+            control->geo_format = ival;
+        }
+        else if ( strcmp(tmp[0], "restrict_bonds") == 0 )
+        {
+            ival = atoi( tmp[1] );
+            control->restrict_bonds = ival;
+        }
+        else if ( strcmp(tmp[0], "tabulate_long_range") == 0 )
+        {
+            ival = atoi( tmp[1] );
+            control->tabulate = ival;
+        }
+        else if ( strcmp(tmp[0], "reneighbor") == 0 )
+        {
+            ival = atoi( tmp[1] );
+            control->reneighbor = ival;
+        }
+        else if ( strcmp(tmp[0], "vlist_buffer") == 0 )
+        {
+            val = atof(tmp[1]);
+            control->vlist_cut = val;
+        }
+        else if ( strcmp(tmp[0], "nbrhood_cutoff") == 0 )
+        {
+            val = atof(tmp[1]);
+            control->nbr_cut = val;
+        }
+        else if ( strcmp(tmp[0], "thb_cutoff") == 0 )
+        {
+            val = atof(tmp[1]);
+            control->thb_cut = val;
+        }
+        else if ( strcmp(tmp[0], "hbond_cutoff") == 0 )
+        {
+            val = atof( tmp[1] );
+            control->hb_cut = val;
+        }
+        else if ( strcmp(tmp[0], "qeq_solver_type") == 0 )
+        {
+            ival = atoi( tmp[1] );
+            control->qeq_solver_type = ival;
+        }
+        else if ( strcmp(tmp[0], "qeq_solver_q_err") == 0 )
+        {
+            val = atof( tmp[1] );
+            control->qeq_solver_q_err = val;
+        }
+        else if ( strcmp(tmp[0], "pre_comp_type") == 0 )
+        {
+            ival = atoi( tmp[1] );
+            control->pre_comp_type = ival;
+        }
+        else if ( strcmp(tmp[0], "pre_comp_refactor") == 0 )
+        {
+            ival = atoi( tmp[1] );
+            control->pre_comp_refactor = ival;
+        }
+        else if ( strcmp(tmp[0], "pre_comp_droptol") == 0 )
+        {
+            val = atof( tmp[1] );
+            control->pre_comp_droptol = val;
+        }
+        else if ( strcmp(tmp[0], "pre_comp_sweeps") == 0 )
+        {
+            ival = atoi( tmp[1] );
+            control->pre_comp_sweeps = ival;
+        }
+        else if ( strcmp(tmp[0], "pre_app_type") == 0 )
+        {
+            ival = atoi( tmp[1] );
+            control->pre_app_type = ival;
+        }
+        else if ( strcmp(tmp[0], "pre_app_jacobi_iters") == 0 )
+        {
+            ival = atoi( tmp[1] );
+            control->pre_app_jacobi_iters = ival;
+        }
+        else if ( strcmp(tmp[0], "temp_init") == 0 )
+        {
+            val = atof(tmp[1]);
+            control->T_init = val;
+
+            if ( control->T_init < 0.001 )
+                control->T_init = 0.001;
+        }
+        else if ( strcmp(tmp[0], "temp_final") == 0 )
+        {
+            val = atof(tmp[1]);
+            control->T_final = val;
+
+            if ( control->T_final < 0.1 )
+                control->T_final = 0.1;
+        }
+        else if ( strcmp(tmp[0], "t_mass") == 0 )
+        {
+            val = atof(tmp[1]);
+            control->Tau_T = val * 1.e-3;    // convert t_mass from fs to ps
+        }
+        else if ( strcmp(tmp[0], "t_mode") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            control->T_mode = ival;
+        }
+        else if ( strcmp(tmp[0], "t_rate") == 0 )
+        {
+            val = atof(tmp[1]);
+            control->T_rate = val;
+        }
+        else if ( strcmp(tmp[0], "t_freq") == 0 )
+        {
+            val = atof(tmp[1]);
+            control->T_freq = val;
+        }
+        else if ( strcmp(tmp[0], "pressure") == 0 )
+        {
+            if ( control->ensemble == iNPT )
+            {
+                val = atof(tmp[1]);
+                control->P[0] = control->P[1] = control->P[2] = val;
+            }
+            else if ( control->ensemble == sNPT )
+            {
+                val = atof(tmp[1]);
+                control->P[0] = val;
+
+                val = atof(tmp[2]);
+                control->P[1] = val;
+
+                val = atof(tmp[3]);
+                control->P[2] = val;
+            }
+        }
+        else if ( strcmp(tmp[0], "p_mass") == 0 )
+        {
+            if ( control->ensemble == iNPT )
+            {
+                val = atof(tmp[1]);
+                control->Tau_P[0] = val * 1.e-3;   // convert p_mass from fs to ps
+            }
+            else if ( control->ensemble == sNPT )
+            {
+                val = atof(tmp[1]);
+                control->Tau_P[0] = val * 1.e-3;   // convert p_mass from fs to ps
+
+                val = atof(tmp[2]);
+                control->Tau_P[1] = val * 1.e-3;   // convert p_mass from fs to ps
+
+                val = atof(tmp[3]);
+                control->Tau_P[2] = val * 1.e-3;   // convert p_mass from fs to ps
+            }
+        }
+        else if ( strcmp(tmp[0], "pt_mass") == 0 )
+        {
+            val = atof(tmp[1]);
+            control->Tau_PT = val * 1.e-3;  // convert pt_mass from fs to ps
+        }
+        else if ( strcmp(tmp[0], "compress") == 0 )
+        {
+            val = atof(tmp[1]);
+            control->compressibility = val;
+        }
+        else if ( strcmp(tmp[0], "press_mode") == 0 )
+        {
+            val = atoi(tmp[1]);
+            control->press_mode = val;
+        }
+        else if ( strcmp(tmp[0], "remove_CoM_vel") == 0 )
+        {
+            val = atoi(tmp[1]);
+            control->remove_CoM_vel = val;
+        }
+        else if ( strcmp(tmp[0], "debug_level") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            out_control->debug_level = ival;
+        }
+        else if ( strcmp(tmp[0], "energy_update_freq") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            out_control->energy_update_freq = ival;
+        }
+        else if ( strcmp(tmp[0], "write_freq") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            out_control->write_steps = ival;
+        }
+        else if ( strcmp(tmp[0], "traj_compress") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            out_control->traj_compress = ival;
+
+            if ( out_control->traj_compress )
+                out_control->write = (int (*)(FILE *, const char *, ...)) gzprintf;
+            else out_control->write = fprintf;
+        }
+        else if ( strcmp(tmp[0], "traj_format") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            out_control->traj_format = ival;
+
+            if ( out_control->traj_format == 0 )
+            {
+                out_control->write_header =
+                    (int (*)( reax_system*, control_params*,
+                              static_storage*, void* )) Write_Custom_Header;
+                out_control->append_traj_frame =
+                    (int (*)(reax_system*, control_params*, simulation_data*,
+                             static_storage*, list **, void*)) Append_Custom_Frame;
+            }
+            else if ( out_control->traj_format == 1 )
+            {
+                out_control->write_header =
+                    (int (*)( reax_system*, control_params*,
+                              static_storage*, void* )) Write_xyz_Header;
+                out_control->append_traj_frame =
+                    (int (*)( reax_system*,  control_params*, simulation_data*,
+                              static_storage*, list **, void* )) Append_xyz_Frame;
+            }
+        }
+        else if ( strcmp(tmp[0], "traj_title") == 0 )
+        {
+            strcpy( out_control->traj_title, tmp[1] );
+        }
+        else if ( strcmp(tmp[0], "atom_info") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            out_control->atom_format += ival * 4;
+        }
+        else if ( strcmp(tmp[0], "atom_velocities") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            out_control->atom_format += ival * 2;
+        }
+        else if ( strcmp(tmp[0], "atom_forces") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            out_control->atom_format += ival * 1;
+        }
+        else if ( strcmp(tmp[0], "bond_info") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            out_control->bond_info = ival;
+        }
+        else if ( strcmp(tmp[0], "angle_info") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            out_control->angle_info = ival;
+        }
+        else if ( strcmp(tmp[0], "test_forces") == 0 )
+        {
+            ival = atoi(tmp[1]);
+        }
+        else if ( strcmp(tmp[0], "molec_anal") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            control->molec_anal = ival;
+        }
+        else if ( strcmp(tmp[0], "freq_molec_anal") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            control->freq_molec_anal = ival;
+        }
+        else if ( strcmp(tmp[0], "bond_graph_cutoff") == 0 )
+        {
+            val = atof(tmp[1]);
+            control->bg_cut = val;
+        }
+        else if ( strcmp(tmp[0], "ignore") == 0 )
+        {
+            control->num_ignored = atoi(tmp[1]);
+            for ( i = 0; i < control->num_ignored; ++i )
+                control->ignore[atoi(tmp[i + 2])] = 1;
+        }
+        else if ( strcmp(tmp[0], "dipole_anal") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            control->dipole_anal = ival;
+        }
+        else if ( strcmp(tmp[0], "freq_dipole_anal") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            control->freq_dipole_anal = ival;
+        }
+        else if ( strcmp(tmp[0], "diffusion_coef") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            control->diffusion_coef = ival;
+        }
+        else if ( strcmp(tmp[0], "freq_diffusion_coef") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            control->freq_diffusion_coef = ival;
+        }
+        else if ( strcmp(tmp[0], "restrict_type") == 0 )
+        {
+            ival = atoi(tmp[1]);
+            control->restrict_type = ival;
+        }
+        else
+        {
+            fprintf( stderr, "WARNING: unknown parameter %s\n", tmp[0] );
+            exit( 15 );
+        }
+    }
+
+    if (ferror(fp))
+    {
+        fprintf(stderr, "Error reading control file. Terminating.\n");
+        exit( INVALID_INPUT );
+    }
+
+    /* determine target T */
+    if ( control->T_mode == 0 )
+        control->T = control->T_final;
+    else control->T = control->T_init;
+
+
+    /* near neighbor and far neighbor cutoffs */
+    control->bo_cut = 0.01 * system->reaxprm.gp.l[29];
+    control->r_low  = system->reaxprm.gp.l[11];
+    control->r_cut  = system->reaxprm.gp.l[12];
+    control->vlist_cut += control->r_cut;
+
+    system->g.cell_size = control->vlist_cut / 2.;
+    for ( i = 0; i < 3; ++i )
+    {
+        system->g.spread[i] = 2;
+    }
+
+    /* free memory allocations at the top */
+    for ( i = 0; i < MAX_TOKENS; i++ )
+    {
+        free( tmp[i] );
+    }
+    free( tmp );
+    free( s );
+
+#if defined(DEBUG_FOCUS)
+    fprintf( stderr,
+             "en=%d steps=%d dt=%.5f opt=%d T=%.5f P=%.5f %.5f %.5f\n",
+             control->ensemble, control->nsteps, control->dt, control->tabulate,
+             control->T, control->P[0], control->P[1], control->P[2] );
+
+    fprintf(stderr, "control file read\n" );
+#endif
+
+    return SUCCESS;
+}
diff --git a/sPuReMD/src/control.h b/sPuReMD/src/control.h
new file mode 100644
index 0000000000000000000000000000000000000000..66d0dde7b4901d7a7b42512414328a8e6b256d83
--- /dev/null
+++ b/sPuReMD/src/control.h
@@ -0,0 +1,29 @@
+/*----------------------------------------------------------------------
+  SerialReax - Reax Force Field Simulator
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, haktulga@cs.purdue.edu
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#ifndef __CONTROL_H_
+#define __CONTROL_H_
+
+#include "mytypes.h"
+
+char Read_Control_File( FILE*, reax_system*, control_params*, output_controls* );
+
+#endif
diff --git a/sPuReMD/src/param.c b/sPuReMD/src/ffield.c
similarity index 58%
rename from sPuReMD/src/param.c
rename to sPuReMD/src/ffield.c
index 4cae89e79cfc93db2e32c9cec68563c76d7f4208..088e1acbe2aa7588cb6e40b19626038d4257e56e 100644
--- a/sPuReMD/src/param.c
+++ b/sPuReMD/src/ffield.c
@@ -19,85 +19,10 @@
   <http://www.gnu.org/licenses/>.
   ----------------------------------------------------------------------*/
 
-#include "param.h"
-#include "traj.h"
-#include "ctype.h"
-
-
-int Get_Atom_Type( reax_interaction *reaxprm, char *s )
-{
-    int i;
-
-    for ( i = 0; i < reaxprm->num_atom_types; ++i )
-        if ( !strcmp( reaxprm->sbp[i].name, s ) )
-            return i;
-
-    fprintf( stderr, "Unknown atom type %s. Terminating...\n", s );
-    exit( UNKNOWN_ATOM_TYPE_ERR );
-}
-
-
-int Tokenize(char* s, char*** tok)
-{
-    char test[MAX_LINE];
-    char *sep = "\t \n!=";
-    char *word;
-    int count = 0;
-
-    strncpy( test, s, MAX_LINE );
-
-    // fprintf( stderr, "|%s|\n", test );
-
-    for ( word = strtok(test, sep); word; word = strtok(NULL, sep) )
-    {
-        strncpy( (*tok)[count], word, MAX_LINE );
-        count++;
-    }
-
-    return count;
-}
-
-
-/* Initialize Taper params */
-void Init_Taper( control_params *control )
-{
-    real d1, d7;
-    real swa, swa2, swa3;
-    real swb, swb2, swb3;
-
-    swa = control->r_low;
-    swb = control->r_cut;
-
-    if ( fabs( swa ) > 0.01 )
-        fprintf( stderr, "Warning: non-zero value for lower Taper-radius cutoff\n" );
-
-    if ( swb < 0 )
-    {
-        fprintf( stderr, "Negative value for upper Taper-radius cutoff\n" );
-        exit( INVALID_INPUT );
-    }
-    else if ( swb < 5 )
-        fprintf( stderr, "Warning: low value for upper Taper-radius cutoff:%f\n",
-                 swb );
-
-    d1 = swb - swa;
-    d7 = POW( d1, 7.0 );
-    swa2 = SQR( swa );
-    swa3 = CUBE( swa );
-    swb2 = SQR( swb );
-    swb3 = CUBE( swb );
-
-    control->Tap7 =  20.0 / d7;
-    control->Tap6 = -70.0 * (swa + swb) / d7;
-    control->Tap5 =  84.0 * (swa2 + 3.0 * swa * swb + swb2) / d7;
-    control->Tap4 = -35.0 * (swa3 + 9.0 * swa2 * swb + 9.0 * swa * swb2 + swb3 ) / d7;
-    control->Tap3 = 140.0 * (swa3 * swb + 3.0 * swa2 * swb2 + swa * swb3 ) / d7;
-    control->Tap2 = -210.0 * (swa3 * swb2 + swa2 * swb3) / d7;
-    control->Tap1 = 140.0 * swa3 * swb3 / d7;
-    control->Tap0 = (-35.0 * swa3 * swb2 * swb2 + 21.0 * swa2 * swb3 * swb2 +
-                     7.0 * swa * swb3 * swb3 + swb3 * swb3 * swb ) / d7;
-}
+#include <ctype.h>
 
+#include "ffield.h"
+#include "tool_box.h"
 
 
 char Read_Force_Field( FILE* fp, reax_interaction* reax )
@@ -452,7 +377,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax )
     }
 
     /* calculating combination rules and filling up remaining fields. */
-
     for (i = 0; i < reax->num_atom_types; i++)
         for (j = i; j < reax->num_atom_types; j++)
         {
@@ -471,10 +395,10 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax )
             reax->tbp[j][i].r_pp = 0.5 *
                                    (reax->sbp[j].r_pi_pi + reax->sbp[i].r_pi_pi);
 
-
             reax->tbp[i][j].p_boc3 =
                 sqrt(reax->sbp[i].b_o_132 *
                      reax->sbp[j].b_o_132);
+
             reax->tbp[j][i].p_boc3 =
                 sqrt(reax->sbp[j].b_o_132 *
                      reax->sbp[i].b_o_132);
@@ -493,7 +417,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax )
                 sqrt(reax->sbp[j].b_o_133 *
                      reax->sbp[i].b_o_133);
 
-
             reax->tbp[i][j].D =
                 sqrt(reax->sbp[i].epsilon *
                      reax->sbp[j].epsilon);
@@ -534,7 +457,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax )
 
         }
 
-
     /* next line is number of 2-body offdiagonal combinations and some comments */
     /* these are two body offdiagonal terms that are different from the
        combination rules defined above */
@@ -596,7 +518,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax )
         }
     }
 
-
     /* 3-body parameters -
        supports multi-well potentials (upto MAX_3BODY_PARAM in mytypes.h) */
     /* clear entries first */
@@ -657,7 +578,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax )
         }
     }
 
-
     /* 4-body parameters are entered in compact form. i.e. 0-X-Y-0
        correspond to any type of pair of atoms in 1 and 4
        position. However, explicit X-Y-Z-W takes precedence over the
@@ -763,8 +683,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax )
         }
     }
 
-
-
     /* next line is number of hydrogen bond params and some comments */
     fgets( s, MAX_LINE, fp );
     c = Tokenize( s, &tmp );
@@ -779,7 +697,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax )
         k = atoi(tmp[1]) - 1;
         m = atoi(tmp[2]) - 1;
 
-
         if (j < reax->num_atom_types && m < reax->num_atom_types)
         {
             val = atof(tmp[3]);
@@ -797,14 +714,12 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax )
         }
     }
 
-
     /* deallocate helper storage */
     for ( i = 0; i < MAX_TOKENS; i++ )
         free( tmp[i] );
     free( tmp );
     free( s );
 
-
     /* deallocate tor_flag */
     for ( i = 0; i < reax->num_atom_types; i++ )
     {
@@ -823,523 +738,5 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax )
     fprintf( stderr, "force field read\n" );
 #endif
 
-    return 0;
-}
-
-
-char Read_Control_File( FILE* fp, reax_system *system, control_params* control,
-                        output_controls *out_control )
-{
-    char *s, **tmp;
-    int c, i;
-    real val;
-    int ival;
-
-    /* assign default values */
-    strcpy( control->sim_name, "default.sim" );
-
-    control->restart = 0;
-    out_control->restart_format = WRITE_BINARY;
-    out_control->restart_freq = 0;
-    strcpy( control->restart_from, "default.res" );
-    out_control->restart_freq = 0;
-    control->random_vel = 0;
-
-    control->reposition_atoms = 0;
-
-    control->ensemble = NVE;
-    control->nsteps = 0;
-    control->dt = 0.25;
-
-    control->geo_format = PDB;
-    control->restrict_bonds = 0;
-
-    control->periodic_boundaries = 1;
-    control->periodic_images[0] = 0;
-    control->periodic_images[1] = 0;
-    control->periodic_images[2] = 0;
-
-    control->reneighbor = 1;
-    control->vlist_cut = 0;
-    control->nbr_cut = 4.;
-    control->r_cut = 10;
-    control->max_far_nbrs = 1000;
-    control->bo_cut = 0.01;
-    control->thb_cut = 0.001;
-    control->hb_cut = 7.50;
-
-    control->tabulate = 0;
-
-    control->qeq_solver_type = PGMRES_S;
-    control->qeq_solver_q_err = 0.000001;
-    control->pre_comp_type = ICHOLT_PC;
-    control->pre_comp_sweeps = ICHOLT_PC;
-    control->pre_comp_refactor = 100;
-    control->pre_comp_droptol = 0.01;
-    control->pre_app_jacobi_iters = 10;
-
-    control->T_init = 0.;
-    control->T_final = 300.;
-    control->Tau_T = 1.0;
-    control->T_mode = 0.;
-    control->T_rate = 1.;
-    control->T_freq = 1.;
-
-    control->P[0] = 0.000101325;
-    control->P[1] = 0.000101325;
-    control->P[2] = 0.000101325;
-    control->Tau_P[0]  = 500.0;
-    control->Tau_P[1]  = 500.0;
-    control->Tau_P[2]  = 500.0;
-    control->Tau_PT = 500.0;
-    control->compressibility = 1.0;
-    control->press_mode = 0;
-
-    control->remove_CoM_vel = 25;
-
-    out_control->debug_level = 0;
-    out_control->energy_update_freq = 10;
-
-    out_control->write_steps = 100;
-    out_control->traj_compress = 0;
-    out_control->write = fprintf;
-    out_control->traj_format = 0;
-    out_control->write_header =
-        (int (*)( reax_system*, control_params*,
-                  static_storage*, void* )) Write_Custom_Header;
-    out_control->append_traj_frame =
-        (int (*)( reax_system*, control_params*, simulation_data*,
-                  static_storage*, list **, void* )) Append_Custom_Frame;
-
-    strcpy( out_control->traj_title, "default_title" );
-    out_control->atom_format = 0;
-    out_control->bond_info = 0;
-    out_control->angle_info = 0;
-
-    control->molec_anal = NO_ANALYSIS;
-    control->freq_molec_anal = 0;
-    control->bg_cut = 0.3;
-    control->num_ignored = 0;
-    memset( control->ignore, 0, sizeof(int)*MAX_ATOM_TYPES );
-
-    control->dipole_anal = 0;
-    control->freq_dipole_anal = 0;
-
-    control->diffusion_coef = 0;
-    control->freq_diffusion_coef = 0;
-    control->restrict_type = 0;
-
-    /* memory allocations */
-    s = (char*) malloc(sizeof(char) * MAX_LINE);
-    tmp = (char**) malloc(sizeof(char*)*MAX_TOKENS);
-    for (i = 0; i < MAX_TOKENS; i++)
-        tmp[i] = (char*) malloc(sizeof(char) * MAX_LINE);
-
-    /* read control parameters file */
-    while (fgets(s, MAX_LINE, fp))
-    {
-        c = Tokenize(s, &tmp);
-
-        if ( strcmp(tmp[0], "simulation_name") == 0 )
-        {
-            strcpy( control->sim_name, tmp[1] );
-        }
-        //else if( strcmp(tmp[0], "restart") == 0 ) {
-        //  ival = atoi(tmp[1]);
-        //  control->restart = ival;
-        //}
-        else if ( strcmp(tmp[0], "restart_format") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->restart_format = ival;
-        }
-        else if ( strcmp(tmp[0], "restart_freq") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->restart_freq = ival;
-        }
-        else if ( strcmp(tmp[0], "random_vel") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->random_vel = ival;
-        }
-        else if ( strcmp(tmp[0], "reposition_atoms") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->reposition_atoms = ival;
-        }
-        else if ( strcmp(tmp[0], "ensemble_type") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->ensemble = ival;
-        }
-        else if ( strcmp(tmp[0], "nsteps") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->nsteps = ival;
-        }
-        else if ( strcmp(tmp[0], "dt") == 0 )
-        {
-            val = atof(tmp[1]);
-            control->dt = val * 1.e-3;  // convert dt from fs to ps!
-        }
-        else if ( strcmp(tmp[0], "periodic_boundaries") == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->periodic_boundaries = ival;
-        }
-        else if ( strcmp(tmp[0], "periodic_images") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->periodic_images[0] = ival;
-            ival = atoi(tmp[2]);
-            control->periodic_images[1] = ival;
-            ival = atoi(tmp[3]);
-            control->periodic_images[2] = ival;
-        }
-        else if ( strcmp(tmp[0], "geo_format") == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->geo_format = ival;
-        }
-        else if ( strcmp(tmp[0], "restrict_bonds") == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->restrict_bonds = ival;
-        }
-        else if ( strcmp(tmp[0], "tabulate_long_range") == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->tabulate = ival;
-        }
-        else if ( strcmp(tmp[0], "reneighbor") == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->reneighbor = ival;
-        }
-        else if ( strcmp(tmp[0], "vlist_buffer") == 0 )
-        {
-            val = atof(tmp[1]);
-            control->vlist_cut = val;
-        }
-        else if ( strcmp(tmp[0], "nbrhood_cutoff") == 0 )
-        {
-            val = atof(tmp[1]);
-            control->nbr_cut = val;
-        }
-        else if ( strcmp(tmp[0], "thb_cutoff") == 0 )
-        {
-            val = atof(tmp[1]);
-            control->thb_cut = val;
-        }
-        else if ( strcmp(tmp[0], "hbond_cutoff") == 0 )
-        {
-            val = atof( tmp[1] );
-            control->hb_cut = val;
-        }
-        else if ( strcmp(tmp[0], "qeq_solver_type") == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->qeq_solver_type = ival;
-        }
-        else if ( strcmp(tmp[0], "qeq_solver_q_err") == 0 )
-        {
-            val = atof( tmp[1] );
-            control->qeq_solver_q_err = val;
-        }
-        else if ( strcmp(tmp[0], "pre_comp_type") == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->pre_comp_type = ival;
-        }
-        else if ( strcmp(tmp[0], "pre_comp_refactor") == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->pre_comp_refactor = ival;
-        }
-        else if ( strcmp(tmp[0], "pre_comp_droptol") == 0 )
-        {
-            val = atof( tmp[1] );
-            control->pre_comp_droptol = val;
-        }
-        else if ( strcmp(tmp[0], "pre_comp_sweeps") == 0 )
-        {
-            ival = atoi( tmp[1] );
-            control->pre_comp_sweeps = ival;
-        }
-        else if ( strcmp(tmp[0], "pre_app_jacobi_iters") == 0 )
-        {
-            val = atof( tmp[1] );
-            control->pre_app_jacobi_iters = val;
-        }
-        else if ( strcmp(tmp[0], "temp_init") == 0 )
-        {
-            val = atof(tmp[1]);
-            control->T_init = val;
-
-            if ( control->T_init < 0.001 )
-                control->T_init = 0.001;
-        }
-        else if ( strcmp(tmp[0], "temp_final") == 0 )
-        {
-            val = atof(tmp[1]);
-            control->T_final = val;
-
-            if ( control->T_final < 0.1 )
-                control->T_final = 0.1;
-        }
-        else if ( strcmp(tmp[0], "t_mass") == 0 )
-        {
-            val = atof(tmp[1]);
-            control->Tau_T = val * 1.e-3;    // convert t_mass from fs to ps
-        }
-        else if ( strcmp(tmp[0], "t_mode") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->T_mode = ival;
-        }
-        else if ( strcmp(tmp[0], "t_rate") == 0 )
-        {
-            val = atof(tmp[1]);
-            control->T_rate = val;
-        }
-        else if ( strcmp(tmp[0], "t_freq") == 0 )
-        {
-            val = atof(tmp[1]);
-            control->T_freq = val;
-        }
-        else if ( strcmp(tmp[0], "pressure") == 0 )
-        {
-            if ( control->ensemble == iNPT )
-            {
-                val = atof(tmp[1]);
-                control->P[0] = control->P[1] = control->P[2] = val;
-            }
-            else if ( control->ensemble == sNPT )
-            {
-                val = atof(tmp[1]);
-                control->P[0] = val;
-
-                val = atof(tmp[2]);
-                control->P[1] = val;
-
-                val = atof(tmp[3]);
-                control->P[2] = val;
-            }
-        }
-        else if ( strcmp(tmp[0], "p_mass") == 0 )
-        {
-            if ( control->ensemble == iNPT )
-            {
-                val = atof(tmp[1]);
-                control->Tau_P[0] = val * 1.e-3;   // convert p_mass from fs to ps
-            }
-            else if ( control->ensemble == sNPT )
-            {
-                val = atof(tmp[1]);
-                control->Tau_P[0] = val * 1.e-3;   // convert p_mass from fs to ps
-
-                val = atof(tmp[2]);
-                control->Tau_P[1] = val * 1.e-3;   // convert p_mass from fs to ps
-
-                val = atof(tmp[3]);
-                control->Tau_P[2] = val * 1.e-3;   // convert p_mass from fs to ps
-            }
-        }
-        else if ( strcmp(tmp[0], "pt_mass") == 0 )
-        {
-            val = atof(tmp[1]);
-            control->Tau_PT = val * 1.e-3;  // convert pt_mass from fs to ps
-        }
-        else if ( strcmp(tmp[0], "compress") == 0 )
-        {
-            val = atof(tmp[1]);
-            control->compressibility = val;
-        }
-        else if ( strcmp(tmp[0], "press_mode") == 0 )
-        {
-            val = atoi(tmp[1]);
-            control->press_mode = val;
-        }
-        else if ( strcmp(tmp[0], "remove_CoM_vel") == 0 )
-        {
-            val = atoi(tmp[1]);
-            control->remove_CoM_vel = val;
-        }
-        else if ( strcmp(tmp[0], "debug_level") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->debug_level = ival;
-        }
-        else if ( strcmp(tmp[0], "energy_update_freq") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->energy_update_freq = ival;
-        }
-        else if ( strcmp(tmp[0], "write_freq") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->write_steps = ival;
-        }
-        else if ( strcmp(tmp[0], "traj_compress") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->traj_compress = ival;
-
-            if ( out_control->traj_compress )
-                out_control->write = (int (*)(FILE *, const char *, ...)) gzprintf;
-            else out_control->write = fprintf;
-        }
-        else if ( strcmp(tmp[0], "traj_format") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->traj_format = ival;
-
-            if ( out_control->traj_format == 0 )
-            {
-                out_control->write_header =
-                    (int (*)( reax_system*, control_params*,
-                              static_storage*, void* )) Write_Custom_Header;
-                out_control->append_traj_frame =
-                    (int (*)(reax_system*, control_params*, simulation_data*,
-                             static_storage*, list **, void*)) Append_Custom_Frame;
-            }
-            else if ( out_control->traj_format == 1 )
-            {
-                out_control->write_header =
-                    (int (*)( reax_system*, control_params*,
-                              static_storage*, void* )) Write_xyz_Header;
-                out_control->append_traj_frame =
-                    (int (*)( reax_system*,  control_params*, simulation_data*,
-                              static_storage*, list **, void* )) Append_xyz_Frame;
-            }
-        }
-        else if ( strcmp(tmp[0], "traj_title") == 0 )
-        {
-            strcpy( out_control->traj_title, tmp[1] );
-        }
-        else if ( strcmp(tmp[0], "atom_info") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->atom_format += ival * 4;
-        }
-        else if ( strcmp(tmp[0], "atom_velocities") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->atom_format += ival * 2;
-        }
-        else if ( strcmp(tmp[0], "atom_forces") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->atom_format += ival * 1;
-        }
-        else if ( strcmp(tmp[0], "bond_info") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->bond_info = ival;
-        }
-        else if ( strcmp(tmp[0], "angle_info") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            out_control->angle_info = ival;
-        }
-        else if ( strcmp(tmp[0], "test_forces") == 0 )
-        {
-            ival = atoi(tmp[1]);
-        }
-        else if ( strcmp(tmp[0], "molec_anal") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->molec_anal = ival;
-        }
-        else if ( strcmp(tmp[0], "freq_molec_anal") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->freq_molec_anal = ival;
-        }
-        else if ( strcmp(tmp[0], "bond_graph_cutoff") == 0 )
-        {
-            val = atof(tmp[1]);
-            control->bg_cut = val;
-        }
-        else if ( strcmp(tmp[0], "ignore") == 0 )
-        {
-            control->num_ignored = atoi(tmp[1]);
-            for ( i = 0; i < control->num_ignored; ++i )
-                control->ignore[atoi(tmp[i + 2])] = 1;
-        }
-        else if ( strcmp(tmp[0], "dipole_anal") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->dipole_anal = ival;
-        }
-        else if ( strcmp(tmp[0], "freq_dipole_anal") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->freq_dipole_anal = ival;
-        }
-        else if ( strcmp(tmp[0], "diffusion_coef") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->diffusion_coef = ival;
-        }
-        else if ( strcmp(tmp[0], "freq_diffusion_coef") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->freq_diffusion_coef = ival;
-        }
-        else if ( strcmp(tmp[0], "restrict_type") == 0 )
-        {
-            ival = atoi(tmp[1]);
-            control->restrict_type = ival;
-        }
-        else
-        {
-            fprintf( stderr, "WARNING: unknown parameter %s\n", tmp[0] );
-            exit( 15 );
-        }
-    }
-
-    if (ferror(fp))
-    {
-        fprintf(stderr, "Error reading control file. Terminating.\n");
-        exit( INVALID_INPUT );
-    }
-
-    /* determine target T */
-    if ( control->T_mode == 0 )
-        control->T = control->T_final;
-    else control->T = control->T_init;
-
-
-    /* near neighbor and far neighbor cutoffs */
-    control->bo_cut = 0.01 * system->reaxprm.gp.l[29];
-    control->r_low  = system->reaxprm.gp.l[11];
-    control->r_cut  = system->reaxprm.gp.l[12];
-    control->vlist_cut += control->r_cut;
-
-    system->g.cell_size = control->vlist_cut / 2.;
-    for ( i = 0; i < 3; ++i )
-        system->g.spread[i] = 2;
-
-
-    /* Initialize Taper function */
-    Init_Taper( control );
-
-
-    /* free memory allocations at the top */
-    for ( i = 0; i < MAX_TOKENS; i++ )
-        free( tmp[i] );
-    free( tmp );
-    free( s );
-
-#if defined(DEBUG_FOCUS)
-    fprintf( stderr,
-             "en=%d steps=%d dt=%.5f opt=%d T=%.5f P=%.5f %.5f %.5f\n",
-             control->ensemble, control->nsteps, control->dt, control->tabulate,
-             control->T, control->P[0], control->P[1], control->P[2] );
-
-    fprintf(stderr, "control file read\n" );
-#endif
-    return 0;
+    return SUCCESS;
 }
diff --git a/sPuReMD/src/param.h b/sPuReMD/src/ffield.h
similarity index 76%
rename from sPuReMD/src/param.h
rename to sPuReMD/src/ffield.h
index 838bb1a0d004672978a35f94547d511104aba9ea..4aaf32a644861b069e8cf87e2eec68aadf4d3c84 100644
--- a/sPuReMD/src/param.h
+++ b/sPuReMD/src/ffield.h
@@ -19,22 +19,10 @@
   <http://www.gnu.org/licenses/>.
   ----------------------------------------------------------------------*/
 
-#ifndef __PARAM_H_
-#define __PARAM_H_
+#ifndef __FFIELD_H_
+#define __FFIELD_H_
 
 #include "mytypes.h"
-
-#define MAX_LINE 1024
-#define MAX_TOKENS 20
-#define MAX_TOKEN_LEN 1024
-
-int  Get_Atom_Type( reax_interaction*, char* );
-
-int  Tokenize( char*, char*** );
-
 char Read_Force_Field( FILE*, reax_interaction* );
 
-char Read_Control_File( FILE*, reax_system*, control_params*,
-                        output_controls* );
-
 #endif
diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c
index 13c57e63aad64529ca69aa44e05215ace7c4a76f..2746ede2bcc068e90dfd3bee3692999eaec303c9 100644
--- a/sPuReMD/src/forces.c
+++ b/sPuReMD/src/forces.c
@@ -187,7 +187,7 @@ void Validate_Lists( static_storage *workspace, list **lists, int step, int n,
             fprintf( stderr,
                      "step%d - ran out of space on H matrix: Htop=%d, max = %d",
                      step, Htop, Hmax );
-            exit(INSUFFICIENT_SPACE);
+            exit( INSUFFICIENT_MEMORY );
         }
     }
 
@@ -206,7 +206,7 @@ void Validate_Lists( static_storage *workspace, list **lists, int step, int n,
     {
         fprintf( stderr, "step%d-bondchk failed: i=%d end(i)=%d str(i+1)=%d\n",
                  step, flag, End_Index(flag, bonds), Start_Index(flag + 1, bonds) );
-        exit(INSUFFICIENT_SPACE);
+        exit( INSUFFICIENT_MEMORY );
     }
 
     if ( End_Index(i, bonds) >= bonds->num_intrs - 2 )
@@ -217,7 +217,7 @@ void Validate_Lists( static_storage *workspace, list **lists, int step, int n,
         {
             fprintf( stderr, "step%d-bondchk failed: i=%d end(i)=%d bond_end=%d\n",
                      step, flag, End_Index(i, bonds), bonds->num_intrs );
-            exit(INSUFFICIENT_SPACE);
+            exit( INSUFFICIENT_MEMORY );
         }
     }
 
@@ -240,7 +240,7 @@ void Validate_Lists( static_storage *workspace, list **lists, int step, int n,
         {
             fprintf( stderr, "step%d-hbondchk failed: i=%d end(i)=%d str(i+1)=%d\n",
                      step, flag, End_Index(flag, hbonds), Start_Index(flag + 1, hbonds) );
-            exit(INSUFFICIENT_SPACE);
+            exit( INSUFFICIENT_MEMORY );
         }
 
         if ( Num_Entries(i, hbonds) >=
@@ -252,7 +252,7 @@ void Validate_Lists( static_storage *workspace, list **lists, int step, int n,
             {
                 fprintf( stderr, "step%d-hbondchk failed: i=%d end(i)=%d hbondend=%d\n",
                          step, flag, End_Index(i, hbonds), hbonds->num_intrs );
-                exit(INSUFFICIENT_SPACE);
+                exit( INSUFFICIENT_MEMORY );
             }
         }
     }
@@ -372,15 +372,15 @@ void Init_Forces( reax_system *system, control_params *control,
                 dr3gamij_1 = ( r_ij * r_ij * r_ij + twbp->gamma );
                 dr3gamij_3 = POW( dr3gamij_1 , 0.33333333333333 );
 
-                H->entries[Htop].j = j;
-                H->entries[Htop].val = self_coef * Tap * EV_to_KCALpMOL / dr3gamij_3;
+                H->j[Htop] = j;
+                H->val[Htop] = self_coef * Tap * EV_to_KCALpMOL / dr3gamij_3;
                 ++Htop;
 
                 /* H_sp matrix entry */
                 if ( flag_sp )
                 {
-                    H_sp->entries[H_sp_top].j = j;
-                    H_sp->entries[H_sp_top].val = self_coef * Tap * EV_to_KCALpMOL / dr3gamij_3;
+                    H_sp->j[H_sp_top] = j;
+                    H_sp->val[H_sp_top] = self_coef * Tap * EV_to_KCALpMOL / dr3gamij_3;
                     ++H_sp_top;
                 }
 
@@ -538,12 +538,12 @@ void Init_Forces( reax_system *system, control_params *control,
             }
         }
 
-        H->entries[Htop].j = i;
-        H->entries[Htop].val = system->reaxprm.sbp[type_i].eta;
+        H->j[Htop] = i;
+        H->val[Htop] = system->reaxprm.sbp[type_i].eta;
         ++Htop;
 
-        H_sp->entries[H_sp_top].j = i;
-        H_sp->entries[H_sp_top].val = system->reaxprm.sbp[type_i].eta;
+        H_sp->j[H_sp_top] = i;
+        H_sp->val[H_sp_top] = system->reaxprm.sbp[type_i].eta;
         ++H_sp_top;
 
         Set_End_Index( i, btop_i, bonds );
@@ -663,8 +663,8 @@ void Init_Forces_Tab( reax_system *system, control_params *control,
                       t->ele[r].a;
                 val *= EV_to_KCALpMOL / C_ele;
 
-                H->entries[Htop].j = j;
-                H->entries[Htop].val = self_coef * val;
+                H->j[Htop] = j;
+                H->val[Htop] = self_coef * val;
                 ++Htop;
 
                 /* hydrogen bond lists */
@@ -793,8 +793,8 @@ void Init_Forces_Tab( reax_system *system, control_params *control,
             }
         }
 
-        H->entries[Htop].j = i;
-        H->entries[Htop].val = system->reaxprm.sbp[type_i].eta;
+        H->j[Htop] = i;
+        H->val[Htop] = system->reaxprm.sbp[type_i].eta;
         ++Htop;
 
         Set_End_Index( i, btop_i, bonds );
diff --git a/sPuReMD/src/pdb_tools.c b/sPuReMD/src/geo_tools.c
similarity index 52%
rename from sPuReMD/src/pdb_tools.c
rename to sPuReMD/src/geo_tools.c
index e7b1b57d07b91d5bf1c5f96eaf2b314ee55c6f02..01f404b4f6d51e73722b9f450f9ca8873dc3c9c9 100644
--- a/sPuReMD/src/pdb_tools.c
+++ b/sPuReMD/src/geo_tools.c
@@ -19,143 +19,262 @@
   <http://www.gnu.org/licenses/>.
   ----------------------------------------------------------------------*/
 
-#include "pdb_tools.h"
+#include <ctype.h>
+
+#include "geo_tools.h"
+#include "allocate.h"
 #include "box.h"
 #include "list.h"
-#include "param.h"
 #include "restart.h"
-#include "ctype.h"
+#include "tool_box.h"
+#include "vector.h"
 
 
-int is_Valid_Serial( static_storage *workspace, int serial )
+/********************* geo format routines ******************/
+void Count_Geo_Atoms( FILE *geo, reax_system *system )
 {
-    if ( workspace->map_serials[ serial ] < 0 )
+    int i, serial;
+    rvec x;
+    char element[3], name[9], line[MAX_LINE + 1];
+
+    /* total number of atoms */
+    fscanf( geo, " %d", &(system->N) );
+
+    /* count atoms */
+    for ( i = 0; i < system->N; ++i )
     {
-        fprintf( stderr, "CONECT line includes invalid pdb serial number %d.\n",
-                 serial );
-        fprintf( stderr, "Please correct the input file.Terminating...\n" );
-        exit( INVALID_INPUT );
+        fscanf( geo, CUSTOM_ATOM_FORMAT,
+                &serial, element, name, &x[0], &x[1], &x[2] );
+        Fit_to_Periodic_Box( &(system->box), &x );
     }
 
-    return 1;
+    fseek( geo, 0, SEEK_SET ); // set the pointer to the beginning of the file
+    fgets( line, MAX_LINE, geo );
+    fgets( line, MAX_LINE, geo );
+
+#if defined(DEBUG_FOCUS)
+    fprintf( stderr, "N = %d\n\n", system->N );
+#endif
 }
 
 
-int Check_Input_Range( int val, int lo, int hi, char *message )
+char Read_Geo( char* geo_file, reax_system* system, control_params *control,
+               simulation_data *data, static_storage *workspace )
 {
-    if ( val < lo || val > hi )
+
+    FILE *geo;
+    char descriptor[9];
+    int i, serial, top;
+    real box_x, box_y, box_z, alpha, beta, gamma;
+    rvec x;
+    char element[3], name[9];
+    reax_atom *atom;
+
+    /* open the geometry file */
+    if ( (geo = fopen(geo_file, "r")) == NULL )
     {
-        fprintf( stderr, "%s\nInput %d - Out of range %d-%d. Terminating...\n",
-                 message, val, lo, hi );
-        exit( INVALID_INPUT );
+        fprintf( stderr, "Error opening the geo file! terminating...\n" );
+        exit( FILE_NOT_FOUND );
     }
 
-    return 1;
-}
+    /* read box information */
+    fscanf( geo, CUSTOM_BOXGEO_FORMAT,
+            descriptor, &box_x, &box_y, &box_z, &alpha, &beta, &gamma );
+    /* initialize the box */
+    Setup_Box( box_x, box_y, box_z, alpha, beta, gamma, &(system->box) );
 
+    /* count my atoms & allocate storage */
+    Count_Geo_Atoms( geo, system );
+    if ( PreAllocate_Space( system, control, workspace ) == FAILURE )
+    {
+        fprintf( stderr, "PreAllocate_Space: not enough memory!" );
+        fprintf( stderr, "terminating...\n" );
+        exit( INSUFFICIENT_MEMORY );
+    }
 
-void Trim_Spaces( char *element )
-{
-    int i, j;
+    /* read in my atom info */
+    top = 0;
+    for ( i = 0; i < system->N; ++i )
+    {
+        fscanf( geo, CUSTOM_ATOM_FORMAT,
+                &serial, element, name, &x[0], &x[1], &x[2] );
+        Fit_to_Periodic_Box( &(system->box), &x );
+#if defined(DEBUG)
+        fprintf( stderr, "atom%d: %s %s %f %f %f\n",
+                 serial, element, name, x[0], x[1], x[2] );
+#endif
+
+        atom = &(system->atoms[top]);
+        workspace->orig_id[i] = serial;
+        atom->type = Get_Atom_Type( &(system->reaxprm), element );
+        strcpy( atom->name, name );
+        rvec_Copy( atom->x, x );
+        rvec_MakeZero( atom->v );
+        rvec_MakeZero( atom->f );
+        atom->q = 0.;
 
-    for ( i = 0; element[i] == ' '; ++i ); // skip initial space chars
+        top++;
+    }
+
+    fclose( geo );
+
+#if defined(DEBUG_FOCUS)
+    fprintf( stderr, "finished reading the geo file\n" );
+#endif
 
-    for ( j = i; j < strlen(element) && element[j] != ' '; ++j )
-        element[j - i] = toupper( element[j] ); // make uppercase, move to beginning
-    element[j - i] = 0; // finalize the string
+    return SUCCESS;
 }
 
 
-char Read_PDB( char* pdb_file, reax_system* system, control_params *control,
-               simulation_data *data, static_storage *workspace )
+int Read_Box_Info( reax_system *system, FILE *geo, int geo_format )
 {
+    char *cryst;
+    char  line[MAX_LINE + 1];
+    char  descriptor[9];
+    char  s_a[12], s_b[12], s_c[12], s_alpha[12], s_beta[12], s_gamma[12];
+    char  s_group[12], s_zValue[12];
 
-    FILE *pdb;
-    char **tmp;
-    char *s, *s1;
-    char descriptor[9], serial[9];
-    char atom_name[9], res_name[9], res_seq[9];
-    char s_x[9], s_y[9], s_z[9];
-    char occupancy[9], temp_factor[9];
-    char seg_id[9], element[9], charge[9];
-    char alt_loc, chain_id, icode;
-    char s_a[10], s_b[10], s_c[10], s_alpha[9], s_beta[9], s_gamma[9];
-    char s_group[12], s_zValue[9];
-    char *endptr = NULL;
-    int  i, c, c1, pdb_serial, ratom = 0;
-    /* open pdb file */
-    if ( (pdb = fopen(pdb_file, "r")) == NULL )
+    /* initialize variables */
+    fseek( geo, 0, SEEK_SET ); // set the pointer to the beginning of the file
+
+    switch ( geo_format )
     {
-        fprintf( stderr, "Error opening the pdb file!\n" );
-        exit( FILE_NOT_FOUND_ERR );
+        case PDB:
+            cryst = "CRYST1";
+            break;
+        default:
+            cryst = "BOX";
     }
 
+    /* locate the cryst line in the geo file, read it and
+       initialize the big box */
+    while ( fgets( line, MAX_LINE, geo ) )
+    {
+        if ( strncmp( line, cryst, 6 ) == 0 )
+        {
+            if ( geo_format == PDB )
+                sscanf( line, PDB_CRYST1_FORMAT,
+                        &descriptor[0],
+                        &s_a[0], &s_b[0], &s_c[0],
+                        &s_alpha[0], &s_beta[0], &s_gamma[0],
+                        &s_group[0], &s_zValue[0] );
+
+            /* compute full volume tensor from the angles */
+            Setup_Box( atof(s_a),  atof(s_b), atof(s_c),
+                    atof(s_alpha), atof(s_beta), atof(s_gamma),
+                    &(system->box) );
+            return SUCCESS;
+        }
+    }
+    if ( ferror( geo ) )
+    {
+        return FAILURE;
+    }
 
-    /* allocate memory for tokenizing pdb lines */
-    s =   (char*)  malloc( sizeof(char)  * MAX_LINE );
-    s1 =  (char*)  malloc( sizeof(char)  * MAX_LINE );
-    tmp = (char**) malloc( sizeof(char*) * MAX_TOKENS );
-    for ( i = 0; i < MAX_TOKENS; i++ )
-        tmp[i] = (char*) malloc( sizeof(char) * MAX_TOKEN_LEN );
+    return FAILURE;
+}
 
 
-    /* count number of atoms in the pdb file */
-    system->N = 0;
-    s[0] = 0;
-    while (fgets( s, MAX_LINE, pdb ))
-    {
+void Count_PDB_Atoms( FILE *geo, reax_system *system )
+{
+    char *endptr = NULL;
+    char line[MAX_LINE + 1];
+    char s_x[9], s_y[9], s_z[9];
+    rvec x;
 
-        tmp[0][0] = 0;
-        c = Tokenize( s, &tmp );
+    /* initialize variables */
+    fseek( geo, 0, SEEK_SET ); /* set the pointer to the beginning of the file */
+    system->N = 0;
 
-        if ( strncmp( tmp[0], "ATOM", 4 ) == 0 ||
-                strncmp( tmp[0], "HETATM", 6 ) == 0 )
-            (system->N)++;
-        s[0] = 0;
-    }
-    if (ferror(pdb))
+    /* increment number of atoms for each line denoting an atom desc */
+    while ( fgets( line, MAX_LINE, geo ) )
     {
-        fprintf(stderr, "Error reading PDB file. Terminating.\n");
-        exit( INVALID_INPUT );
+        if ( strncmp( line, "ATOM", 4 ) == 0 ||
+                strncmp( line, "HETATM", 6 ) == 0 )
+        {
+            system->N++;
+
+            strncpy( s_x, line + 30, 8 );
+            s_x[8] = 0;
+            strncpy( s_y, line + 38, 8 );
+            s_y[8] = 0;
+            strncpy( s_z, line + 46, 8 );
+            s_z[8] = 0;
+            Make_Point( strtod( s_x, &endptr ), strtod( s_y, &endptr ),
+                        strtod( s_z, &endptr ), &x );
+            Fit_to_Periodic_Box( &(system->box), &x );
+        }
     }
 
-    fclose(pdb);
-#if defined(DEBUG_FOCUS)
-    fprintf( stderr, "system->N: %d\n", system->N );
+#if defined(DEBUG)
+    fprintf( stderr, "count atoms:\n" );
+    fprintf( stderr, "N = %d\n\n", system->N );
 #endif
+}
 
-    /* memory allocations for atoms, atom maps, bond restrictions */
-    system->atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) );
-
-    workspace->map_serials = (int*) calloc( MAX_ATOM_ID, sizeof(int) );
-    for ( i = 0; i < MAX_ATOM_ID; ++i )
-        workspace->map_serials[i] = -1;
 
-    workspace->orig_id = (int*) calloc( system->N, sizeof(int) );
-    workspace->restricted  = (int*) calloc( system->N, sizeof(int) );
-    workspace->restricted_list = (int**) calloc( system->N, sizeof(int*) );
-    for ( i = 0; i < system->N; ++i )
-        workspace->restricted_list[i] = (int*) calloc( MAX_RESTRICT, sizeof(int) );
+char Read_PDB( char* pdb_file, reax_system* system, control_params *control,
+               simulation_data *data, static_storage *workspace )
+{
 
+    FILE  *pdb;
+    char **tmp;
+    char  *s, *s1;
+    char   descriptor[9], serial[9];
+    char   atom_name[9], res_name[9], res_seq[9];
+    char   s_x[9], s_y[9], s_z[9];
+    char   occupancy[9], temp_factor[9];
+    char   seg_id[9], element[9], charge[9];
+    char   alt_loc, chain_id, icode;
+    char  *endptr = NULL;
+    int    i, c, c1, pdb_serial, top;
+    rvec   x;
+    reax_atom *atom;
 
-    /* start reading and processing pdb file */
+    /* open pdb file */
     if ( (pdb = fopen(pdb_file, "r")) == NULL )
     {
-        fprintf( stderr, "Error opening the pdb file!\n" );
-        exit( FILE_NOT_FOUND_ERR );
+        fprintf( stderr, "fopen: error opening the pdb file! terminating...\n" );
+        exit( FILE_NOT_FOUND );
     }
-    c = 0;
-    c1 = 0;
 
-    while (!feof(pdb))
+    /* allocate memory for tokenizing pdb lines */
+    if ( Allocate_Tokenizer_Space( &s, &s1, &tmp ) == FAILURE )
     {
-        /* clear previous input line */
-        s[0] = 0;
-        for ( i = 0; i < c1; ++i )
-            tmp[i][0] = 0;
+        fprintf( stderr, "Allocate_Tokenizer_Space: not enough memory!" );
+        fprintf( stderr, "terminating...\n" );
+        exit( INSUFFICIENT_MEMORY );
+    }
+
+    /* read box information */
+    if ( Read_Box_Info( system, pdb, PDB ) == FAILURE )
+    {
+        fprintf( stderr, "Read_Box_Info: no CRYST line in the pdb file!" );
+        fprintf( stderr, "terminating...\n" );
+        exit( INVALID_GEO );
+    }
+
+    Count_PDB_Atoms( pdb, system );
+    if ( PreAllocate_Space( system, control, workspace ) == FAILURE )
+    {
+        fprintf( stderr, "PreAllocate_Space: not enough memory!" );
+        fprintf( stderr, "terminating...\n" );
+        exit( INSUFFICIENT_MEMORY );
+    }
+
+    /* start reading and processing the pdb file */
+#if defined(DEBUG_FOCUS)
+    fprintf( stderr, "starting to read the pdb file\n" );
+#endif
+    fseek( pdb, 0, SEEK_SET );
+    c  = 0;
+    c1 = 0;
+    top = 0;
+    s[0] = 0;
 
+    while ( fgets( s, MAX_LINE, pdb ) )
+    {
         /* read new line and tokenize it */
-        fgets( s, MAX_LINE, pdb );
         strncpy( s1, s, MAX_LINE - 1 );
         c1 = Tokenize( s, &tmp );
 
@@ -170,6 +289,8 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control,
                 serial[5] = 0;
                 strncpy( &atom_name[0], s1 + 12, 4 );
                 atom_name[4] = 0;
+                //strncpy( &serial[0], s1+6, 7 );       serial[7] = 0;
+                //strncpy( &atom_name[0], s1+13, 3 );   atom_name[3] = 0;
                 alt_loc = s1[16];
                 strncpy( &res_name[0], s1 + 17, 3 );
                 res_name[3] = 0;
@@ -202,6 +323,8 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control,
                 serial[5] = 0;
                 strncpy( &atom_name[0], s1 + 12, 4 );
                 atom_name[4] = 0;
+                //strncpy( &serial[0], s1+6, 7 );       serial[7] = 0;
+                //strncpy( &atom_name[0], s1+13, 3 );   atom_name[3] = 0;
                 alt_loc = s1[16];
                 strncpy( &res_name[0], s1 + 17, 3 );
                 res_name[3] = 0;
@@ -226,193 +349,194 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control,
                 charge[2] = 0;
             }
 
+            /* if the point is inside my_box, add it to my lists */
+            Make_Point( strtod( &s_x[0], &endptr ),
+                        strtod( &s_y[0], &endptr ),
+                        strtod( &s_z[0], &endptr ), &x );
 
-            /* add to mapping */
-            pdb_serial = strtod( &serial[0], &endptr );
-            Check_Input_Range( pdb_serial, 0, MAX_ATOM_ID, "Invalid pdb_serial" );
-            workspace->map_serials[ pdb_serial ] = c;
-            workspace->orig_id[ c ] = pdb_serial;
-            // fprintf( stderr, "map %d --> %d\n", pdb_serial, c );
-
+            Fit_to_Periodic_Box( &(system->box), &x );
 
-            /* copy atomic positions */
-            system->atoms[c].x[0] = strtod( &s_x[0], &endptr );
-            system->atoms[c].x[1] = strtod( &s_y[0], &endptr );
-            system->atoms[c].x[2] = strtod( &s_z[0], &endptr );
+            /* store orig_id, type, name and coord info of the new atom */
+            atom = &(system->atoms[top]);
+            pdb_serial = (int) strtod( &serial[0], &endptr );
+            workspace->orig_id[top] = pdb_serial;
 
-            /* atom name and type */
-            strcpy( system->atoms[c].name, atom_name );
             Trim_Spaces( element );
-            system->atoms[c].type = Get_Atom_Type( &(system->reaxprm), element );
+            atom->type = Get_Atom_Type( &(system->reaxprm), element );
+            strcpy( atom->name, atom_name );
+
+            rvec_Copy( atom->x, x );
+            rvec_MakeZero( atom->v );
+            rvec_MakeZero( atom->f );
+            atom->q = 0;
+
+            top++;
+            // fprintf( stderr, "p%d: %6d%2d x:%8.3f%8.3f%8.3f"
+            //                  "q:%8.3f occ:%s temp:%s seg:%s elmnt:%s\n",
+            //       system->my_rank,
+            //       c, system->my_atoms[top].type,
+            //       system->my_atoms[top].x[0],
+            //       system->my_atoms[top].x[1],
+            //       system->my_atoms[top].x[2],
+            //       system->my_atoms[top].q, occupancy, temp_factor,
+            //       seg_id, element );
+
+            //fprintf( stderr, "atom( %8.3f %8.3f %8.3f ) --> p%d\n",
+            // system->my_atoms[top].x[0], system->my_atoms[top].x[1],
+            // system->my_atoms[top].x[2], system->my_rank );
 
-            /* fprintf( stderr,
-            "%d%8.3f%8.3f%8.3fq:%8.3f occ:%s temp:%s seg_id:%s element:%s\n",
-             system->atoms[c].type,
-             system->atoms[c].x[0], system->atoms[c].x[1], system->atoms[c].x[2],
-             system->atoms[c].q, occupancy, temp_factor, seg_id, element ); */
             c++;
         }
-        else if (!strncmp( tmp[0], "CRYST1", 6 ))
-        {
-            sscanf( s1, PDB_CRYST1_FORMAT,
-                    &descriptor[0],
-                    &s_a[0],
-                    &s_b[0],
-                    &s_c[0],
-                    &s_alpha[0],
-                    &s_beta[0],
-                    &s_gamma[0],
-                    &s_group[0],
-                    &s_zValue[0] );
-
-            /* Compute full volume tensor from the angles */
-            Init_Box_From_CRYST( atof(s_a),  atof(s_b), atof(s_c),
-                                 atof(s_alpha), atof(s_beta), atof(s_gamma),
-                                 &(system->box) );
-        }
 
         /* IMPORTANT: We do not check for the soundness of restrictions here.
-           When atom2 is on atom1's restricted list, and there is a restriction on
-           atom2, then atom1 has to be on atom2's restricted list, too. However,
-           we do not check if this is the case in the input file,
+           When atom2 is on atom1's restricted list, and there is a restriction
+           on atom2, then atom1 has to be on atom2's restricted list, too.
+           However, we do not check if this is the case in the input file,
            this is upto the user. */
         else if (!strncmp( tmp[0], "CONECT", 6 ))
         {
-            /* error check */
-            //fprintf(stderr, "CONECT: %d\n", c1 );
-            Check_Input_Range( c1 - 2, 0, MAX_RESTRICT,
-                               "CONECT line exceeds max restrictions allowed.\n" );
-
-            /* read bond restrictions */
-            if ( is_Valid_Serial( workspace, pdb_serial = atoi(tmp[1]) ) )
-                ratom = workspace->map_serials[ pdb_serial ];
-
-            workspace->restricted[ ratom ] = c1 - 2;
-            for ( i = 2; i < c1; ++i )
+            if ( control->restrict_bonds )
             {
-                if ( is_Valid_Serial( workspace, pdb_serial = atoi(tmp[i]) ) )
-                    workspace->restricted_list[ ratom ][ i - 2 ] =
-                        workspace->map_serials[ pdb_serial ];
+                /* error check */
+                // Check_Input_Range( c1 - 2, 0, MAX_RESTRICT,
+                // "CONECT line exceeds max num restrictions allowed.\n" );
+
+                /* read bond restrictions */
+                // if( is_Valid_Serial( workspace, pdb_serial = atoi(tmp[1]) ) )
+                //   ratom = workspace->map_serials[ pdb_serial ];
+
+                // workspace->restricted[ ratom ] = c1 - 2;
+                // for( i = 2; i < c1; ++i )
+                //  {
+                //    if( is_Valid_Serial(workspace, pdb_serial = atoi(tmp[i])) )
+                //        workspace->restricted_list[ ratom ][ i-2 ] =
+                //          workspace->map_serials[ pdb_serial ];
+                //  }
+
+                // fprintf( stderr, "restriction on %d:", ratom );
+                // for( i = 0; i < workspace->restricted[ ratom ]; ++i )
+                // fprintf( stderr, "  %d",
+                //          workspace->restricted_list[ratom][i] );
+                // fprintf( stderr, "\n" );
             }
-
-            /* fprintf( stderr, "restriction on %d:", ratom );
-            for( i = 0; i < workspace->restricted[ ratom ]; ++i )
-             fprintf( stderr, "  %d", workspace->restricted_list[ratom][i] );
-             fprintf( stderr, "\n" ); */
         }
+
+        /* clear previous input line */
+        s[0] = 0;
+        for ( i = 0; i < c1; ++i )
+            tmp[i][0] = 0;
+    }
+    if ( ferror( pdb ) )
+    {
+        return FAILURE;
     }
 
-    fclose(pdb);
+    fclose( pdb );
 
 #if defined(DEBUG_FOCUS)
-    fprintf( stderr, "pdb file read\n" );
+    fprintf( stderr, "finished reading the pdb file\n" );
 #endif
 
-    return 1;
-}
+    return SUCCESS;
+} 
 
 
-char Write_PDB( reax_system* system, control_params *control,
-                simulation_data *data, static_storage *workspace,
-                list* bonds, output_controls *out_control )
+/* PDB serials are written without regard to the order, we'll see if this
+   cause trouble, if so we'll have to rethink this approach
+   Also, we do not write connect lines yet.
+*/
+char Write_PDB( reax_system* system, list* bonds, simulation_data *data,
+        control_params *control, static_storage *workspace, output_controls *out_control )
 {
-    int  i, j, k, count;
-    int  connect[4];
-    char temp[MAX_STR], name[10];
-    real bo;
+    int i, buffer_req, buffer_len;
+    //int j, connect[4];
+    char name[8];
+    //real bo;
     real alpha, beta, gamma;
+    reax_atom *p_atom;
+    char fname[MAX_STR];
+    char *line;
+    char *buffer;
+    FILE *pdb;
 
+    /* Allocation */
+    line = (char*) smalloc( sizeof(char) * PDB_ATOM_FORMAT_O_LENGTH, "geo:line" );
+    buffer_req = system->N * PDB_ATOM_FORMAT_O_LENGTH;
 
-    /* open output pdb file */
-    sprintf( temp, "%s%d.pdb", control->sim_name, data->step );
-    if ( (out_control->pdb = fopen( temp, "w" )) == NULL )
-    {
-        fprintf( stderr, "Error opening the pdb output file!\n" );
-        exit( FILE_NOT_FOUND_ERR );
-    }
-
+    buffer = (char*) smalloc( sizeof(char) * buffer_req, "geo:buffer" );
 
+    pdb = NULL;
+    line[0] = 0;
+    buffer[0] = 0;
     /* Writing Box information */
-    /* Write full volume tensor from the angles (as soon as possible) TODO_SOON */
-    gamma = acos( (system->box.box[0][0] * system->box.box[1][0] +
+    gamma = ACOS( (system->box.box[0][0] * system->box.box[1][0] +
                    system->box.box[0][1] * system->box.box[1][1] +
                    system->box.box[0][2] * system->box.box[1][2]) /
-                  (system->box.box_norms[0] * system->box.box_norms[1]));
-    beta  = acos( (system->box.box[0][0] * system->box.box[2][0] +
+                  (system->box.box_norms[0] * system->box.box_norms[1]) );
+    beta  = ACOS( (system->box.box[0][0] * system->box.box[2][0] +
                    system->box.box[0][1] * system->box.box[2][1] +
                    system->box.box[0][2] * system->box.box[2][2]) /
-                  (system->box.box_norms[0] * system->box.box_norms[2]));
-    alpha = acos( (system->box.box[2][0] * system->box.box[1][0] +
+                  (system->box.box_norms[0] * system->box.box_norms[2]) );
+    alpha = ACOS( (system->box.box[2][0] * system->box.box[1][0] +
                    system->box.box[2][1] * system->box.box[1][1] +
                    system->box.box[2][2] * system->box.box[1][2]) /
-                  (system->box.box_norms[2] * system->box.box_norms[1]));
-
-    fprintf(out_control->pdb, PDB_CRYST1_FORMAT_O,
-            "CRYST1",
-            system->box.box_norms[0],
-            system->box.box_norms[1],
-            system->box.box_norms[2],
-            RAD2DEG(alpha),
-            RAD2DEG(beta),
-            RAD2DEG(gamma),
-            " ",
-            0);
+                  (system->box.box_norms[2] * system->box.box_norms[1]) );
+
+    /*open pdb and write header*/
+    sprintf(fname, "%s-%d.pdb", control->sim_name, data->step);
+    pdb = fopen(fname, "w");
+    fprintf( pdb, PDB_CRYST1_FORMAT_O,
+             "CRYST1",
+             system->box.box_norms[0], system->box.box_norms[1],
+             system->box.box_norms[2],
+             RAD2DEG(alpha), RAD2DEG(beta), RAD2DEG(gamma), " ", 0 );
     fprintf( out_control->log, "Box written\n" );
     fflush( out_control->log );
 
-    /* Writing atom information */
-    for (i = 0; i < system->N; i++)
+    /*write atom lines to buffer*/
+    for ( i = 0; i < system->N; i++)
     {
-        strncpy( name, system->reaxprm.sbp[system->atoms[i].type].name, 2 );
-        name[2] = '\0';
-        fprintf( out_control->pdb, PDB_ATOM_FORMAT_O,
-                 "ATOM  ",
-                 workspace->orig_id[i],
-                 name,
-                 ' ',
-                 "REX",
-                 ' ',
-                 1,
-                 ' ',
-                 system->atoms[i].x[0],
-                 system->atoms[i].x[1],
-                 system->atoms[i].x[2],
-                 1.0,
-                 0.0,
-                 "0",
-                 name,
-                 "  " );
+        p_atom = &(system->atoms[i]);
+        strncpy(name, p_atom->name, 8);
+        Trim_Spaces(name);
+        sprintf( line, PDB_ATOM_FORMAT_O,
+                 "ATOM  ", workspace->orig_id[i], p_atom->name, ' ', "REX", ' ', 1, ' ',
+                 p_atom->x[0], p_atom->x[1], p_atom->x[2],
+                 1.0, 0.0, "0", name, "  " );
+        fprintf( stderr, "PDB NAME <%s>\n", p_atom->name );
+        strncpy( buffer + i * PDB_ATOM_FORMAT_O_LENGTH, line,
+                 PDB_ATOM_FORMAT_O_LENGTH );
     }
 
-    fprintf( out_control->log, "ATOM written\n" );
-    fflush( out_control->log );
+    buffer_len = system->N * PDB_ATOM_FORMAT_O_LENGTH;
+    buffer[buffer_len] = 0;
 
-    /* Writing connect information */
-    for (i = 0; i < system->N; i++)
-    {
-        count = 0;
+    fprintf( pdb, "%s", buffer );
+    fclose( pdb );
 
-        for (j = Start_Index(i, bonds); j < End_Index(i, bonds); ++j)
-        {
-            bo = bonds->select.bond_list[j].bo_data.BO;
-            if (bo > 0.3)
-            {
-                connect[count] = workspace->orig_id[bonds->select.bond_list[j].nbr];
-                count++;
-            }
+    /* Writing connect information */
+    /*
+    for(i=0; i < system->N; i++) {
+      count = 0;
+      for(j = Start_Index(i, bonds); j < End_Index(i, bonds); ++j) {
+        bo = bonds->bond_list[j].bo_data.BO;
+        if (bo > 0.3) {
+          connect[count] = bonds->bond_list[j].nbr+1;
+          count++;
         }
+      }
 
-        fprintf( out_control->pdb, "%6s%6d", "CONECT", workspace->orig_id[i] );
-        for ( k = 0; k < count; k++ )
-            fprintf( out_control->pdb, "%6d", connect[k] );
-        fprintf( out_control->pdb, "\n" );
+      fprintf( out_control->pdb, "%6s%5d", "CONECT", i+1 );
+      for( k=0; k < count; k++ )
+        fprintf( out_control->pdb, "%5d", connect[k] );
+      fprintf( out_control->pdb, "\n" );
     }
+    */
 
-    fprintf( out_control->pdb, "END\n" );
-
-    fclose( out_control->pdb );
+    free(buffer);
+    free(line);
 
-    return 1;
+    return SUCCESS;
 }
 
 
@@ -436,67 +560,70 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control,
     if ( (bgf = fopen( bgf_file, "r" )) == NULL )
     {
         fprintf( stderr, "Error opening the bgf file!\n" );
-        exit( FILE_NOT_FOUND_ERR );
+        exit( FILE_NOT_FOUND );
     }
 
-
     /* allocate memory for tokenizing biograf file lines */
     line   = (char*)  malloc( sizeof(char)  * MAX_LINE );
     backup = (char*)  malloc( sizeof(char)  * MAX_LINE );
     tokens = (char**) malloc( sizeof(char*) * MAX_TOKENS );
     for ( i = 0; i < MAX_TOKENS; i++ )
+    {
         tokens[i] = (char*) malloc( sizeof(char) * MAX_TOKEN_LEN );
-
+    }
 
     /* count number of atoms in the pdb file */
     system->N = 0;
-    while ( !feof( bgf ) )
-    {
-        line[0] = 0;
-        fgets( line, MAX_LINE, bgf );
+    line[0] = 0;
 
+    while ( fgets( line, MAX_LINE, bgf ) )
+    {
         tokens[0][0] = 0;
         token_cnt = Tokenize( line, &tokens );
 
         if ( !strcmp( tokens[0], "ATOM" ) || !strcmp( tokens[0], "HETATM" ) )
             (system->N)++;
+
+        line[0] = 0;
     }
-    //fprintf( stderr, "system->N: %d\n", system->N );
+    if ( ferror ( bgf ) )
+    {
+        return FAILURE;
+    }
+#if defined(DEBUG_FOCUS)
+    fprintf( stderr, "system->N: %d\n", system->N );
+#endif
     fclose( bgf );
 
-
     /* memory allocations for atoms, atom maps, bond restrictions */
     system->atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) );
 
     workspace->map_serials = (int*) calloc( MAX_ATOM_ID, sizeof(int) );
     for ( i = 0; i < MAX_ATOM_ID; ++i )
+    {
         workspace->map_serials[i] = -1;
+    }
 
     workspace->orig_id = (int*) calloc( system->N, sizeof(int) );
     workspace->restricted  = (int*) calloc( system->N, sizeof(int) );
     workspace->restricted_list = (int**) calloc( system->N, sizeof(int*) );
     for ( i = 0; i < system->N; ++i )
+    {
         workspace->restricted_list[i] = (int*) calloc( MAX_RESTRICT, sizeof(int) );
-
+    }
 
     /* start reading and processing bgf file */
     if ( (bgf = fopen( bgf_file, "r" )) == NULL )
     {
         fprintf( stderr, "Error opening the bgf file!\n" );
-        exit( FILE_NOT_FOUND_ERR );
+        exit( FILE_NOT_FOUND );
     }
     atom_cnt = 0;
     token_cnt = 0;
 
-    while ( !feof( bgf ) )
+    while ( fgets( line, MAX_LINE, bgf ) )
     {
-        /* clear previous input line */
-        line[0] = 0;
-        for ( i = 0; i < token_cnt; ++i )
-            tokens[i][0] = 0;
-
         /* read new line and tokenize it */
-        fgets( line, MAX_LINE, bgf );
         strncpy( backup, line, MAX_LINE - 1 );
         token_cnt = Tokenize( line, &tokens );
 
@@ -562,7 +689,6 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control,
                 charge[8] = 0;
             }
 
-
             /* add to mapping */
             bgf_serial = strtod( &serial[0], &endptr );
             Check_Input_Range( bgf_serial, 0, MAX_ATOM_ID, "Invalid bgf serial" );
@@ -570,13 +696,11 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control,
             workspace->orig_id[ atom_cnt ] = bgf_serial;
             // fprintf( stderr, "map %d --> %d\n", bgf_serial, atom_cnt );
 
-
             /* copy atomic positions */
             system->atoms[atom_cnt].x[0] = strtod( &s_x[0], &endptr );
             system->atoms[atom_cnt].x[1] = strtod( &s_y[0], &endptr );
             system->atoms[atom_cnt].x[2] = strtod( &s_z[0], &endptr );
 
-
             /* atom name and type */
             strcpy( system->atoms[atom_cnt].name, atom_name );
             Trim_Spaces( element );
@@ -605,7 +729,7 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control,
                     &s_gamma[0] );
 
             /* Compute full volume tensor from the angles */
-            Init_Box_From_CRYST( atof(s_a),  atof(s_b), atof(s_c),
+            Setup_Box( atof(s_a),  atof(s_b), atof(s_c),
                                  atof(s_alpha), atof(s_beta), atof(s_gamma),
                                  &(system->box) );
         }
@@ -617,19 +741,37 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control,
 
             /* read bond restrictions */
             if ( is_Valid_Serial( workspace, bgf_serial = atoi(tokens[1]) ) )
+            {
                 ratom = workspace->map_serials[ bgf_serial ];
+            }
 
             workspace->restricted[ ratom ] = token_cnt - 2;
             for ( i = 2; i < token_cnt; ++i )
+            {
                 if ( is_Valid_Serial( workspace, bgf_serial = atoi(tokens[i]) ) )
+                {
                     workspace->restricted_list[ ratom ][ i - 2 ] =
                         workspace->map_serials[ bgf_serial ];
+                }
+            }
 
             /* fprintf( stderr, "restriction on %d:", ratom );
             for( i = 0; i < workspace->restricted[ ratom ]; ++i )
              fprintf( stderr, "  %d", workspace->restricted_list[ratom][i] );
              fprintf( stderr, "\n" ); */
         }
+
+        /* clear previous input line */
+        line[0] = 0;
+
+        for ( i = 0; i < token_cnt; ++i )
+        {
+            tokens[i][0] = 0;
+        }
+    }
+    if ( ferror ( bgf ) )
+    {
+        return FAILURE;
     }
 
     fclose( bgf );
@@ -638,5 +780,5 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control,
     fprintf( stderr, "bgf file read\n" );
 #endif
 
-    return 1;
+    return SUCCESS;
 }
diff --git a/sPuReMD/src/pdb_tools.h b/sPuReMD/src/geo_tools.h
similarity index 87%
rename from sPuReMD/src/pdb_tools.h
rename to sPuReMD/src/geo_tools.h
index c3afab9f601a666f54b00c99c80502bbecb64c41..4c44e3081e6105947b610916e95210e123b4a7d9 100644
--- a/sPuReMD/src/pdb_tools.h
+++ b/sPuReMD/src/geo_tools.h
@@ -19,13 +19,20 @@
   <http://www.gnu.org/licenses/>.
   ----------------------------------------------------------------------*/
 
-#ifndef __PDB_TOOLS_H_
-#define __PDB_TOOLS_H_
+#ifndef __GEO_TOOLS_H_
+#define __GEO_TOOLS_H_
 
 #include "mytypes.h"
 
-/*
-PDB format :
+// CUSTOM_BOXGEO: BOXGEO box_x box_y box_z  angle1 angle2 angle3
+#define CUSTOM_BOXGEO_FORMAT " %s %lf %lf %lf %lf %lf %lf"
+// CUSTOM ATOM: serial element name x y z
+#define CUSTOM_ATOM_FORMAT " %d %s %s %lf %lf %lf"
+
+char Read_Geo( char*, reax_system*, control_params*,
+        simulation_data*, static_storage* );
+
+/* PDB format :
 http://www.rcsb.org/pdb/file_formats/pdb/pdbguide2.2/guide2.2_frame.html
 
 #define PDB_ATOM_FORMAT   "%6s%5d%4s%c%4s%c%4d%c%8s%8s%8s%6s%6s%4s%2s%2s\n"
@@ -95,24 +102,28 @@ COLUMNS       DATA TYPE       FIELD         DEFINITION
 67 - 70      Integer         z             Z value
 */
 
-//#define PDB_ATOM_FORMAT "ATOM  %4d%4s%c%3s%c%4d%c%8.3f%8.3f%8.3f%6.2f%6.2f%-4s%2s%2s\n"
+//#define PDB_ATOM_FORMAT
+//"ATOM  %4d%4s%c%3s%c%4d%c%8.3f%8.3f%8.3f%6.2f%6.2f%-4s%2s%2s\n"
 
 #define PDB_ATOM_FORMAT   "%6s%5d%4s%c%4s%c%4d%c%8s%8s%8s%6s%6s%4s%2s%2s\n"
+#define PDB_ATOM_FORMAT_LENGTH 71
 #define PDB_HETATM_FORMAT "%6s%5d%4s%c%4s%c%4d%c%8s%8s%8s%6s%6s%2s%2s\n"
 #define PDB_CONECT_FORMAT "%6s%5d%5d%5d%5d%5d\n"
 #define PDB_CRYST1_FORMAT "%6s%9s%9s%9s%7s%7s%7s%11s%4s\n"
 
 #define PDB_ATOM_FORMAT_O "%6s%5d %4s%c%3s %c%4d%c   %8.3f%8.3f%8.3f%6.2f%6.2f      %-4s%2s%2s\n"
+#define PDB_ATOM_FORMAT_O_LENGTH 81
 #define PDB_CRYST1_FORMAT_O "%6s%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f%11s%4d\n"
 
 #define BGF_CRYSTX_FORMAT "%8s%11s%11s%11s%11s%11s%11s"
 
 char Read_PDB( char*, reax_system*, control_params*,
-               simulation_data*, static_storage* );
+        simulation_data*, static_storage* );
+
 char Read_BGF( char*, reax_system*, control_params*,
-               simulation_data*, static_storage* );
+        simulation_data*, static_storage* );
 
-char Write_PDB( reax_system*, control_params*, simulation_data*,
-                static_storage*, list*, output_controls* );
+char Write_PDB( reax_system*, list*, simulation_data*,
+        control_params*, static_storage*, output_controls* );
 
 #endif
diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c
index b6b5997db4000d3d67155189090b4be74e9318f2..92a625e6c7882dd2e032524edbedfaa3ce201d05 100644
--- a/sPuReMD/src/init_md.c
+++ b/sPuReMD/src/init_md.c
@@ -217,6 +217,47 @@ void Init_Simulation_Data( reax_system *system, control_params *control,
 }
 
 
+/* Initialize Taper params */
+void Init_Taper( control_params *control )
+{
+    real d1, d7;
+    real swa, swa2, swa3;
+    real swb, swb2, swb3;
+
+    swa = control->r_low;
+    swb = control->r_cut;
+
+    if ( fabs( swa ) > 0.01 )
+        fprintf( stderr, "Warning: non-zero value for lower Taper-radius cutoff\n" );
+
+    if ( swb < 0 )
+    {
+        fprintf( stderr, "Negative value for upper Taper-radius cutoff\n" );
+        exit( INVALID_INPUT );
+    }
+    else if ( swb < 5 )
+        fprintf( stderr, "Warning: low value for upper Taper-radius cutoff:%f\n",
+                 swb );
+
+    d1 = swb - swa;
+    d7 = POW( d1, 7.0 );
+    swa2 = SQR( swa );
+    swa3 = CUBE( swa );
+    swb2 = SQR( swb );
+    swb3 = CUBE( swb );
+
+    control->Tap7 =  20.0 / d7;
+    control->Tap6 = -70.0 * (swa + swb) / d7;
+    control->Tap5 =  84.0 * (swa2 + 3.0 * swa * swb + swb2) / d7;
+    control->Tap4 = -35.0 * (swa3 + 9.0 * swa2 * swb + 9.0 * swa * swb2 + swb3 ) / d7;
+    control->Tap3 = 140.0 * (swa3 * swb + 3.0 * swa2 * swb2 + swa * swb3 ) / d7;
+    control->Tap2 = -210.0 * (swa3 * swb2 + swa2 * swb3) / d7;
+    control->Tap1 = 140.0 * swa3 * swb3 / d7;
+    control->Tap0 = (-35.0 * swa3 * swb2 * swb2 + 21.0 * swa2 * swb3 * swb2 +
+                     7.0 * swa * swb3 * swb3 + swb3 * swb3 * swb ) / d7;
+}
+
+
 void Init_Workspace( reax_system *system, control_params *control,
                      static_storage *workspace )
 {
@@ -347,6 +388,9 @@ void Init_Workspace( reax_system *system, control_params *control,
     workspace->realloc.gcell_atoms = -1;
 
     Reset_Workspace( system, workspace );
+
+    /* Initialize Taper function */
+    Init_Taper( control );
 }
 
 
@@ -361,7 +405,7 @@ void Init_Lists( reax_system *system, control_params *control,
     if ( !Make_List(system->N, num_nbrs, TYP_FAR_NEIGHBOR, (*lists) + FAR_NBRS) )
     {
         fprintf(stderr, "Problem in initializing far nbrs list. Terminating!\n");
-        exit( INIT_ERR );
+        exit( CANNOT_INITIALIZE );
     }
 #if defined(DEBUG_FOCUS)
     fprintf( stderr, "memory allocated: far_nbrs = %ldMB\n",
@@ -424,7 +468,7 @@ void Init_Lists( reax_system *system, control_params *control,
     if (!Make_List(num_bonds, num_3body, TYP_THREE_BODY, (*lists) + THREE_BODIES))
     {
         fprintf( stderr, "Problem in initializing angles list. Terminating!\n" );
-        exit( INIT_ERR );
+        exit( CANNOT_INITIALIZE );
     }
 #if defined(DEBUG_FOCUS)
     fprintf( stderr, "estimated storage - num_3body: %d\n", num_3body );
@@ -435,13 +479,13 @@ void Init_Lists( reax_system *system, control_params *control,
     if (!Make_List( system->N, num_bonds * 8, TYP_DDELTA, (*lists) + DDELTA ))
     {
         fprintf( stderr, "Problem in initializing dDelta list. Terminating!\n" );
-        exit( INIT_ERR );
+        exit( CANNOT_INITIALIZE );
     }
 
     if ( !Make_List( num_bonds, num_bonds * MAX_BONDS * 3, TYP_DBO, (*lists) + DBO ) )
     {
         fprintf( stderr, "Problem in initializing dBO list. Terminating!\n" );
-        exit( INIT_ERR );
+        exit( CANNOT_INITIALIZE );
     }
 #endif
 
@@ -670,7 +714,7 @@ void Init_Out_Controls(reax_system *system, control_params *control,
        out_control->pdb == NULL )
        {
        fprintf( stderr, "FILE OPEN ERROR. TERMINATING..." );
-       exit( CANNOT_OPEN_OUTFILE );
+       exit( CANNOT_OPEN_FILE );
        }*/
 }
 
diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h
index 9d5136efec2dfee42cfc8a0ecf9c6f02b869ccd8..3202c622bb842356ca453284573da12887234391 100644
--- a/sPuReMD/src/mytypes.h
+++ b/sPuReMD/src/mytypes.h
@@ -46,6 +46,11 @@
 //#define REORDER_ATOMS  // turns on nbrgen opt by re-ordering atoms
 //#define LGJ
 
+#define SUCCESS  1
+#define FAILURE  0
+#define TRUE  1
+#define FALSE 0
+
 #define EXP    exp
 #define SQRT   sqrt
 #define POW    pow
@@ -53,6 +58,8 @@
 #define COS    cos
 #define SIN    sin
 #define TAN    tan
+#define FABS   fabs
+#define FMOD   fmod
 
 #define SQR(x)        ((x)*(x))
 #define CUBE(x)       ((x)*(x)*(x))
@@ -61,6 +68,15 @@
 #define MAX( x, y )   (((x) > (y)) ? (x) : (y))
 #define MIN( x, y )   (((x) < (y)) ? (x) : (y))
 
+/* NaN IEEE 754 representation for C99 in math.h
+ * Note: function choice must match REAL typedef below */
+#ifdef NAN
+#define IS_NAN_REAL(a) (isnan(a))
+#else
+#warn "No support for NaN"
+#define NAN_REAL(a) (0)
+#endif
+
 #define PI            3.14159265
 #define C_ele          332.06371
 //#define K_B         503.398008   // kcal/mol/K
@@ -77,11 +93,16 @@
 #define AVOGNR          6.0221367e23
 #define P_CONV          1.0e-24 * AVOGNR * JOULES_to_CAL
 
-#define MAX_STR             100      // MAX STRing length (used for naming)
+#define MAX_STR             1024
+#define MAX_LINE            1024
+#define MAX_TOKENS          1024
+#define MAX_TOKEN_LEN       1024
+
 #define MAX_ATOM_ID         100000
 #define MAX_RESTRICT        15
 #define MAX_MOLECULE_SIZE   20
 #define MAX_ATOM_TYPES      25
+
 #define MAX_GRID            50
 #define MAX_3BODY_PARAM     5
 #define MAX_4BODY_PARAM     5
@@ -95,24 +116,6 @@
 #define MAX_ITR             10
 #define RESTART             50
 
-#define FILE_NOT_FOUND_ERR    10
-#define UNKNOWN_ATOM_TYPE_ERR 11
-#define CANNOT_OPEN_OUTFILE   12
-#define INIT_ERR              13
-#define INSUFFICIENT_SPACE    14
-#define UNKNOWN_OPTION        15
-#define INVALID_INPUT         16
-#define NUMERIC_BREAKDOWN     17
-
-#define C_ATOM  0
-#define H_ATOM  1
-#define O_ATOM  2
-#define N_ATOM  3
-#define S_ATOM  4
-#define SI_ATOM 5
-#define GE_ATOM 6
-#define X_ATOM  7
-
 #define ZERO           0.000000000000000e+00
 #define ALMOST_ZERO    1e-10
 #define NEG_INF       -1e10
@@ -134,35 +137,71 @@ typedef int  ivec[3];
 typedef real rtensor[3][3];
 
 /* config params */
-enum ensemble {
-	NVE = 0, NVT = 1, NPT = 2, sNPT = 3, iNPT = 4, ensNR = 5, bNVT = 6
+enum ensemble
+{
+    NVE = 0, NVT = 1, NPT = 2, sNPT = 3, iNPT = 4, ensNR = 5, bNVT = 6,
 };
-enum interaction_list_offets {
-	FAR_NBRS = 0, NEAR_NBRS = 1, THREE_BODIES = 2, BONDS = 3, OLD_BONDS = 4,
-	HBONDS = 5, DBO = 6, DDELTA = 7, LIST_N = 8
+
+enum interaction_list_offets
+{
+    FAR_NBRS = 0, NEAR_NBRS = 1, THREE_BODIES = 2, BONDS = 3, OLD_BONDS = 4,
+    HBONDS = 5, DBO = 6, DDELTA = 7, LIST_N = 8,
 };
-enum interaction_type {
-	TYP_VOID = 0, TYP_THREE_BODY = 1, TYP_BOND = 2, TYP_HBOND = 3, TYP_DBO = 4,
-	TYP_DDELTA = 5, TYP_FAR_NEIGHBOR = 6, TYP_NEAR_NEIGHBOR = 7, TYP_N = 8
+
+enum interaction_type
+{
+    TYP_VOID = 0, TYP_THREE_BODY = 1, TYP_BOND = 2, TYP_HBOND = 3, TYP_DBO = 4,
+    TYP_DDELTA = 5, TYP_FAR_NEIGHBOR = 6, TYP_NEAR_NEIGHBOR = 7, TYP_N = 8,
 };
-enum molecule_type {
-	UNKNOWN = 0, WATER = 1
+
+enum errors
+{
+    FILE_NOT_FOUND = -10, UNKNOWN_ATOM_TYPE = -11,
+    CANNOT_OPEN_FILE = -12, CANNOT_INITIALIZE = -13,
+    INSUFFICIENT_MEMORY = -14, UNKNOWN_OPTION = -15,
+    INVALID_INPUT = -16, INVALID_GEO = -17,
+    NUMERIC_BREAKDOWN = -18,
 };
-enum molecular_analysis_type {
-	NO_ANALYSIS = 0, FRAGMENTS = 1, REACTIONS = 2, NUM_ANALYSIS = 3
+
+enum atoms
+{
+    C_ATOM = 0, H_ATOM = 1, O_ATOM = 2, N_ATOM = 3,
+    S_ATOM = 4, SI_ATOM = 5, GE_ATOM = 6, X_ATOM = 7,
 };
-enum restart_format {
-	WRITE_ASCII = 0, WRITE_BINARY = 1, RF_N = 2
+
+enum molecule_type
+{
+    UNKNOWN = 0, WATER = 1,
 };
-enum geometry {
-	XYZ = 0, PDB = 1, BGF = 2, ASCII_RESTART = 3, BINARY_RESTART = 4, GF_N = 5
+
+enum molecular_analysis_type
+{
+    NO_ANALYSIS = 0, FRAGMENTS = 1, REACTIONS = 2, NUM_ANALYSIS = 3,
 };
-enum solver {
-	GMRES_S = 0, GMRES_H_S = 1, PGMRES_S = 2,
-	PGMRES_J_S = 3, CG_S = 4, PCG_S = 5, SDM_S = 6,
+
+enum restart_format
+{
+    WRITE_ASCII = 0, WRITE_BINARY = 1, RF_N = 2,
 };
-enum pre_comp {
-	DIAG_PC = 0, ICHOLT_PC = 1, ICHOL_PAR_PC = 2, ILU_SUPERLU_MT_PC = 3,
+
+enum geo_formats
+{
+    CUSTOM = 0, PDB = 1, BGF = 2, ASCII_RESTART = 3, BINARY_RESTART = 4, GF_N = 5,
+};
+
+enum solver
+{
+    GMRES_S = 0, GMRES_H_S = 1, CG_S = 2, SDM_S = 3,
+};
+
+enum pre_comp
+{
+    DIAG_PC = 0, ICHOLT_PC = 1, ILU_PAR_PC = 2, ILUT_PAR_PC = 3, ILU_SUPERLU_MT_PC = 4,
+};
+
+enum pre_app
+{
+    NONE_PA = 0, TRI_SOLVE_PA = 1, TRI_SOLVE_LEVEL_SCHED_PA = 2, JACOBI_ITER_PA = 3,
 };
 
 
@@ -357,10 +396,14 @@ typedef struct
 
 typedef struct
 {
-    rvec x, v, f;        /* Position, velocity, force on atom */
-    real q;              /* Charge on the atom */
     int  type;           /* Type of this atom */
-    char name[5];
+    char name[8];
+
+    rvec x; // position
+    rvec v; // velocity
+    rvec f; // force
+
+    real q;              /* Charge on the atom */
 } reax_atom;
 
 
@@ -472,6 +515,7 @@ typedef struct
     unsigned int pre_comp_refactor;
     real pre_comp_droptol;
     unsigned int pre_comp_sweeps;
+    unsigned int pre_app_type;
     unsigned int pre_app_jacobi_iters;
 
     int molec_anal;
@@ -669,21 +713,6 @@ typedef struct
 } bond_data;
 
 
-/* compressed row storage (crs) format
- *   j: column index for corresponding matrix entry
- *   val: matrix entry
- * */
-// TODO: move pointers into below struct (question about addressing performance)
-// Consider: __attribute__((packed)) or similar in structure definition
-//   to avoid having the compiler add padding bytes (manual alignment along word boundary)
-// Warning: padding is implementation (compiler) dependent, so not portable,
-//   and also potentially dangerous
-typedef struct
-{
-    int j;
-    real val;
-} sparse_matrix_entry;
-
 /* compressed row storage (crs) format
  * See, e.g.,
  *   http://netlib.org/linalg/html_templates/node91.html#SECTION00931100000000000000
@@ -691,15 +720,15 @@ typedef struct
  *   m: number of nonzeros (NNZ) ALLOCATED
  *   n: number of rows
  *   start: row pointer (last element contains ACTUAL NNZ)
- *   entries: (column index, val) pairs
+ *   j: column index for corresponding matrix entry
+ *   val: matrix entry
  * */
 typedef struct
 {
     int n, m;
     int *start;
-    sparse_matrix_entry *entries;
-//  int *j;
-//  real *val;
+    int *j;
+    real *val;
 } sparse_matrix;
 
 
diff --git a/sPuReMD/src/neighbors.c b/sPuReMD/src/neighbors.c
index 9b04cfa8c4e6f1d1886adc09a4fe68d73a060a45..8d225de100c9f777c058c9f68cc87a02aff6caf2 100644
--- a/sPuReMD/src/neighbors.c
+++ b/sPuReMD/src/neighbors.c
@@ -134,7 +134,7 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control,
         {
             fprintf( stderr, "step%d-ran out of space on far_nbrs: top=%d, max=%d",
                      data->step, num_far, far_nbrs->num_intrs );
-            exit( INSUFFICIENT_SPACE );
+            exit( INSUFFICIENT_MEMORY );
         }
     }
 
diff --git a/sPuReMD/src/print_utils.c b/sPuReMD/src/print_utils.c
index 710ee56836a7f781deccd15dcdd8e92602c0c6fb..7ba057f8937f957b01998b11a1e0cb7072dab964 100644
--- a/sPuReMD/src/print_utils.c
+++ b/sPuReMD/src/print_utils.c
@@ -21,7 +21,7 @@
 
 #include "print_utils.h"
 #include "list.h"
-#include "pdb_tools.h"
+#include "geo_tools.h"
 #include "system_props.h"
 #include "vector.h"
 
@@ -375,18 +375,6 @@ void Init_Force_Test_Functions( )
 #endif
 
 
-char *Get_Element( reax_system *system, int i )
-{
-    return &( system->reaxprm.sbp[system->atoms[i].type].name[0] );
-}
-
-
-char *Get_Atom_Name( reax_system *system, int i )
-{
-    return &(system->atoms[i].name[0]);
-}
-
-
 /* near nbrs contain both i-j, j-i nbrhood info */
 void Print_Near_Neighbors( reax_system *system, control_params *control,
                            static_storage *workspace, list **lists )
@@ -690,7 +678,7 @@ void Output_Results( reax_system *system, control_params *control,
         out_control->append_traj_frame( system, control, data,
                                         workspace, lists, out_control );
 
-        //Write_PDB( system, control, data, workspace, *lists+BONDS, out_control );
+        //Write_PDB( system, *lists+BONDS, data, control, workspace, out_control );
         // t_elapsed = Get_Timing_Info( t_start );
         // fprintf(stdout, "append_frame took %.6f seconds\n", t_elapsed );
     }
@@ -736,16 +724,16 @@ void Print_Linear_System( reax_system *system, control_params *control,
         for ( j = H->start[i]; j < H->start[i + 1] - 1; ++j )
         {
             fprintf( out, "%6d%6d %24.15e\n",
-                     workspace->orig_id[i], workspace->orig_id[H->entries[j].j],
-                     H->entries[j].val );
+                     workspace->orig_id[i], workspace->orig_id[H->j[j]],
+                     H->val[j] );
 
             fprintf( out, "%6d%6d %24.15e\n",
-                     workspace->orig_id[H->entries[j].j], workspace->orig_id[i],
-                     H->entries[j].val );
+                     workspace->orig_id[H->j[j]], workspace->orig_id[i],
+                     H->val[j] );
         }
         // the diagonal entry
         fprintf( out, "%6d%6d %24.15e\n",
-                 workspace->orig_id[i], workspace->orig_id[i], H->entries[j].val );
+                 workspace->orig_id[i], workspace->orig_id[i], H->val[j] );
     }
 
     fclose( out );
@@ -759,16 +747,16 @@ void Print_Linear_System( reax_system *system, control_params *control,
         for ( j = H->start[i]; j < H->start[i + 1] - 1; ++j )
         {
             fprintf( out, "%6d%6d %24.15e\n",
-                     workspace->orig_id[i], workspace->orig_id[H->entries[j].j],
-                     H->entries[j].val );
+                     workspace->orig_id[i], workspace->orig_id[H->j[j]],
+                     H->val[j] );
 
             fprintf( out, "%6d%6d %24.15e\n",
-                     workspace->orig_id[H->entries[j].j], workspace->orig_id[i],
-                     H->entries[j].val );
+                     workspace->orig_id[H->j[j]], workspace->orig_id[i],
+                     H->val[j] );
         }
         // the diagonal entry
         fprintf( out, "%6d%6d %24.15e\n",
-                 workspace->orig_id[i], workspace->orig_id[i], H->entries[j].val );
+                 workspace->orig_id[i], workspace->orig_id[i], H->val[j] );
     }
 
     fclose( out );
@@ -831,7 +819,7 @@ void Print_Sparse_Matrix( sparse_matrix *A )
     {
         fprintf( stderr, "i:%d  j(val):", i );
         for ( j = A->start[i]; j < A->start[i + 1]; ++j )
-            fprintf( stderr, "%d(%.4f) ", A->entries[j].j, A->entries[j].val );
+            fprintf( stderr, "%d(%.4f) ", A->j[j], A->val[j] );
         fprintf( stderr, "\n" );
     }
 }
@@ -848,7 +836,7 @@ void Print_Sparse_Matrix2( sparse_matrix *A, char *fname )
         {
             //fprintf( f, "%d%d %.15e\n", A->entries[j].j, i, A->entries[j].val );
             //Convert 0-based to 1-based (for Matlab)
-            fprintf( f, "%6d%6d %24.15e\n", i+1, A->entries[j].j+1, A->entries[j].val );
+            fprintf( f, "%6d%6d %24.15e\n", i+1, A->j[j]+1, A->val[j] );
         }
     }
 
diff --git a/sPuReMD/src/system_props.c b/sPuReMD/src/system_props.c
index 52c1d2713ea0b84f8ea729306ba856645d191d2f..fc93a474cf378f1a382d0ae017cf15a9b23eb17a 100644
--- a/sPuReMD/src/system_props.c
+++ b/sPuReMD/src/system_props.c
@@ -20,30 +20,10 @@
   ----------------------------------------------------------------------*/
 
 #include "system_props.h"
-#include "box.h"
+#include "tool_box.h"
 #include "vector.h"
 
 
-real Get_Time( )
-{
-    struct timeval tim;
-
-    gettimeofday(&tim, NULL );
-    return ( tim.tv_sec + (tim.tv_usec / 1000000.0) );
-}
-
-
-real Get_Timing_Info( real t_start )
-{
-    struct timeval tim;
-    real t_end;
-
-    gettimeofday(&tim, NULL );
-    t_end = tim.tv_sec + (tim.tv_usec / 1000000.0);
-    return (t_end - t_start);
-}
-
-
 void Temperature_Control( control_params *control, simulation_data *data,
                           output_controls *out_control )
 {
diff --git a/sPuReMD/src/testmd.c b/sPuReMD/src/testmd.c
index f6e9003c3e81cfec46f2292d35a08b398d549bb4..5328a5bc2f65a4d4d398b08e8864288b3706c705 100644
--- a/sPuReMD/src/testmd.c
+++ b/sPuReMD/src/testmd.c
@@ -21,24 +21,25 @@
 
 #include "mytypes.h"
 #include "analyze.h"
-#include "box.h"
+#include "control.h"
+#include "ffield.h"
 #include "forces.h"
 #include "init_md.h"
-#include "integrate.h"
 #include "neighbors.h"
-#include "param.h"
-#include "pdb_tools.h"
+#include "geo_tools.h"
 #include "print_utils.h"
 #include "reset_utils.h"
 #include "restart.h"
 #include "system_props.h"
-#include "traj.h"
 #include "vector.h"
 
 
-void Post_Evolve( reax_system* system, control_params* control,
-                  simulation_data* data, static_storage* workspace,
-                  list** lists, output_controls *out_control )
+static void Post_Evolve( reax_system * const system,
+        control_params * const control,
+        simulation_data * const data,
+        static_storage * const workspace,
+        list ** const lists,
+        output_controls * const out_control )
 {
     int i;
     rvec diff, cross;
@@ -77,22 +78,26 @@ void Post_Evolve( reax_system* system, control_params* control,
 }
 
 
-void Read_System( char *geof, char *ff, char *ctrlf,
-                  reax_system *system, control_params *control,
-                  simulation_data *data, static_storage *workspace,
-                  output_controls *out_control )
+void static Read_System( char * const geo_file,
+        char * const ffield_file,
+        char * const control_file,
+        reax_system * const system,
+        control_params * const control,
+        simulation_data * const data,
+        static_storage * const workspace,
+        output_controls * const out_control )
 {
     FILE *ffield, *ctrl;
 
-    if ( (ffield = fopen( ff, "r" )) == NULL )
+    if ( (ffield = fopen( ffield_file, "r" )) == NULL )
     {
         fprintf( stderr, "Error opening the ffield file!\n" );
-        exit( FILE_NOT_FOUND_ERR );
+        exit( FILE_NOT_FOUND );
     }
-    if ( (ctrl = fopen( ctrlf, "r" )) == NULL )
+    if ( (ctrl = fopen( control_file, "r" )) == NULL )
     {
         fprintf( stderr, "Error opening the ffield file!\n" );
-        exit( FILE_NOT_FOUND_ERR );
+        exit( FILE_NOT_FOUND );
     }
 
     /* ffield file */
@@ -102,34 +107,37 @@ void Read_System( char *geof, char *ff, char *ctrlf,
     Read_Control_File( ctrl, system, control, out_control );
 
     /* geo file */
-    if ( control->geo_format == XYZ )
+    if ( control->geo_format == CUSTOM )
     {
-        fprintf( stderr, "xyz input is not implemented yet\n" );
-        exit(1);
+        Read_Geo( geo_file, system, control, data, workspace );
     }
     else if ( control->geo_format == PDB )
-        Read_PDB( geof, system, control, data, workspace );
+    {
+        Read_PDB( geo_file, system, control, data, workspace );
+    }
     else if ( control->geo_format == BGF )
-        Read_BGF( geof, system, control, data, workspace );
+    {
+        Read_BGF( geo_file, system, control, data, workspace );
+    }
     else if ( control->geo_format == ASCII_RESTART )
     {
-        Read_ASCII_Restart( geof, system, control, data, workspace );
+        Read_ASCII_Restart( geo_file, system, control, data, workspace );
         control->restart = 1;
     }
     else if ( control->geo_format == BINARY_RESTART )
     {
-        Read_Binary_Restart( geof, system, control, data, workspace );
+        Read_Binary_Restart( geo_file, system, control, data, workspace );
         control->restart = 1;
     }
     else
     {
         fprintf( stderr, "unknown geo file format. terminating!\n" );
-        exit(1);
+        exit( INVALID_GEO );
     }
 
 #if defined(DEBUG_FOCUS)
     fprintf( stderr, "input files have been read...\n" );
-    Print_Box_Information( &(system->box), stderr );
+    Print_Box( &(system->box), stderr );
 #endif
 }
 
@@ -198,13 +206,12 @@ int main(int argc, char* argv[])
     if ( out_control.write_steps > 0 )
     {
         fclose( out_control.trj );
-        Write_PDB( &system, &control, &data, &workspace,
-                   &(lists[BONDS]), &out_control );
+        Write_PDB( &system, &(lists[BONDS]), &data, &control, &workspace, &out_control );
     }
 
     data.timing.end = Get_Time( );
     data.timing.elapsed = Get_Timing_Info( data.timing.start );
     fprintf( out_control.log, "total: %.2f secs\n", data.timing.elapsed );
 
-    return 0;
+    return SUCCESS;
 }
diff --git a/sPuReMD/src/three_body_interactions.c b/sPuReMD/src/three_body_interactions.c
index 9aef73d4692e50b6a75de4897108b43bcd0e9a5b..d12ab80f30a38468e1819c4963e933e21c8adf48 100644
--- a/sPuReMD/src/three_body_interactions.c
+++ b/sPuReMD/src/three_body_interactions.c
@@ -571,7 +571,7 @@ void Three_Body_Interactions( reax_system *system, control_params *control,
         {
             fprintf( stderr, "step%d-ran out of space on angle_list: top=%d, max=%d",
                      data->step, num_thb_intrs, thb_intrs->num_intrs );
-            exit( INSUFFICIENT_SPACE );
+            exit( INSUFFICIENT_MEMORY );
         }
     }
 
diff --git a/sPuReMD/src/tool_box.c b/sPuReMD/src/tool_box.c
new file mode 100644
index 0000000000000000000000000000000000000000..776bb565f9cb21d7f9ef38259f42bc13fca4fb91
--- /dev/null
+++ b/sPuReMD/src/tool_box.c
@@ -0,0 +1,468 @@
+/*----------------------------------------------------------------------
+  SerialReax - Reax Force Field Simulator
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, haktulga@cs.purdue.edu
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#include "mytypes.h"
+#include "tool_box.h"
+
+#include <ctype.h>
+
+
+/************** taken from box.c **************/
+void Transform( rvec x1, simulation_box *box, char flag, rvec x2 )
+{
+    int i, j;
+    real tmp;
+
+    //  printf(">x1: (%lf, %lf, %lf)\n",x1[0],x1[1],x1[2]);
+
+    if (flag > 0)
+    {
+        for (i = 0; i < 3; i++)
+        {
+            tmp = 0.0;
+            for (j = 0; j < 3; j++)
+                tmp += box->trans[i][j] * x1[j];
+            x2[i] = tmp;
+        }
+    }
+    else
+    {
+        for (i = 0; i < 3; i++)
+        {
+            tmp = 0.0;
+            for (j = 0; j < 3; j++)
+                tmp += box->trans_inv[i][j] * x1[j];
+            x2[i] = tmp;
+        }
+    }
+    //  printf(">x2: (%lf, %lf, %lf)\n", x2[0], x2[1], x2[2]);
+}
+
+
+void Transform_to_UnitBox( rvec x1, simulation_box *box, char flag, rvec x2 )
+{
+    Transform( x1, box, flag, x2 );
+
+    x2[0] /= box->box_norms[0];
+    x2[1] /= box->box_norms[1];
+    x2[2] /= box->box_norms[2];
+}
+
+
+/* determine whether point p is inside the box */
+void Fit_to_Periodic_Box( simulation_box *box, rvec *p )
+{
+    int i;
+
+    for ( i = 0; i < 3; ++i )
+    {
+        //TODO: verify box boundary coordinates -- assuming orthogonal box pinned at origin
+        if ( (*p)[i] < 0. )
+        {
+            /* handle lower coords */
+            while ( (*p)[i] < 0. )
+                (*p)[i] += box->box_norms[i];
+        }
+        else if ( (*p)[i] >= box->box_norms[i] )
+        {
+            /* handle higher coords */
+            while ( (*p)[i] >= box->box_norms[i] )
+                (*p)[i] -= box->box_norms[i];
+        }
+//        if ( (*p)[i] < box->min[i] )
+//        {
+//            /* handle lower coords */
+//            while ( (*p)[i] < box->min[i] )
+//                (*p)[i] += box->box_norms[i];
+//        }
+//        else if ( (*p)[i] >= box->max[i] )
+//        {
+//            /* handle higher coords */
+//            while ( (*p)[i] >= box->max[i] )
+//                (*p)[i] -= box->box_norms[i];
+//        }
+    }
+}
+
+
+/* determine the touch point, tp, of a box to
+   its neighbor denoted by the relative coordinate rl */
+/*
+inline void Box_Touch_Point( simulation_box *box, ivec rl, rvec tp )
+{
+    int d;
+
+    for ( d = 0; d < 3; ++d )
+        if ( rl[d] == -1 )
+            tp[d] = box->min[d];
+        else if ( rl[d] == 0 )
+            tp[d] = NEG_INF - 1.;
+        else
+            tp[d] = box->max[d];
+}
+*/
+
+
+/* determine whether point p is inside the box */
+/* assumes orthogonal box */
+/*
+inline int is_Inside_Box( simulation_box *box, rvec p )
+{
+    if ( p[0] < box->min[0] || p[0] >= box->max[0] ||
+            p[1] < box->min[1] || p[1] >= box->max[1] ||
+            p[2] < box->min[2] || p[2] >= box->max[2] )
+        return FALSE;
+
+    return TRUE;
+}
+*/
+
+
+/*
+inline int iown_midpoint( simulation_box *box, rvec p1, rvec p2 )
+{
+    rvec midp;
+
+    midp[0] = (p1[0] + p2[0]) / 2;
+    midp[1] = (p1[1] + p2[1]) / 2;
+    midp[2] = (p1[2] + p2[2]) / 2;
+
+    if ( midp[0] < box->min[0] || midp[0] >= box->max[0] ||
+            midp[1] < box->min[1] || midp[1] >= box->max[1] ||
+            midp[2] < box->min[2] || midp[2] >= box->max[2] )
+        return FALSE;
+
+    return TRUE;
+}
+*/
+
+
+/**************** from grid.c ****************/
+/* finds the closest point of grid cell cj to ci.
+   no need to consider periodic boundary conditions as in the serial case
+   because the box of a process is not periodic in itself */
+/*
+inline void GridCell_Closest_Point( grid_cell *gci, grid_cell *gcj,
+        ivec ci, ivec cj, rvec cp )
+{
+    int  d;
+
+    for ( d = 0; d < 3; d++ )
+        if ( cj[d] > ci[d] )
+            cp[d] = gcj->min[d];
+        else if ( cj[d] == ci[d] )
+            cp[d] = NEG_INF - 1.;
+        else
+            cp[d] = gcj->max[d];
+}
+
+
+inline void GridCell_to_Box_Points( grid_cell *gc, ivec rl, rvec cp, rvec fp )
+{
+    int d;
+
+    for ( d = 0; d < 3; ++d )
+        if ( rl[d] == -1 )
+        {
+            cp[d] = gc->min[d];
+            fp[d] = gc->max[d];
+        }
+        else if ( rl[d] == 0 )
+        {
+            cp[d] = fp[d] = NEG_INF - 1.;
+        }
+        else
+        {
+            cp[d] = gc->max[d];
+            fp[d] = gc->min[d];
+        }
+}
+
+
+inline real DistSqr_between_Special_Points( rvec sp1, rvec sp2 )
+{
+    int  i;
+    real d_sqr = 0;
+
+    for ( i = 0; i < 3; ++i )
+    {
+        if ( sp1[i] > NEG_INF && sp2[i] > NEG_INF )
+        {
+            d_sqr += SQR( sp1[i] - sp2[i] );
+        }
+    }
+
+    return d_sqr;
+}
+
+
+inline real DistSqr_to_Special_Point( rvec cp, rvec x )
+{
+    int  i;
+    real d_sqr = 0;
+
+    for ( i = 0; i < 3; ++i )
+    {
+        if ( cp[i] > NEG_INF )
+        {
+            d_sqr += SQR( cp[i] - x[i] );
+        }
+    }
+
+    return d_sqr;
+}
+
+
+inline int Relative_Coord_Encoding( ivec c )
+{
+    return 9 * (c[0] + 1) + 3 * (c[1] + 1) + (c[2] + 1);
+}
+*/
+
+
+/************** from geo_tools.c *****************/
+void Make_Point( real x, real y, real z, rvec* p )
+{
+    (*p)[0] = x;
+    (*p)[1] = y;
+    (*p)[2] = z;
+}
+
+
+int is_Valid_Serial( static_storage *workspace, int serial )
+{
+    if( workspace->map_serials[ serial ] < 0 )
+    {
+        fprintf( stderr, "CONECT line includes invalid pdb serial number %d.\n", serial );
+        fprintf( stderr, "Please correct the input file.Terminating...\n" );
+        exit( INVALID_INPUT );
+    }
+
+    return TRUE;
+}
+
+
+int Check_Input_Range( int val, int lo, int hi, char *message )
+{
+    if ( val < lo || val > hi )
+    {
+        fprintf( stderr, "%s\nInput %d - Out of range %d-%d. Terminating...\n",
+                 message, val, lo, hi );
+        exit( INVALID_INPUT );
+    }
+
+    return SUCCESS;
+}
+
+
+void Trim_Spaces( char *element )
+{
+    int i, j;
+
+    for ( i = 0; element[i] == ' '; ++i ); // skip initial space chars
+
+    for ( j = i; j < (int)(strlen(element)) && element[j] != ' '; ++j )
+    {
+        element[j - i] = toupper( element[j] ); // make uppercase, offset to 0
+    }
+    element[j - i] = 0; // finalize the string
+}
+
+
+/************ from system_props.c *************/
+real Get_Time( )
+{
+    gettimeofday(&tim, NULL );
+    return ( tim.tv_sec + (tim.tv_usec / 1000000.0) );
+}
+
+
+real Get_Timing_Info( real t_start )
+{
+    gettimeofday(&tim, NULL );
+    t_end = tim.tv_sec + (tim.tv_usec / 1000000.0);
+    return (t_end - t_start);
+}
+
+
+void Update_Timing_Info( real *t_start, real *timing )
+{
+    gettimeofday(&tim, NULL );
+    t_end = tim.tv_sec + (tim.tv_usec / 1000000.0);
+    *timing += (t_end - *t_start);
+    *t_start = t_end;
+}
+
+
+/*********** from io_tools.c **************/
+int Get_Atom_Type( reax_interaction *reax_param, char *s )
+{
+    int i;
+
+    for ( i = 0; i < reax_param->num_atom_types; ++i )
+    {
+        if ( !strcmp( reax_param->sbp[i].name, s ) )
+        {
+            return i;
+        }
+    }
+
+    fprintf( stderr, "Unknown atom type %s. Terminating...\n", s );
+    exit( UNKNOWN_ATOM_TYPE );
+
+    return FAILURE;
+}
+
+
+char *Get_Element( reax_system *system, int i )
+{
+    return &( system->reaxprm.sbp[system->atoms[i].type].name[0] );
+}
+
+
+char *Get_Atom_Name( reax_system *system, int i )
+{
+    return &(system->atoms[i].name[0]);
+}
+
+
+int Allocate_Tokenizer_Space( char **line, char **backup, char ***tokens )
+{
+    int i;
+
+    if ( (*line = (char*) malloc( sizeof(char) * MAX_LINE )) == NULL )
+    {
+        return FAILURE;
+    }
+
+    if ( (*backup = (char*) malloc( sizeof(char) * MAX_LINE )) == NULL )
+    {
+        return FAILURE;
+    }
+
+    if ( (*tokens = (char**) malloc( sizeof(char*) * MAX_TOKENS )) == NULL )
+    {
+        return FAILURE;
+    }
+
+    for ( i = 0; i < MAX_TOKENS; i++ )
+    {
+        if ( ((*tokens)[i] = (char*) malloc(sizeof(char) * MAX_TOKEN_LEN)) == NULL )
+        {
+            return FAILURE;
+        }
+    }
+
+    return SUCCESS;
+}
+
+
+int Tokenize( char* s, char*** tok )
+{
+    char test[MAX_LINE];
+    char *sep = "\t \n!=";
+    char *word;
+    int count = 0;
+
+    strncpy( test, s, MAX_LINE );
+
+    for ( word = strtok(test, sep); word; word = strtok(NULL, sep) )
+    {
+        strncpy( (*tok)[count], word, MAX_LINE );
+        count++;
+    }
+
+    return count;
+}
+
+
+/***************** taken from lammps ************************/
+/* safe malloc */
+void *smalloc( long n, char *name )
+{
+    void *ptr;
+
+    if ( n <= 0 )
+    {
+        fprintf( stderr, "WARNING: trying to allocate %ld bytes for array %s. ",
+                 n, name );
+        fprintf( stderr, "returning NULL.\n" );
+        return NULL;
+    }
+
+    ptr = malloc( n );
+    if ( ptr == NULL )
+    {
+        fprintf( stderr, "ERROR: failed to allocate %ld bytes for array %s",
+                 n, name );
+        exit( INSUFFICIENT_MEMORY );
+    }
+
+    return ptr;
+}
+
+
+/* safe calloc */
+void *scalloc( int n, int size, char *name )
+{
+    void *ptr;
+
+    if ( n <= 0 )
+    {
+        fprintf( stderr, "WARNING: trying to allocate %d elements for array %s. ",
+                 n, name );
+        fprintf( stderr, "returning NULL.\n" );
+        return NULL;
+    }
+
+    if ( size <= 0 )
+    {
+        fprintf( stderr, "WARNING: elements size for array %s is %d. ",
+                 name, size );
+        fprintf( stderr, "returning NULL.\n" );
+        return NULL;
+    }
+
+    ptr = calloc( n, size );
+    if ( ptr == NULL )
+    {
+        fprintf( stderr, "ERROR: failed to allocate %d bytes for array %s",
+                 n * size, name );
+        exit( INSUFFICIENT_MEMORY );
+    }
+
+    return ptr;
+}
+
+
+/* safe free */
+void sfree( void *ptr, char *name )
+{
+    if ( ptr == NULL )
+    {
+        fprintf( stderr, "WARNING: trying to free the already NULL pointer %s!\n",
+                 name );
+        return;
+    }
+
+    free( ptr );
+    ptr = NULL;
+}
diff --git a/sPuReMD/src/tool_box.h b/sPuReMD/src/tool_box.h
new file mode 100644
index 0000000000000000000000000000000000000000..5712152d9f399fd2c92152a64a2e82d2aacc70dc
--- /dev/null
+++ b/sPuReMD/src/tool_box.h
@@ -0,0 +1,70 @@
+/*----------------------------------------------------------------------
+  SerialReax - Reax Force Field Simulator
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, haktulga@cs.purdue.edu
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the GNU General Public License for more details:
+  <http://www.gnu.org/licenses/>.
+  ----------------------------------------------------------------------*/
+
+#ifndef __TOOL_BOX_H_
+#define __TOOL_BOX_H_
+
+#include "mytypes.h"
+
+struct timeval tim;
+real t_end;
+
+/* from box.h */
+void Transform( rvec, simulation_box*, char, rvec );
+void Transform_to_UnitBox( rvec, simulation_box*, char, rvec );
+void Fit_to_Periodic_Box( simulation_box*, rvec* );
+//void Box_Touch_Point( simulation_box*, ivec, rvec );
+//int  is_Inside_Box( simulation_box*, rvec );
+//int  iown_midpoint( simulation_box*, rvec, rvec );
+
+/* from grid.h */
+/*
+void GridCell_Closest_Point( grid_cell*, grid_cell*, ivec, ivec, rvec );
+void GridCell_to_Box_Points( grid_cell*, ivec, rvec, rvec );
+real DistSqr_between_Special_Points( rvec, rvec );
+real DistSqr_to_Special_Point( rvec, rvec );
+int Relative_Coord_Encoding( ivec );
+*/
+
+/* from geo_tools.h */
+void Make_Point( real, real, real, rvec* );
+int is_Valid_Serial( static_storage*, int );
+int Check_Input_Range( int, int, int, char* );
+void Trim_Spaces( char* );
+
+/* from system_props.h */
+real Get_Time( );
+real Get_Timing_Info( real );
+void Update_Timing_Info( real*, real* );
+
+/* from io_tools.h */
+int Get_Atom_Type( reax_interaction*, char* );
+char *Get_Element( reax_system*, int );
+char *Get_Atom_Name( reax_system*, int );
+int Allocate_Tokenizer_Space( char**, char**, char*** );
+int Tokenize( char*, char*** );
+
+/* from lammps */
+void *smalloc( long, char* );
+void *scalloc( int, int, char* );
+void sfree( void*, char* );
+
+#endif
diff --git a/tools/run_sim.py b/tools/run_sim.py
index 39342fdb03a3f71591453199b561fbc9ca9ba7c8..bcea5dbadefcf0484b189109b438c6cef4df679b 100644
--- a/tools/run_sim.py
+++ b/tools/run_sim.py
@@ -1,5 +1,6 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
+import argparse
 from fileinput import input
 from itertools import product
 from re import sub
@@ -12,7 +13,7 @@ from time import time
 
 class TestCase():
     def __init__(self, geo_file, ffield_file, control_file, params={}, result_header_fmt='',
-            result_header='', result_body_fmt='', result_file='results.txt'):
+            result_header='', result_body_fmt='', result_file='results.txt', geo_format='1'):
         self.__geo_file = geo_file
         self.__ffield_file = ffield_file
         self.__control_file = control_file
@@ -47,33 +48,35 @@ class TestCase():
                     r'(?P<key>geo_format\s+)\S+(?P<comment>.*)', r'\g<key>%s\g<comment>' % x, l), \
         }
 
+        self.__params['geo_format'] = geo_format
+
     def _setup(self, param, temp_file):
         fp = open(self.__control_file, 'r')
-	lines = fp.read()
-	fp.close()
+        lines = fp.read()
+        fp.close()
         for k in self.__control_res.keys():
             try:
                 lines = self.__control_res[k](lines, param[k])
             except KeyError:
                 pass
-        fp_temp = open(temp_file, 'w', 0)
-	fp_temp.write(lines)
-	fp_temp.close()
+        fp_temp = open(temp_file, 'w')
+        fp_temp.write(lines)
+        fp_temp.close()
 
     def run(self, bin_file='sPuReMD/bin/spuremd'):
         base_dir = getcwd()
-	bin_path = path.join(base_dir, bin_file)
+        bin_path = path.join(base_dir, bin_file)
         env = dict(environ)
 
         write_header = True
         if path.exists(self.__result_file):
             write_header = False
-        fout = open(self.__result_file, 'a', 0)
+        fout = open(self.__result_file, 'a')
         if write_header:
             fout.write(self.__result_header_fmt.format(*self.__result_header))
 
         temp_dir = mkdtemp()
-	temp_file = path.join(temp_dir, path.basename(self.__control_file))
+        temp_file = path.join(temp_dir, path.basename(self.__control_file))
 
         for p in product(*[self.__params[k] for k in self.__param_names]):
             param_dict = dict((k, v) for (k, v) in zip(self.__param_names, p))
@@ -95,7 +98,7 @@ class TestCase():
 
         fout.close()
         remove(temp_file)
-	rmdir(temp_dir)
+        rmdir(temp_dir)
 
     def _process_result(self, fout, time, param):
         qeq = 0.
@@ -129,69 +132,122 @@ class TestCase():
 
 
 if __name__ == '__main__':
+    DATA = [ \
+            'bilayer_56800', 'bilayer_340800', \
+            'dna_19733', \
+            'petn_48256', \
+            'silica_6000', 'silica_72000', 'silica_300000', \
+            'water_6540', 'water_78480', 'water_327000', \
+            ]
+
+    parser = argparse.ArgumentParser(description='Run molecular dynamics simulations on specified data sets.')
+    parser.add_argument('data', nargs='+',
+            choices=DATA, help='Data sets for which to run simulations.')
+
+    # parse args and take action
+    args = parser.parse_args()
+
     base_dir = getcwd()
     control_dir = path.join(base_dir, 'environ')
     data_dir = path.join(base_dir, 'data/benchmarks')
 
     header_fmt_str = '{:20}|{:5}|{:5}|{:5}|{:10}|{:10}|{:10}|{:10}|{:10}|{:10}|{:10}\n'
     header_str = ['Data Set', 'Steps', 'Q Tol', 'Pre T', 'Pre Comp',
-		    'Pre App', 'Iters', 'SpMV', 'QEq', 'Threads', 'Time (s)']
+            'Pre App', 'Iters', 'SpMV', 'QEq', 'Threads', 'Time (s)']
     body_fmt_str = '{:20} {:5} {:5} {:5} {:10.6f} {:10.6f} {:10.6f} {:10.6f} {:10.6f} {:10} {:10.6f}\n'
 
     params = {
             'ensemble_type': ['0'],
-            'nsteps': ['20', '100', '500', '1000'],
+            'nsteps': ['20'],
+#            'nsteps': ['20', '100', '500', '1000'],
             'qeq_solver_type': ['0'],
-            'qeq_solver_q_err': ['1e-6', '1e-10'],
+            'qeq_solver_q_err': ['1e-6'],
+#            'qeq_solver_q_err': ['1e-6', '1e-10'],
 #            'qeq_solver_q_err': ['1e-6', '1e-8', '1e-10', '1e-14'],
-            'pre_comp_type': ['0'],
+            'pre_comp_type': ['2'],
 #            'pre_comp_type': ['0', '1', '2'],
-            'pre_comp_refactor': ['1'],
-#            'pre_comp_sweeps': ['3'],
-#            'pre_app_type': ['0'],
-#            'pre_app_jacobi_iters': ['50'],
-            'threads': ['12'],
+            'pre_comp_refactor': ['100'],
+            'pre_comp_sweeps': ['3'],
+            'pre_app_type': ['2'],
+            'pre_app_jacobi_iters': ['50'],
+            'threads': ['2'],
 #            'threads': ['1', '2', '4', '12', '24'],
-            'geo_format': ['1'],
+            'geo_format': [],
     }
 
-    test_cases = [
+    test_cases = []
+    if 'water_6540' in args.data:
+        test_cases.append(
             TestCase(path.join(data_dir, 'water/water_6540.pdb'),
                 path.join(data_dir, 'water/ffield.water'),
                 path.join(control_dir, 'param.gpu.water'),
-                params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str),
-#            TestCase(path.join(data_dir, 'water/water_78480.pdb'),
-#                path.join(data_dir, 'water/ffield.water'),
-#                path.join(control_dir, 'param.gpu.water'),
-#                params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str),
-#            TestCase(path.join(data_dir, 'water/water_327000.geo'),
-#                path.join(data_dir, 'water/ffield.water'),
-#                path.join(control_dir, 'param.gpu.water'),
-#                params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str),
+                params=params, result_header_fmt=header_fmt_str,
+                result_header = header_str, result_body_fmt=body_fmt_str,
+                geo_format=['1']))
+    if 'water_78480' in args.data:
+        test_cases.append(
+            TestCase(path.join(data_dir, 'water/water_78480.pdb'),
+                path.join(data_dir, 'water/ffield.water'),
+                path.join(control_dir, 'param.gpu.water'),
+                params=params, result_header_fmt=header_fmt_str,
+                result_header = header_str, result_body_fmt=body_fmt_str,
+                geo_format=['0']))
+    if 'water_327000' in args.data:
+        test_cases.append(
+            TestCase(path.join(data_dir, 'water/water_327000.geo'),
+                path.join(data_dir, 'water/ffield.water'),
+                path.join(control_dir, 'param.gpu.water'),
+                params=params, result_header_fmt=header_fmt_str,
+                result_header = header_str, result_body_fmt=body_fmt_str,
+                geo_format=['0']))
+    if 'bilayer_56800' in args.data:
+        test_cases.append(
             TestCase(path.join(data_dir, 'bilayer/bilayer_56800.pdb'),
                 path.join(data_dir, 'bilayer/ffield-bio'),
                 path.join(control_dir, 'param.gpu.water'),
-                params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str),
+                params=params, result_header_fmt=header_fmt_str,
+                result_header = header_str, result_body_fmt=body_fmt_str,
+                geo_format=['1']))
+    if 'dna_19733' in args.data:
+        test_cases.append(
             TestCase(path.join(data_dir, 'dna/dna_19733.pdb'),
                 path.join(data_dir, 'dna/ffield-dna'),
                 path.join(control_dir, 'param.gpu.water'),
-                params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str),
+                params=params, result_header_fmt=header_fmt_str,
+                result_header = header_str, result_body_fmt=body_fmt_str,
+                geo_format=['1']))
+    if 'silica_6000' in args.data:
+        test_cases.append(
             TestCase(path.join(data_dir, 'silica/silica_6000.pdb'),
                 path.join(data_dir, 'silica/ffield-bio'),
                 path.join(control_dir, 'param.gpu.water'),
-                params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str),
-#            TestCase(path.join(data_dir, 'silica/silica_72000.geo'),
-#                path.join(data_dir, 'silica/ffield-bio'),
-#                path.join(control_dir, 'param.gpu.water'),
-#                params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str),
-#            TestCase(path.join(data_dir, 'silica/silica_300000.geo'),
-#                path.join(data_dir, 'silica/ffield-bio'),
-#                path.join(control_dir, 'param.gpu.water'),
-#                params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str),
+                params=params, result_header_fmt=header_fmt_str,
+                result_header = header_str, result_body_fmt=body_fmt_str,
+                geo_format=['1']))
+    if 'silica_72000' in args.data:
+        test_cases.append(
+            TestCase(path.join(data_dir, 'silica/silica_72000.geo'),
+                path.join(data_dir, 'silica/ffield-bio'),
+                path.join(control_dir, 'param.gpu.water'),
+                params=params, result_header_fmt=header_fmt_str,
+                result_header = header_str, result_body_fmt=body_fmt_str,
+                geo_format=['0']))
+    if 'silica_300000' in args.data:
+        test_cases.append(
+            TestCase(path.join(data_dir, 'silica/silica_300000.geo'),
+                path.join(data_dir, 'silica/ffield-bio'),
+                path.join(control_dir, 'param.gpu.water'),
+                params=params, result_header_fmt=header_fmt_str,
+                result_header = header_str, result_body_fmt=body_fmt_str,
+                geo_format=['0']))
+    if 'petn_48256' in args.data:
+        test_cases.append(
             TestCase(path.join(data_dir, 'petn/petn_48256.pdb'),
                 path.join(data_dir, 'petn/ffield.petn'),
                 path.join(control_dir, 'param.gpu.water'),
-                params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str),
-                        ]
+                params=params, result_header_fmt=header_fmt_str,
+                result_header = header_str, result_body_fmt=body_fmt_str,
+                geo_format=['1']))
+
     for test in test_cases:
         test.run(bin_file='sPuReMD/bin/spuremd')