diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c
index 7561df494dd1246dd0e51cd8a812d2bd9e90be4a..7b9cbdab2470ae42a1ea5556a8c8a53455aa9400 100644
--- a/sPuReMD/src/charges.c
+++ b/sPuReMD/src/charges.c
@@ -871,7 +871,6 @@ static void Setup_Preconditioner_EE( const reax_system * const system,
         const control_params * const control,
         simulation_data * const data, static_storage * const workspace )
 {
-    int fillin;
     real time;
     sparse_matrix *Hptr;
 
@@ -968,7 +967,6 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system,
         const control_params * const control,
         simulation_data * const data, static_storage * const workspace )
 {
-    int fillin;
     real time;
     sparse_matrix *Hptr;
 
diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c
index 4461a9cd251894040eab34d58aec153e7a809f27..5faf0b8c0f302275fda257649537811ca643e17c 100644
--- a/sPuReMD/src/init_md.c
+++ b/sPuReMD/src/init_md.c
@@ -618,6 +618,20 @@ static void Init_Workspace( reax_system *system, control_params *control,
         workspace->rp2 = NULL;
     }
 
+    /* ILUTP preconditioner related */
+    if ( control->cm_solver_pre_comp_type == ILUTP_PC )
+    {
+        workspace->perm_ilutp = smalloc( sizeof( int ) * system->N_cm,
+               "Init_Workspace::workspace->perm_ilutp" );
+        workspace->r_p = smalloc( sizeof( real ) * system->N_cm,
+               "Init_Workspace::workspace->r_p" );
+    }
+    else
+    {
+        workspace->perm_ilutp = NULL;
+        workspace->r_p = NULL;
+    }
+
     /* integrator storage */
     workspace->a = smalloc( system->N * sizeof( rvec ),
            "Init_Workspace::workspace->a" );
@@ -1349,6 +1363,13 @@ static void Finalize_Workspace( reax_system *system, control_params *control,
         sfree( workspace->rp2, "Finalize_Workspace::rp2" );
     }
 
+    /* ILUTP preconditioner related */
+    if ( control->cm_solver_pre_comp_type == ILUTP_PC )
+    {
+        sfree( workspace->perm_ilutp, "Finalize_Workspace::workspace->perm_ilutp" );
+        sfree( workspace->r_p, "Finalize_Workspace::workspace->r_p" );
+    }
+
     /* integrator storage */
     sfree( workspace->a, "Finalize_Workspace::workspace->a" );
     sfree( workspace->f_old, "Finalize_Workspace::workspace->f_old" );
diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c
index 770c1141f18ab5eadafc4c7b6dc39e0cea9b86e1..eaad9a0e4df241e5b73d7764c6b409ac2c6bfacf 100644
--- a/sPuReMD/src/lin_alg.c
+++ b/sPuReMD/src/lin_alg.c
@@ -2011,7 +2011,7 @@ void graph_coloring( const control_params * const control,
         const sparse_matrix * const A, const TRIANGULARITY tri )
 {
 #ifdef _OPENMP
-    #pragma omp parallel
+    #pragma omp parallel num_threads(1)
 #endif
     {
         int i, pj, v;
@@ -2209,6 +2209,28 @@ void sort_rows_by_colors( const static_storage * const workspace,
 }
 
 
+/* Apply permutation P*x = x_p
+ *
+ * x: vector to permute
+ * perm: vector containing mapping if computing P*x
+ * x_p (output): permuted vector
+ * n: number of entries in x
+ */
+static void permute_vector( const real * const x, const int * const perm,
+        real * const x_p, const unsigned int n )
+{
+    unsigned int i;
+
+#ifdef _OPENMP
+    #pragma omp for schedule(static)
+#endif
+    for ( i = 0; i < n; ++i )
+    {
+        x_p[i] = x[perm[i]];
+    }
+}
+
+
 /* Apply permutation Q^T*x or Q*x based on graph coloring
  *
  * workspace: storage container for workspace structures
@@ -2218,7 +2240,7 @@ void sort_rows_by_colors( const static_storage * const workspace,
  * invert_map: if TRUE, use Q^T, otherwise use Q
  * tri: coloring to triangular factor to use (lower/upper)
  */
-static void permute_vector( const static_storage * const workspace,
+static void permute_vector_gc( const static_storage * const workspace,
         real * const x, const unsigned int n, const int invert_map,
         const TRIANGULARITY tri )
 {
@@ -2514,7 +2536,7 @@ void jacobi_iter( const static_storage * const workspace,
  * control: data struct containing parameters
  * y: vector to which to apply preconditioning,
  *  specific to internals of iterative solver being used
- * x: preconditioned vector (output)
+ * x (output): preconditioned vector
  * fresh_pre: parameter indicating if this is a newly computed (fresh) preconditioner
  * side: used in determining how to apply preconditioner if the preconditioner is
  *  factorized as M = M_{1}M_{2} (e.g., incomplete LU, A \approx LU)
@@ -2522,9 +2544,9 @@ void jacobi_iter( const static_storage * const workspace,
  * Assumptions:
  *   Matrices have non-zero diagonals
  *   Each row of a matrix has at least one non-zero (i.e., no rows with all zeros) */
-static void apply_preconditioner( const static_storage * const workspace, const control_params * const control,
-                                  const real * const y, real * const x, const int fresh_pre,
-                                  const int side )
+static void apply_preconditioner( const static_storage * const workspace,
+        const control_params * const control, const real * const y, real * const x,
+        const int fresh_pre, const int side )
 {
     int i, si;
 
@@ -2551,15 +2573,18 @@ static void apply_preconditioner( const static_storage * const workspace, const
                     break;
                 case ICHOLT_PC:
                 case ILUT_PC:
-                case ILUTP_PC:
                 case FG_ILUT_PC:
                     tri_solve( workspace->L, y, x, workspace->L->n, LOWER );
                     break;
+                case ILUTP_PC:
+                    permute_vector( y, workspace->perm_ilutp, workspace->r_p, workspace->H->n );
+                    tri_solve( workspace->L, workspace->r_p, x, workspace->L->n, LOWER );
+                    break;
                 case SAI_PC:
                     Sparse_MatVec_full( workspace->H_app_inv, y, x );
                     break;
                 default:
-                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" );
                     exit( INVALID_INPUT );
                     break;
                 }
@@ -2572,16 +2597,20 @@ static void apply_preconditioner( const static_storage * const workspace, const
                     break;
                 case ICHOLT_PC:
                 case ILUT_PC:
-                case ILUTP_PC:
                 case FG_ILUT_PC:
                     tri_solve_level_sched( (static_storage *) workspace,
                             workspace->L, y, x, workspace->L->n, LOWER, fresh_pre );
                     break;
+                case ILUTP_PC:
+                    permute_vector( y, workspace->perm_ilutp, workspace->r_p, workspace->H->n );
+                    tri_solve_level_sched( (static_storage *) workspace,
+                            workspace->L, workspace->r_p, x, workspace->L->n, LOWER, fresh_pre );
+                    break;
                 case SAI_PC:
                     Sparse_MatVec_full( workspace->H_app_inv, y, x );
                     break;
                 default:
-                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" );
                     exit( INVALID_INPUT );
                     break;
                 }
@@ -2591,12 +2620,11 @@ static void apply_preconditioner( const static_storage * const workspace, const
                 {
                 case JACOBI_PC:
                 case SAI_PC:
-                    fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                    fprintf( stderr, "[ERROR] Unsupported preconditioner computation/application method combination. Terminating...\n" );
                     exit( INVALID_INPUT );
                     break;
                 case ICHOLT_PC:
                 case ILUT_PC:
-                case ILUTP_PC:
                 case FG_ILUT_PC:
 #ifdef _OPENMP
                     #pragma omp for schedule(static)
@@ -2606,12 +2634,27 @@ static void apply_preconditioner( const static_storage * const workspace, const
                         workspace->y_p[i] = y[i];
                     }
 
-                    permute_vector( workspace, workspace->y_p, workspace->H->n, FALSE, LOWER );
+                    permute_vector_gc( workspace, workspace->y_p, workspace->H->n, FALSE, LOWER );
+                    tri_solve_level_sched( (static_storage *) workspace,
+                            workspace->L, workspace->y_p, x, workspace->L->n, LOWER, fresh_pre );
+                    break;
+                case ILUTP_PC:
+                    permute_vector( y, workspace->perm_ilutp, workspace->r_p, workspace->H->n );
+
+#ifdef _OPENMP
+                    #pragma omp for schedule(static)
+#endif
+                    for ( i = 0; i < workspace->H->n; ++i )
+                    {
+                        workspace->y_p[i] = workspace->r_p[i];
+                    }
+
+                    permute_vector_gc( workspace, workspace->y_p, workspace->H->n, FALSE, LOWER );
                     tri_solve_level_sched( (static_storage *) workspace,
                             workspace->L, workspace->y_p, x, workspace->L->n, LOWER, fresh_pre );
                     break;
                 default:
-                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" );
                     exit( INVALID_INPUT );
                     break;
                 }
@@ -2621,12 +2664,11 @@ static void apply_preconditioner( const static_storage * const workspace, const
                 {
                 case JACOBI_PC:
                 case SAI_PC:
-                    fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                    fprintf( stderr, "[ERROR] Unsupported preconditioner computation/application method combination. Terminating...\n" );
                     exit( INVALID_INPUT );
                     break;
                 case ICHOLT_PC:
                 case ILUT_PC:
-                case ILUTP_PC:
                 case FG_ILUT_PC:
                     /* construct D^{-1}_L */
                     if ( fresh_pre == TRUE )
@@ -2644,14 +2686,33 @@ static void apply_preconditioner( const static_storage * const workspace, const
                     jacobi_iter( workspace, workspace->L, workspace->Dinv_L,
                             y, x, LOWER, control->cm_solver_pre_app_jacobi_iters );
                     break;
+                case ILUTP_PC:
+                    permute_vector( y, workspace->perm_ilutp, workspace->r_p, workspace->H->n );
+
+                    /* construct D^{-1}_L */
+                    if ( fresh_pre == TRUE )
+                    {
+#ifdef _OPENMP
+                        #pragma omp for schedule(static)
+#endif
+                        for ( i = 0; i < workspace->L->n; ++i )
+                        {
+                            si = workspace->L->start[i + 1] - 1;
+                            workspace->Dinv_L[i] = 1.0 / workspace->L->val[si];
+                        }
+                    }
+
+                    jacobi_iter( workspace, workspace->L, workspace->Dinv_L,
+                            workspace->r_p, x, LOWER, control->cm_solver_pre_app_jacobi_iters );
+                    break;
                 default:
-                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" );
                     exit( INVALID_INPUT );
                     break;
                 }
                 break;
             default:
-                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" );
                 exit( INVALID_INPUT );
                 break;
 
@@ -2678,7 +2739,7 @@ static void apply_preconditioner( const static_storage * const workspace, const
                     tri_solve( workspace->U, y, x, workspace->U->n, UPPER );
                     break;
                 default:
-                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" );
                     exit( INVALID_INPUT );
                     break;
                 }
@@ -2701,7 +2762,7 @@ static void apply_preconditioner( const static_storage * const workspace, const
                             workspace->U, y, x, workspace->U->n, UPPER, fresh_pre );
                     break;
                 default:
-                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" );
                     exit( INVALID_INPUT );
                     break;
                 }
@@ -2711,7 +2772,7 @@ static void apply_preconditioner( const static_storage * const workspace, const
                 {
                 case JACOBI_PC:
                 case SAI_PC:
-                    fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                    fprintf( stderr, "[ERROR] Unsupported preconditioner computation/application method combination. Terminating...\n" );
                     exit( INVALID_INPUT );
                     break;
                 case ICHOLT_PC:
@@ -2720,10 +2781,10 @@ static void apply_preconditioner( const static_storage * const workspace, const
                 case FG_ILUT_PC:
                     tri_solve_level_sched( (static_storage *) workspace,
                             workspace->U, y, x, workspace->U->n, UPPER, fresh_pre );
-                    permute_vector( workspace, x, workspace->H->n, TRUE, UPPER );
+                    permute_vector_gc( workspace, x, workspace->H->n, TRUE, UPPER );
                     break;
                 default:
-                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" );
                     exit( INVALID_INPUT );
                     break;
                 }
@@ -2733,7 +2794,7 @@ static void apply_preconditioner( const static_storage * const workspace, const
                 {
                 case JACOBI_PC:
                 case SAI_PC:
-                    fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                    fprintf( stderr, "[ERROR] Unsupported preconditioner computation/application method combination. Terminating...\n" );
                     exit( INVALID_INPUT );
                     break;
                 case ICHOLT_PC:
@@ -2757,13 +2818,13 @@ static void apply_preconditioner( const static_storage * const workspace, const
                             y, x, UPPER, control->cm_solver_pre_app_jacobi_iters );
                     break;
                 default:
-                    fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                    fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" );
                     exit( INVALID_INPUT );
                     break;
                 }
                 break;
             default:
-                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                fprintf( stderr, "[ERROR] Unrecognized preconditioner application method. Terminating...\n" );
                 exit( INVALID_INPUT );
                 break;
 
diff --git a/sPuReMD/src/print_utils.c b/sPuReMD/src/print_utils.c
index aeabf83ca9e54fea713269cf7757748a8ad4f231..602cc4ada1586eb47cf1822e7c71ef0f63185855 100644
--- a/sPuReMD/src/print_utils.c
+++ b/sPuReMD/src/print_utils.c
@@ -884,7 +884,7 @@ void Read_Sparse_Matrix2( sparse_matrix *A, char *fname )
 
     A->start[cur_row] = top;
 
-    while ( fscanf( f, "%6d %6d %24.15lf", &row, &col, &val ) == 3 )
+    while ( fscanf( f, "%6d %6d %24lf", &row, &col, &val ) == 3 )
     {
         /* assumption: every row has at least one nonzero (diagonal) */
         if ( cur_row != row - 1 )
@@ -904,6 +904,28 @@ void Read_Sparse_Matrix2( sparse_matrix *A, char *fname )
 }
 
 
+/* Read permuation matrix in COO format (1-based indexing)
+ * and store in a dense vector (preallocated)
+ *
+ * Note: the file must be sorted in increasing order of
+ * columns than rows */
+void Read_Permutation_Matrix( int *v, char *fname )
+{
+    int row, col;
+    real val;
+    FILE *f;
+   
+    f = sfopen( fname, "r" );
+
+    while ( fscanf( f, "%6d %6d %24lf", &row, &col, &val ) == 3 )
+    {
+        v[row - 1] = col - 1;
+    }
+
+    sfclose( f, "Read_Permuation_Matrix::f" );
+}
+
+
 /* Note: watch out for portability issues with endianness
  * due to serialization of numeric types (integer, IEEE 754) */
 void Print_Sparse_Matrix_Binary( sparse_matrix *A, char *fname )
diff --git a/sPuReMD/src/print_utils.h b/sPuReMD/src/print_utils.h
index 8496a405d716a356d753ac24ee49ae400f3164ac..1852ca31e5d4a4d5ad27063633029f303edef5b7 100644
--- a/sPuReMD/src/print_utils.h
+++ b/sPuReMD/src/print_utils.h
@@ -56,6 +56,8 @@ void Print_Sparse_Matrix2( sparse_matrix*, char*, char* );
 
 void Read_Sparse_Matrix2( sparse_matrix *, char * );
 
+void Read_Permutation_Matrix( int *, char * );
+
 void Print_Sparse_Matrix_Binary( sparse_matrix*, char* );
 
 void Print_Bonds( reax_system*, reax_list*, char* );
diff --git a/sPuReMD/src/reax_types.h b/sPuReMD/src/reax_types.h
index e26363a48b042d661d554de7919ccfc59dc0c9c5..b0fa09a758288dff14e49750e8e44cce9caa2b0d 100644
--- a/sPuReMD/src/reax_types.h
+++ b/sPuReMD/src/reax_types.h
@@ -1116,7 +1116,7 @@ struct static_storage
     real *b_local;
 #endif
 
-    /* Level scheduling related storage for applying, e.g. ICHOLT and ILU(T),
+    /* Level scheduling related storage for applying ICHOLT/ILUT(P)/FG-ILUT
      * preconditioners */
     int levels_L;
     int levels_U;
@@ -1128,7 +1128,7 @@ struct static_storage
     unsigned int *level_rows_cnt_U;
     unsigned int *top;
 
-    /* Graph coloring related storage for applying, e.g. ICHOLT and ILU(T),
+    /* Graph coloring related storage for applying ICHOLT/ILUT(P)/FG-ILUT
      * preconditioners */
     unsigned int *color;
     unsigned int *to_color;
@@ -1142,7 +1142,7 @@ struct static_storage
     real *y_p;
     real *x_p;
 
-    /* Jacobi iteration related storage for applying, e.g. ICHOLT and ILU(T),
+    /* Jacobi iteration related storage for applying ICHOLT/ILUT(P)/FG-ILUT
      * preconditioners */
     real *Dinv_L;
     real *Dinv_U;
@@ -1150,6 +1150,11 @@ struct static_storage
     real *rp;
     real *rp2;
 
+    /* permutation for ILUTP */
+    int *perm_ilutp;
+    /* permuted residual for preconditioning with ILUTP */
+    real *r_p;
+
     int num_H;
     int *hbond_index; // for hydrogen bonds