diff --git a/environ/param.gpu.water b/environ/param.gpu.water
index 7b3d00e95f1eba256b006f40283943d2bcbcbd57..d5d3900d4ea9f31cfe5fec224692d9fd96b594af 100644
--- a/environ/param.gpu.water
+++ b/environ/param.gpu.water
@@ -16,6 +16,7 @@ hbond_cutoff            7.50                    ! cutoff distance for hydrogen b
 
 qeq_solver_type         0                       ! method used to solve charge equilibration phase (QEq)
 qeq_solver_q_err        1e-6                    ! relative residual norm threshold used in solver
+qeq_domain_sparsity     1.0                     ! scalar for scaling cut-off distance, used to sparsify QEq matrix (between 0.0 and 1.0)
 pre_comp_type           0                       ! method used to compute QEq preconditioner, if applicable
 pre_comp_refactor       100                     ! nsteps to recompute preconditioner
 pre_comp_droptol        0.0                     ! threshold tolerance for dropping values in preconditioner computation, if applicable
diff --git a/sPuReMD/src/QEq.c b/sPuReMD/src/QEq.c
index 514330bdb342caa65b3446f10cd463cabca75b43..ddf0000d3ac225dab75352ab1c8ac43a514d4537 100644
--- a/sPuReMD/src/QEq.c
+++ b/sPuReMD/src/QEq.c
@@ -262,7 +262,7 @@ static int Estimate_LU_Fill( const sparse_matrix * const A, const real * const d
     fillin = 0;
 
     #pragma omp parallel for schedule(static) \
-        default(none) private(i, j, pj, val) reduction(+: fillin)
+    default(none) private(i, j, pj, val) reduction(+: fillin)
     for ( i = 0; i < A->n; ++i )
     {
         for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj )
@@ -1018,7 +1018,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     {
         /* for each nonzero in L */
         #pragma omp parallel for schedule(static) \
-            default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum)
+        default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum)
         for ( j = 0; j < DAD->start[DAD->n]; ++j )
         {
             sum = ZERO;
@@ -1405,9 +1405,19 @@ static void Init_MatVec( const reax_system * const system, const control_params
 {
     int i, fillin;
     real s_tmp, t_tmp, time;
+    sparse_matrix *Hptr;
     sparse_matrix *H_test, *L_test, *U_test;
 //    char fname[100];
 
+    if (control->qeq_domain_sparsify_enabled)
+    {
+        Hptr = workspace->H_sp;
+    }
+    else
+    {
+        Hptr = workspace->H;
+    }
+
     if (control->pre_comp_refactor > 0 &&
             ((data->step - data->prev_steps) % control->pre_comp_refactor == 0 || workspace->L == NULL))
     {
@@ -1417,7 +1427,10 @@ static void Init_MatVec( const reax_system * const system, const control_params
         if ( control->pre_comp_type != DIAG_PC )
         {
             Sort_Matrix_Rows( workspace->H );
-            Sort_Matrix_Rows( workspace->H_sp );
+            if (control->qeq_domain_sparsify_enabled)
+            {
+                Sort_Matrix_Rows( workspace->H_sp );
+            }
         }
         data->timing.QEq_sort_mat_rows += Get_Timing_Info( time );
 #if defined(DEBUG)
@@ -1438,13 +1451,13 @@ static void Init_MatVec( const reax_system * const system, const control_params
             data->timing.pre_comp += diag_pre_comp( system, workspace->Hdia_inv );
             break;
         case ICHOLT_PC:
-            Calculate_Droptol( workspace->H, workspace->droptol, control->pre_comp_droptol );
+            Calculate_Droptol( Hptr, workspace->droptol, control->pre_comp_droptol );
 #if defined(DEBUG_FOCUS)
             fprintf( stderr, "drop tolerances calculated\n" );
 #endif
             if ( workspace->L == NULL )
             {
-                fillin = Estimate_LU_Fill( workspace->H, workspace->droptol );
+                fillin = Estimate_LU_Fill( Hptr, workspace->droptol );
                 if ( Allocate_Matrix( &(workspace->L), far_nbrs->n, fillin ) == 0 ||
                         Allocate_Matrix( &(workspace->U), far_nbrs->n, fillin ) == 0 )
                 {
@@ -1458,15 +1471,14 @@ static void Init_MatVec( const reax_system * const system, const control_params
 #endif
             }
 
-            data->timing.pre_comp += ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U );
-//                data->timing.pre_comp += ICHOLT( workspace->H_sp, workspace->droptol, workspace->L, workspace->U );
+            data->timing.pre_comp += ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U );
             break;
         case ILU_PAR_PC:
             if ( workspace->L == NULL )
             {
                 /* factors have sparsity pattern as H */
-                if ( Allocate_Matrix( &(workspace->L), workspace->H->n, workspace->H->m ) == 0 ||
-                        Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 )
+                if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == 0 ||
+                        Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == 0 )
                 {
                     fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
                     exit( INSUFFICIENT_MEMORY );
@@ -1474,7 +1486,7 @@ static void Init_MatVec( const reax_system * const system, const control_params
             }
 
 //                data->timing.pre_comp += ICHOL_PAR( workspace->H, control->pre_comp_sweeps, workspace->L, workspace->U );
-            data->timing.pre_comp += ILU_PAR( workspace->H, control->pre_comp_sweeps, workspace->L, workspace->U );
+            data->timing.pre_comp += ILU_PAR( Hptr, control->pre_comp_sweeps, workspace->L, workspace->U );
 
 //                Print_Sparse_Matrix2( workspace->H, "H.out" );
 //                Print_Sparse_Matrix2( workspace->L, "L.out" );
@@ -1516,7 +1528,7 @@ static void Init_MatVec( const reax_system * const system, const control_params
 //                exit( 0 );
             break;
         case ILUT_PAR_PC:
-            Calculate_Droptol( workspace->H, workspace->droptol, control->pre_comp_droptol );
+            Calculate_Droptol( Hptr, workspace->droptol, control->pre_comp_droptol );
 #if defined(DEBUG_FOCUS)
             fprintf( stderr, "drop tolerances calculated\n" );
 #endif
@@ -1524,30 +1536,30 @@ static void Init_MatVec( const reax_system * const system, const control_params
             if ( workspace->L == NULL )
             {
                 /* TODO: safest storage estimate is ILU(0) (same as lower triangular portion of H), could improve later */
-                if ( Allocate_Matrix( &(workspace->L), workspace->H->n, workspace->H->m ) == 0 ||
-                        Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 )
+                if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == 0 ||
+                        Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == 0 )
                 {
                     fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
                     exit( INSUFFICIENT_MEMORY );
                 }
             }
 
-            data->timing.pre_comp += ILUT_PAR( workspace->H, workspace->droptol, control->pre_comp_sweeps,
+            data->timing.pre_comp += ILUT_PAR( Hptr, workspace->droptol, control->pre_comp_sweeps,
                                                workspace->L, workspace->U );
             break;
         case ILU_SUPERLU_MT_PC:
             if ( workspace->L == NULL )
             {
                 /* factors have sparsity pattern as H */
-                if ( Allocate_Matrix( &(workspace->L), workspace->H->n, workspace->H->m ) == 0 ||
-                        Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 )
+                if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == 0 ||
+                        Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == 0 )
                 {
                     fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
                     exit( INSUFFICIENT_MEMORY );
                 }
             }
 #if defined(HAVE_SUPERLU_MT)
-            data->timing.pre_comp += SuperLU_Factorize( workspace->H, workspace->L, workspace->U );
+            data->timing.pre_comp += SuperLU_Factorize( Hptr, workspace->L, workspace->U );
 #else
             fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" );
             exit( INVALID_INPUT );
diff --git a/sPuReMD/src/control.c b/sPuReMD/src/control.c
index 62311c5934d5da95ee7e5473f799c5ec506e0c20..41f744969f1615ba621d85db852d998e92719b86 100644
--- a/sPuReMD/src/control.c
+++ b/sPuReMD/src/control.c
@@ -61,7 +61,8 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control,
     control->reneighbor = 1;
     control->vlist_cut = 0;
     control->nbr_cut = 4.;
-    control->r_cut = 10;
+    control->r_cut = 10.;
+    control->r_sp_cut = 10.;
     control->max_far_nbrs = 1000;
     control->bo_cut = 0.01;
     control->thb_cut = 0.001;
@@ -71,6 +72,8 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control,
 
     control->qeq_solver_type = GMRES_S;
     control->qeq_solver_q_err = 0.000001;
+    control->qeq_domain_sparsify_enabled = FALSE;
+    control->qeq_domain_sparsity = 1.0;
     control->pre_comp_type = ICHOLT_PC;
     control->pre_comp_sweeps = 3;
     control->pre_comp_refactor = 100;
@@ -247,6 +250,12 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control,
             val = atof( tmp[1] );
             control->qeq_solver_q_err = val;
         }
+        else if ( strcmp(tmp[0], "qeq_domain_sparsity") == 0 )
+        {
+            val = atof( tmp[1] );
+            control->qeq_domain_sparsity = val;
+            control->qeq_domain_sparsify_enabled = TRUE;
+        }
         else if ( strcmp(tmp[0], "pre_comp_type") == 0 )
         {
             ival = atoi( tmp[1] );
@@ -501,7 +510,7 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control,
         else
         {
             fprintf( stderr, "WARNING: unknown parameter %s\n", tmp[0] );
-            exit( 15 );
+            exit( UNKNOWN_OPTION );
         }
     }
 
@@ -521,6 +530,7 @@ char Read_Control_File( FILE* fp, reax_system *system, control_params* control,
     control->bo_cut = 0.01 * system->reaxprm.gp.l[29];
     control->r_low  = system->reaxprm.gp.l[11];
     control->r_cut  = system->reaxprm.gp.l[12];
+    control->r_sp_cut  = control->r_cut * control->qeq_domain_sparsity;
     control->vlist_cut += control->r_cut;
 
     system->g.cell_size = control->vlist_cut / 2.;
diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c
index e6e637076f2de1342e422ee08db4260b0025d6f3..9108a8dd4026e001716f707302e40d671d48b838 100644
--- a/sPuReMD/src/forces.c
+++ b/sPuReMD/src/forces.c
@@ -304,9 +304,6 @@ void Init_Forces( reax_system *system, control_params *control,
     p_boc1 = system->reaxprm.gp.l[0];
     p_boc2 = system->reaxprm.gp.l[1];
 
-    //TODO: add to control file, remove
-    control->r_sp_cut = 8.0;
-
     for ( i = 0; i < system->N; ++i )
     {
         atom_i = &(system->atoms[i]);
@@ -384,7 +381,7 @@ void Init_Forces( reax_system *system, control_params *control,
                 if ( flag_sp )
                 {
                     H_sp->j[H_sp_top] = j;
-                    H_sp->val[H_sp_top] = self_coef * Tap * EV_to_KCALpMOL / dr3gamij_3;
+                    H_sp->val[H_sp_top] = H->val[Htop - 1];
                     ++H_sp_top;
                 }
 
@@ -542,12 +539,14 @@ void Init_Forces( reax_system *system, control_params *control,
             }
         }
 
+        /* diagonal entry */
         H->j[Htop] = i;
         H->val[Htop] = system->reaxprm.sbp[type_i].eta;
         ++Htop;
 
+        /* diagonal entry */
         H_sp->j[H_sp_top] = i;
-        H_sp->val[H_sp_top] = system->reaxprm.sbp[type_i].eta;
+        H_sp->val[H_sp_top] = H->val[Htop - 1];
         ++H_sp_top;
 
         Set_End_Index( i, btop_i, bonds );
@@ -582,17 +581,17 @@ void Init_Forces_Tab( reax_system *system, control_params *control,
     int i, j, pj;
     int start_i, end_i;
     int type_i, type_j;
-    int Htop, btop_i, btop_j, num_bonds, num_hbonds;
+    int Htop, H_sp_top, btop_i, btop_j, num_bonds, num_hbonds;
     int tmin, tmax, r;
     int ihb, jhb, ihb_top, jhb_top;
-    int flag;
+    int flag, flag_sp;
     real r_ij, r2, self_coef;
     real val, dif, base;
     real C12, C34, C56;
     real Cln_BOp_s, Cln_BOp_pi, Cln_BOp_pi2;
     real BO, BO_s, BO_pi, BO_pi2;
     real p_boc1, p_boc2;
-    sparse_matrix *H;
+    sparse_matrix *H, *H_sp;
     list *far_nbrs, *bonds, *hbonds;
     single_body_parameters *sbp_i, *sbp_j;
     two_body_parameters *twbp;
@@ -607,7 +606,9 @@ void Init_Forces_Tab( reax_system *system, control_params *control,
     hbonds = *lists + HBONDS;
 
     H = workspace->H;
+    H_sp = workspace->H_sp;
     Htop = 0;
+    H_sp_top = 0;
     num_bonds = 0;
     num_hbonds = 0;
     btop_i = btop_j = 0;
@@ -621,6 +622,7 @@ void Init_Forces_Tab( reax_system *system, control_params *control,
         start_i = Start_Index(i, far_nbrs);
         end_i   = End_Index(i, far_nbrs);
         H->start[i] = Htop;
+        H_sp->start[i] = H_sp_top;
         btop_i = End_Index( i, bonds );
         sbp_i = &(system->reaxprm.sbp[type_i]);
         ihb = ihb_top = -1;
@@ -634,15 +636,30 @@ void Init_Forces_Tab( reax_system *system, control_params *control,
             atom_j = &(system->atoms[j]);
 
             flag = 0;
+            flag_sp = 0;
             if ((data->step - data->prev_steps) % control->reneighbor == 0)
             {
                 if (nbr_pj->d <= control->r_cut)
+                {
                     flag = 1;
-                else flag = 0;
+                    if ( nbr_pj->d <= control->r_sp_cut )
+                    {
+                        flag_sp = 1;
+                    }
+                }
+                else
+                {
+                    flag = 0;
+                    flag_sp = 0;
+                }
             }
             else if ((nbr_pj->d = Sq_Distance_on_T3(atom_i->x, atom_j->x, &(system->box),
                                                     nbr_pj->dvec)) <= SQR(control->r_cut))
             {
+                if ( nbr_pj->d <= SQR(control->r_sp_cut))
+                {
+                    flag_sp = 1;
+                }
                 nbr_pj->d = sqrt(nbr_pj->d);
                 flag = 1;
             }
@@ -671,6 +688,14 @@ void Init_Forces_Tab( reax_system *system, control_params *control,
                 H->val[Htop] = self_coef * val;
                 ++Htop;
 
+                /* H_sp matrix entry */
+                if ( flag_sp )
+                {
+                    H_sp->j[H_sp_top] = j;
+                    H_sp->val[H_sp_top] = H->val[Htop - 1];
+                    ++H_sp_top;
+                }
+
                 /* hydrogen bond lists */
                 if ( control->hb_cut > 0 && (ihb == 1 || ihb == 2) &&
                         nbr_pj->d <= control->hb_cut )
@@ -797,10 +822,16 @@ void Init_Forces_Tab( reax_system *system, control_params *control,
             }
         }
 
+        /* diagonal entry */
         H->j[Htop] = i;
         H->val[Htop] = system->reaxprm.sbp[type_i].eta;
         ++Htop;
 
+        /* diagonal entry */
+        H_sp->j[H_sp_top] = i;
+        H_sp->val[H_sp_top] = H->val[Htop - 1];
+        ++H_sp_top;
+
         Set_End_Index( i, btop_i, bonds );
         if ( ihb == 1 )
             Set_End_Index( workspace->hbond_index[i], ihb_top, hbonds );
@@ -808,6 +839,7 @@ void Init_Forces_Tab( reax_system *system, control_params *control,
 
     // mark the end of j list
     H->start[i] = Htop;
+    H_sp->start[i] = H_sp_top;
     /* validate lists - decide if reallocation is required! */
     Validate_Lists( workspace, lists,
                     data->step, system->N, H->m, Htop, num_bonds, num_hbonds );
diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h
index 9be7cf757d54471195b5afa892a08d2d6bb8ed53..d3cd8b79f6079c7f5ff270efd595c366f05aa877 100644
--- a/sPuReMD/src/mytypes.h
+++ b/sPuReMD/src/mytypes.h
@@ -511,6 +511,8 @@ typedef struct
 
     unsigned int qeq_solver_type;
     real qeq_solver_q_err;
+    real qeq_domain_sparsity;
+    unsigned int qeq_domain_sparsify_enabled;
     unsigned int pre_comp_type;
     unsigned int pre_comp_refactor;
     real pre_comp_droptol;