diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c
index d2fddf362d944b214f56ca22591e3d0c425a65ba..f81fba699cf4bb451c7a71911772a3ecf1a424da 100644
--- a/sPuReMD/src/charges.c
+++ b/sPuReMD/src/charges.c
@@ -2011,7 +2011,7 @@ static void Setup_Preconditioner_QEq( const reax_system * const system,
         simulation_data * const data, static_storage * const workspace,
         const list * const far_nbrs )
 {
-    int i, fillin;
+    int fillin;
     real time;
     sparse_matrix *Hptr;
 
@@ -2156,7 +2156,7 @@ static void Setup_Preconditioner_EE( const reax_system * const system,
         simulation_data * const data, static_storage * const workspace,
         const list * const far_nbrs )
 {
-    int i, fillin;
+    int fillin;
     real time;
     sparse_matrix *Hptr;
 
@@ -2305,7 +2305,7 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system,
         simulation_data * const data, static_storage * const workspace,
         const list * const far_nbrs )
 {
-    int i, fillin;
+    int fillin;
     real time;
     sparse_matrix *Hptr;
 
@@ -2547,9 +2547,10 @@ static void QEq( reax_system * const system, control_params * const control,
 
     case SDM_S:
         iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err,
-                     workspace->s[0] ) + 1;
+                workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 &&
+                 (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1;
         iters += SDM( workspace,control,  workspace->H, workspace->b_t, control->cm_solver_q_err,
-                      workspace->t[0] ) + 1;
+                      workspace->t[0], FALSE ) + 1;
         break;
 
     default:
@@ -2628,7 +2629,8 @@ static void EE( reax_system * const system, control_params * const control,
 
     case SDM_S:
         iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err,
-                workspace->s[0] ) + 1;
+                workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 &&
+                 (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1;
         break;
 
     default:
@@ -2689,8 +2691,8 @@ static void ACKS2( reax_system * const system, control_params * const control,
     case GMRES_H_S:
         iters = GMRES_HouseHolder( workspace, control, data,workspace->H,
                 workspace->b_s, control->cm_solver_q_err, workspace->s[0],
-                (control->cm_solver_pre_comp_refactor > 0 &&
-                 data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 );
+                control->cm_solver_pre_comp_refactor > 0 &&
+                 (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 );
         break;
 
     case CG_S:
@@ -2701,7 +2703,8 @@ static void ACKS2( reax_system * const system, control_params * const control,
 
     case SDM_S:
         iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err,
-                workspace->s[0] ) + 1;
+                workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 &&
+                 (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1;
         break;
 
     default:
diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c
index 4228f8bd4967cca1822e5d07fd0d5055503d5b20..3cbf575fc3215c45e6c4bde578036e0f243abb20 100644
--- a/sPuReMD/src/init_md.c
+++ b/sPuReMD/src/init_md.c
@@ -428,7 +428,9 @@ void Init_Workspace( reax_system *system, control_params *control,
             break;
 
         case SDM_S:
-            //TODO
+            workspace->r = (real *) calloc( system->N_cm, sizeof( real ) );
+            workspace->d = (real *) calloc( system->N_cm, sizeof( real ) );
+            workspace->q = (real *) calloc( system->N_cm, sizeof( real ) );
             break;
 
         default:
diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c
index 631b543afa9e96c2e73287b949ee3f001865e28a..28b7c9560d4ab5f11e8161f8a9ff867ab5f3dc1a 100644
--- a/sPuReMD/src/lin_alg.c
+++ b/sPuReMD/src/lin_alg.c
@@ -1764,53 +1764,58 @@ int CG( const static_storage * const workspace, const control_params * const con
 /* Steepest Descent */
 int SDM( const static_storage * const workspace, const control_params * const control,
         const sparse_matrix * const H, const real * const b, const real tol,
-        real * const x )
+        real * const x, const int fresh_pre )
 {
-    int  i, j, N;
-    real tmp, alpha, beta, b_norm;
-    real sig0, sig;
+    int i, itr, N;
+    real tmp, alpha, b_norm;
+    real sig;
 
     N = H->n;
-    b_norm = Norm( b, N );
 
-    Sparse_MatVec( H, x, workspace->q );
-    Vector_Sum( workspace->r , 1.,  b, -1., workspace->q, N );
-    for ( j = 0; j < N; ++j )
+    #pragma omp parallel default(none) private(i, tmp, alpha, b_norm, sig) \
+        shared(itr, N)
     {
-        workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j];
-    }
+        b_norm = Norm( b, N );
 
-    sig = Dot( workspace->r, workspace->d, N );
-    sig0 = sig;
+        Sparse_MatVec( H, x, workspace->q );
+        Vector_Sum( workspace->r , 1.0,  b, -1.0, workspace->q, N );
 
-    for ( i = 0; i < control->cm_solver_max_iters && SQRT(sig) / b_norm > tol; ++i )
-    {
-        Sparse_MatVec( H, workspace->d, workspace->q );
+        apply_preconditioner( workspace, control, workspace->r, workspace->d, fresh_pre );
 
         sig = Dot( workspace->r, workspace->d, N );
-        tmp = Dot( workspace->d, workspace->q, N );
-        alpha = sig / tmp;
 
-        Vector_Add( x, alpha, workspace->d, N );
-        Vector_Add( workspace->r, -alpha, workspace->q, N );
-        for ( j = 0; j < N; ++j )
+        for ( i = 0; i < control->cm_solver_max_iters && SQRT(sig) / b_norm > tol; ++i )
         {
-            workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j];
+            Sparse_MatVec( H, workspace->d, workspace->q );
+
+            sig = Dot( workspace->r, workspace->d, N );
+
+            /* ensure each thread gets a local copy of
+             * the function return value
+             * (which is stored as global inside the function)
+             * before proceeding */
+            #pragma omp barrier
+
+            tmp = Dot( workspace->d, workspace->q, N );
+            alpha = sig / tmp;
+
+            Vector_Add( x, alpha, workspace->d, N );
+            Vector_Add( workspace->r, -alpha, workspace->q, N );
+
+            apply_preconditioner( workspace, control, workspace->r, workspace->d, FALSE );
         }
 
-        //fprintf( stderr, "d_norm:%24.15e, q_norm:%24.15e, tmp:%24.15e\n",
-        //Norm(workspace->d,N), Norm(workspace->q,N), tmp );
+        #pragma omp single
+        itr = i;
     }
 
-    fprintf( stderr, "SDM took %d iterations\n", i );
-
-    if ( i >= 300 )
+    if ( itr >= control->cm_solver_max_iters  )
     {
-        fprintf( stderr, "SDM convergence failed!\n" );
-        return i;
+        fprintf( stderr, "[WARNING] SDM convergence failed (%d iters)\n", itr );
+        return itr;
     }
 
-    return i;
+    return itr;
 }
 
 
diff --git a/sPuReMD/src/lin_alg.h b/sPuReMD/src/lin_alg.h
index f62742a2b8aceb77a2d3aca9edad8503c6b08551..a3148514449e17fa972c4fee57be8ea518c3c100 100644
--- a/sPuReMD/src/lin_alg.h
+++ b/sPuReMD/src/lin_alg.h
@@ -63,7 +63,8 @@ int CG( const static_storage * const, const control_params * const,
         real * const, const int );
 
 int SDM( const static_storage * const, const control_params * const,
-        const sparse_matrix * const, const real * const, const real, real * const );
+        const sparse_matrix * const, const real * const, const real,
+        real * const, const int );
 
 real condest( const sparse_matrix * const, const sparse_matrix * const );
 
diff --git a/sPuReMD/src/vector.c b/sPuReMD/src/vector.c
index 24e05de8d82242cb6c0c15153d6df892eb55e73e..5329aea717a6d00b542df5527eb03a6684ec5f18 100644
--- a/sPuReMD/src/vector.c
+++ b/sPuReMD/src/vector.c
@@ -32,13 +32,11 @@ inline int Vector_isZero( const real * const v, const unsigned int k )
 {
     unsigned int i;
 
-    #pragma omp master
+    #pragma omp single
     {
         ret = TRUE;
     }
 
-    #pragma omp barrier
-
     #pragma omp for reduction(&&: ret) schedule(static)
     for ( i = 0; i < k; ++i )
     {
@@ -134,14 +132,11 @@ inline real Dot( const real * const v1, const real * const v2, const unsigned in
 {
     unsigned int i;
 
-    #pragma omp master
+    #pragma omp single
     {
         ret2 = ZERO;
     }
 
-    #pragma omp barrier
-
-
     #pragma omp for reduction(+: ret2) schedule(static)
     for ( i = 0; i < k; ++i )
     {
@@ -156,13 +151,11 @@ inline real Norm( const real * const v1, const unsigned int k )
 {
     unsigned int i;
 
-    #pragma omp master
+    #pragma omp single
     {
         ret2 = ZERO;
     }
 
-    #pragma omp barrier
-
     #pragma omp for reduction(+: ret2) schedule(static)
     for ( i = 0; i < k; ++i )
     {