From f14972a49d808172f8f6be2b7d8acf37b6aa1546 Mon Sep 17 00:00:00 2001
From: "Kurt A. O'Hearn" <ohearnk@msu.edu>
Date: Thu, 24 Aug 2017 00:11:41 -0400
Subject: [PATCH] sPuReMD: update SDM with preconditioning methods and OpenMP
 support.

---
 sPuReMD/src/charges.c | 21 ++++++++-------
 sPuReMD/src/init_md.c |  4 ++-
 sPuReMD/src/lin_alg.c | 63 +++++++++++++++++++++++--------------------
 sPuReMD/src/lin_alg.h |  3 ++-
 sPuReMD/src/vector.c  | 13 +++------
 5 files changed, 54 insertions(+), 50 deletions(-)

diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c
index d2fddf36..f81fba69 100644
--- a/sPuReMD/src/charges.c
+++ b/sPuReMD/src/charges.c
@@ -2011,7 +2011,7 @@ static void Setup_Preconditioner_QEq( const reax_system * const system,
         simulation_data * const data, static_storage * const workspace,
         const list * const far_nbrs )
 {
-    int i, fillin;
+    int fillin;
     real time;
     sparse_matrix *Hptr;
 
@@ -2156,7 +2156,7 @@ static void Setup_Preconditioner_EE( const reax_system * const system,
         simulation_data * const data, static_storage * const workspace,
         const list * const far_nbrs )
 {
-    int i, fillin;
+    int fillin;
     real time;
     sparse_matrix *Hptr;
 
@@ -2305,7 +2305,7 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system,
         simulation_data * const data, static_storage * const workspace,
         const list * const far_nbrs )
 {
-    int i, fillin;
+    int fillin;
     real time;
     sparse_matrix *Hptr;
 
@@ -2547,9 +2547,10 @@ static void QEq( reax_system * const system, control_params * const control,
 
     case SDM_S:
         iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err,
-                     workspace->s[0] ) + 1;
+                workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 &&
+                 (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1;
         iters += SDM( workspace,control,  workspace->H, workspace->b_t, control->cm_solver_q_err,
-                      workspace->t[0] ) + 1;
+                      workspace->t[0], FALSE ) + 1;
         break;
 
     default:
@@ -2628,7 +2629,8 @@ static void EE( reax_system * const system, control_params * const control,
 
     case SDM_S:
         iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err,
-                workspace->s[0] ) + 1;
+                workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 &&
+                 (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1;
         break;
 
     default:
@@ -2689,8 +2691,8 @@ static void ACKS2( reax_system * const system, control_params * const control,
     case GMRES_H_S:
         iters = GMRES_HouseHolder( workspace, control, data,workspace->H,
                 workspace->b_s, control->cm_solver_q_err, workspace->s[0],
-                (control->cm_solver_pre_comp_refactor > 0 &&
-                 data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 );
+                control->cm_solver_pre_comp_refactor > 0 &&
+                 (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 );
         break;
 
     case CG_S:
@@ -2701,7 +2703,8 @@ static void ACKS2( reax_system * const system, control_params * const control,
 
     case SDM_S:
         iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err,
-                workspace->s[0] ) + 1;
+                workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 &&
+                 (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1;
         break;
 
     default:
diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c
index 4228f8bd..3cbf575f 100644
--- a/sPuReMD/src/init_md.c
+++ b/sPuReMD/src/init_md.c
@@ -428,7 +428,9 @@ void Init_Workspace( reax_system *system, control_params *control,
             break;
 
         case SDM_S:
-            //TODO
+            workspace->r = (real *) calloc( system->N_cm, sizeof( real ) );
+            workspace->d = (real *) calloc( system->N_cm, sizeof( real ) );
+            workspace->q = (real *) calloc( system->N_cm, sizeof( real ) );
             break;
 
         default:
diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c
index 631b543a..28b7c956 100644
--- a/sPuReMD/src/lin_alg.c
+++ b/sPuReMD/src/lin_alg.c
@@ -1764,53 +1764,58 @@ int CG( const static_storage * const workspace, const control_params * const con
 /* Steepest Descent */
 int SDM( const static_storage * const workspace, const control_params * const control,
         const sparse_matrix * const H, const real * const b, const real tol,
-        real * const x )
+        real * const x, const int fresh_pre )
 {
-    int  i, j, N;
-    real tmp, alpha, beta, b_norm;
-    real sig0, sig;
+    int i, itr, N;
+    real tmp, alpha, b_norm;
+    real sig;
 
     N = H->n;
-    b_norm = Norm( b, N );
 
-    Sparse_MatVec( H, x, workspace->q );
-    Vector_Sum( workspace->r , 1.,  b, -1., workspace->q, N );
-    for ( j = 0; j < N; ++j )
+    #pragma omp parallel default(none) private(i, tmp, alpha, b_norm, sig) \
+        shared(itr, N)
     {
-        workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j];
-    }
+        b_norm = Norm( b, N );
 
-    sig = Dot( workspace->r, workspace->d, N );
-    sig0 = sig;
+        Sparse_MatVec( H, x, workspace->q );
+        Vector_Sum( workspace->r , 1.0,  b, -1.0, workspace->q, N );
 
-    for ( i = 0; i < control->cm_solver_max_iters && SQRT(sig) / b_norm > tol; ++i )
-    {
-        Sparse_MatVec( H, workspace->d, workspace->q );
+        apply_preconditioner( workspace, control, workspace->r, workspace->d, fresh_pre );
 
         sig = Dot( workspace->r, workspace->d, N );
-        tmp = Dot( workspace->d, workspace->q, N );
-        alpha = sig / tmp;
 
-        Vector_Add( x, alpha, workspace->d, N );
-        Vector_Add( workspace->r, -alpha, workspace->q, N );
-        for ( j = 0; j < N; ++j )
+        for ( i = 0; i < control->cm_solver_max_iters && SQRT(sig) / b_norm > tol; ++i )
         {
-            workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j];
+            Sparse_MatVec( H, workspace->d, workspace->q );
+
+            sig = Dot( workspace->r, workspace->d, N );
+
+            /* ensure each thread gets a local copy of
+             * the function return value
+             * (which is stored as global inside the function)
+             * before proceeding */
+            #pragma omp barrier
+
+            tmp = Dot( workspace->d, workspace->q, N );
+            alpha = sig / tmp;
+
+            Vector_Add( x, alpha, workspace->d, N );
+            Vector_Add( workspace->r, -alpha, workspace->q, N );
+
+            apply_preconditioner( workspace, control, workspace->r, workspace->d, FALSE );
         }
 
-        //fprintf( stderr, "d_norm:%24.15e, q_norm:%24.15e, tmp:%24.15e\n",
-        //Norm(workspace->d,N), Norm(workspace->q,N), tmp );
+        #pragma omp single
+        itr = i;
     }
 
-    fprintf( stderr, "SDM took %d iterations\n", i );
-
-    if ( i >= 300 )
+    if ( itr >= control->cm_solver_max_iters  )
     {
-        fprintf( stderr, "SDM convergence failed!\n" );
-        return i;
+        fprintf( stderr, "[WARNING] SDM convergence failed (%d iters)\n", itr );
+        return itr;
     }
 
-    return i;
+    return itr;
 }
 
 
diff --git a/sPuReMD/src/lin_alg.h b/sPuReMD/src/lin_alg.h
index f62742a2..a3148514 100644
--- a/sPuReMD/src/lin_alg.h
+++ b/sPuReMD/src/lin_alg.h
@@ -63,7 +63,8 @@ int CG( const static_storage * const, const control_params * const,
         real * const, const int );
 
 int SDM( const static_storage * const, const control_params * const,
-        const sparse_matrix * const, const real * const, const real, real * const );
+        const sparse_matrix * const, const real * const, const real,
+        real * const, const int );
 
 real condest( const sparse_matrix * const, const sparse_matrix * const );
 
diff --git a/sPuReMD/src/vector.c b/sPuReMD/src/vector.c
index 24e05de8..5329aea7 100644
--- a/sPuReMD/src/vector.c
+++ b/sPuReMD/src/vector.c
@@ -32,13 +32,11 @@ inline int Vector_isZero( const real * const v, const unsigned int k )
 {
     unsigned int i;
 
-    #pragma omp master
+    #pragma omp single
     {
         ret = TRUE;
     }
 
-    #pragma omp barrier
-
     #pragma omp for reduction(&&: ret) schedule(static)
     for ( i = 0; i < k; ++i )
     {
@@ -134,14 +132,11 @@ inline real Dot( const real * const v1, const real * const v2, const unsigned in
 {
     unsigned int i;
 
-    #pragma omp master
+    #pragma omp single
     {
         ret2 = ZERO;
     }
 
-    #pragma omp barrier
-
-
     #pragma omp for reduction(+: ret2) schedule(static)
     for ( i = 0; i < k; ++i )
     {
@@ -156,13 +151,11 @@ inline real Norm( const real * const v1, const unsigned int k )
 {
     unsigned int i;
 
-    #pragma omp master
+    #pragma omp single
     {
         ret2 = ZERO;
     }
 
-    #pragma omp barrier
-
     #pragma omp for reduction(+: ret2) schedule(static)
     for ( i = 0; i < k; ++i )
     {
-- 
GitLab