From 4c09c5f15211d18887859c78595236eb1d6d5b84 Mon Sep 17 00:00:00 2001
From: "Kurt A. O'Hearn" <ohearnku@cse.msu.edu>
Date: Tue, 12 Apr 2016 23:11:37 -0400
Subject: [PATCH] sPuReMD: begin adding OpenMP support to SpMV, Jacobi iter,
 and ICHOL_PAR.

---
 puremd_rc_1003/sPuReMD/GMRES.c  | 109 ++++++++++++++++++++++++++------
 puremd_rc_1003/sPuReMD/Makefile |   2 +-
 puremd_rc_1003/sPuReMD/QEq.c    |  11 ++--
 3 files changed, 96 insertions(+), 26 deletions(-)
 mode change 100755 => 100644 puremd_rc_1003/sPuReMD/Makefile

diff --git a/puremd_rc_1003/sPuReMD/GMRES.c b/puremd_rc_1003/sPuReMD/GMRES.c
index bbc9fc0e..0bf02708 100644
--- a/puremd_rc_1003/sPuReMD/GMRES.c
+++ b/puremd_rc_1003/sPuReMD/GMRES.c
@@ -35,25 +35,60 @@ static void Sparse_MatVec( const sparse_matrix * const A,
 {
     int i, j, k, n, si, ei;
     real H;
+#ifdef _OPENMP
+    real *b_local;
+#endif
 
     n = A->n;
     Vector_MakeZero( b, n );
 
-    for ( i = 0; i < n; ++i )
+    #pragma omp parallel \
+    default(none) shared(n) private(b_local, si, ei, H, i, j, k)
     {
-        si = A->start[i];
-        ei = A->start[i + 1] - 1;
+#ifdef _OPENMP
+        if ( (b_local = (real*) calloc(n, sizeof(real))) == NULL )
+        {
+            exit( INSUFFICIENT_SPACE );
+        }
+#endif
+        #pragma omp for schedule(guided)
+        for ( i = 0; i < n; ++i )
+        {
+            si = A->start[i];
+            ei = A->start[i + 1] - 1;
 
-        for ( k = si; k < ei; ++k )
+            for ( k = si; k < ei; ++k )
+            {
+                j = A->entries[k].j;
+                H = A->entries[k].val;
+#ifdef _OPENMP
+                b_local[j] += H * x[i];
+                b_local[i] += H * x[j];
+#else
+                b[j] += H * x[i];
+                b[i] += H * x[j];
+#endif
+            }
+
+            // the diagonal entry is the last one in
+#ifdef _OPENMP
+            b_local[i] += A->entries[k].val * x[i];
+#else
+            b[i] += A->entries[k].val * x[i];
+#endif
+        }
+
+#ifdef _OPENMP
+        #pragma omp critical(redux)
         {
-            j = A->entries[k].j;
-            H = A->entries[k].val;
-            b[j] += H * x[i];
-            b[i] += H * x[j];
+            for ( i = 0; i < n; ++i )
+            {
+                b[i] += b_local[i];
+            }
         }
 
-        // the diagonal entry is the last one in
-        b[i] += A->entries[k].val * x[i];
+        free(b_local);
+#endif
     }
 }
 
@@ -70,28 +105,60 @@ static void Sparse_MatVec2( const sparse_matrix * const R,
                             const real * const x, real * const b)
 {
     int i, k, si = 0, ei = 0;
+#ifdef _OPENMP
+    real *b_local;
+#endif
 
     Vector_MakeZero( b, R->n );
 
-    for ( i = 0; i < R->n; ++i )
+    #pragma omp parallel default(none) private(b_local, i, k) firstprivate(si, ei)
     {
-        if (tri == LOWER)
+#ifdef _OPENMP
+        if ( (b_local = (real*) calloc(R->n, sizeof(real))) == NULL )
         {
-            si = R->start[i];
-            ei = R->start[i + 1] - 1;
+            exit( INSUFFICIENT_SPACE );
         }
-        else if (tri == UPPER)
+#endif
+        #pragma omp for schedule(guided)
+        for ( i = 0; i < R->n; ++i )
         {
+            if (tri == LOWER)
+            {
+                si = R->start[i];
+                ei = R->start[i + 1] - 1;
+            }
+            else if (tri == UPPER)
+            {
 
-            si = R->start[i] + 1;
-            ei = R->start[i + 1];
-        }
+                si = R->start[i] + 1;
+                ei = R->start[i + 1];
+            }
 
-        for ( k = si; k < ei; ++k )
+            for ( k = si; k < ei; ++k )
+            {
+#ifdef _OPENMP
+                b_local[i] += R->entries[k].val * x[R->entries[k].j];
+#else
+                b[i] += R->entries[k].val * x[R->entries[k].j];
+#endif
+            }
+#ifdef _OPENMP
+            b_local[i] *= -Dinv[i];
+#else
+            b[i] *= -Dinv[i];
+#endif
+        }
+#ifdef _OPENMP
+        #pragma omp critical(redux)
         {
-            b[i] += R->entries[k].val * x[R->entries[k].j];
+            for ( i = 0; i < R->n; ++i )
+            {
+                b[i] += b_local[i];
+            }
         }
-        b[i] *= -Dinv[i];
+
+        free(b_local);
+#endif
     }
 }
 
diff --git a/puremd_rc_1003/sPuReMD/Makefile b/puremd_rc_1003/sPuReMD/Makefile
old mode 100755
new mode 100644
index afb0d13a..a403e107
--- a/puremd_rc_1003/sPuReMD/Makefile
+++ b/puremd_rc_1003/sPuReMD/Makefile
@@ -1,7 +1,7 @@
 CC      = gcc 
 
 LIBS    = -lm -lz
-CFLAGS  = -I.  -Wall -O3 -funroll-loops -fstrict-aliasing
+CFLAGS  = -I.  -Wall -O3 -funroll-loops -fstrict-aliasing -fopenmp
 #CFLAGS  = -I.  -Wall -pg -gstabs
 #CFLAGS  = -I.  -Wall -funroll-loops -fstrict-aliasing
 #-finline-functions  -finline-limit=15 -g#-DTEST -pg -ldl -rdynamic -g
diff --git a/puremd_rc_1003/sPuReMD/QEq.c b/puremd_rc_1003/sPuReMD/QEq.c
index 25033088..ca8efc67 100644
--- a/puremd_rc_1003/sPuReMD/QEq.c
+++ b/puremd_rc_1003/sPuReMD/QEq.c
@@ -120,7 +120,7 @@ real ICHOLT( sparse_matrix *A, real *droptol,
     struct timeval start, stop;
 
     gettimeofday( &start, NULL );
-
+    
     Utop = (int*) malloc((A->n + 1) * sizeof(int));
 
     // clear variables
@@ -296,6 +296,8 @@ static void ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     for ( i = 0; i < sweeps; ++i )
     {
         /* for each nonzero */
+        #pragma omp parallel for schedule(guided) \
+            default(none) private(sum, ei_x, ei_y) firstprivate(x, y)
         for ( j = 0; j < A->start[A->n]; ++j )
         {
             sum = ZERO;
@@ -461,10 +463,11 @@ void Init_MatVec( reax_system *system, control_params *control,
         }
 
         data->timing.pre_comp += ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U );
+//        data->timing.pre_comp += ICHOLT( workspace->H_sp, workspace->droptol, workspace->L, workspace->U );
         // TODO: add parameters for sweeps to control file
 //        ICHOL_PAR( workspace->H, 1, workspace->L, workspace->U );
 
-        fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) );
+//        fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) );
 
 #if defined(DEBUG_FOCUS)
         sprintf( fname, "%s.L%d.out", control->sim_name, data->step );
@@ -548,8 +551,8 @@ void QEq( reax_system *system, control_params *control, simulation_data *data,
 
     Init_MatVec( system, control, data, workspace, far_nbrs );
 
-    if( data->step == 0 || data->step == 100 )
-      Print_Linear_System( system, control, workspace, data->step );
+//    if( data->step == 0 || data->step == 100 )
+//      Print_Linear_System( system, control, workspace, data->step );
 
     //TODO: add parameters in control file for solver choice and options
 //    matvecs = GMRES( workspace, workspace->H,
-- 
GitLab