From 67b2c00d0afe93e5af3f6ecb251f0a9663cd5418 Mon Sep 17 00:00:00 2001
From: "Kurt A. O'Hearn" <ohearnku@msu.edu>
Date: Thu, 1 Sep 2016 20:40:00 -0700
Subject: [PATCH] sPuReMD: parameterize trisolve via level sched for better
 memory usage.

---
 sPuReMD/src/GMRES.c   | 27 +++++++++++++++++++--------
 sPuReMD/src/forces.c  | 11 ++++++++++-
 sPuReMD/src/mytypes.h |  7 ++++---
 sPuReMD/src/testmd.c  |  2 ++
 4 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/sPuReMD/src/GMRES.c b/sPuReMD/src/GMRES.c
index ddbd20a0..2cb7c042 100644
--- a/sPuReMD/src/GMRES.c
+++ b/sPuReMD/src/GMRES.c
@@ -306,9 +306,9 @@ static void tri_solve_level_sched( const sparse_matrix * const LU, const real *
 
     if ( row_levels == NULL || level_rows == NULL || level_rows_cnt == NULL )
     {
-        if ( (row_levels = (unsigned int*) malloc(LU->n * sizeof(unsigned int))) == NULL
-                || (level_rows = (unsigned int*) malloc(LU->n * LU->n * sizeof(unsigned int))) == NULL
-                || (level_rows_cnt = (unsigned int*) malloc(LU->n * sizeof(unsigned int))) == NULL )
+        if ( (row_levels = (unsigned int*) malloc((size_t)LU->n * sizeof(unsigned int))) == NULL
+                || (level_rows = (unsigned int*) malloc(MAX_ROWS_PER_LEVEL * (size_t)LU->n * sizeof(unsigned int))) == NULL
+                || (level_rows_cnt = (unsigned int*) malloc((size_t)LU->n * sizeof(unsigned int))) == NULL )
         {
             fprintf( stderr, "Not enough space for triangular solve via level scheduling. Terminating...\n" );
             exit( INSUFFICIENT_MEMORY );
@@ -333,8 +333,14 @@ static void tri_solve_level_sched( const sparse_matrix * const LU, const real *
 
                 levels = MAX( levels, local_level + 1 );
                 row_levels[i] = local_level;
-                level_rows[local_level * LU->n + level_rows_cnt[local_level]] = i;
+                level_rows[local_level * MAX_ROWS_PER_LEVEL + level_rows_cnt[local_level]] = i;
                 ++level_rows_cnt[local_level];
+                if( level_rows_cnt[local_level] >= MAX_ROWS_PER_LEVEL )
+                {
+                    fprintf( stderr, "Not enough space for triangular solve via level scheduling" );
+                    fprintf( stderr, " (MAX_ROWS_PER_LEVEL). Terminating...\n" );
+                    exit( INSUFFICIENT_MEMORY );
+                }
             }
         }
         else
@@ -349,8 +355,14 @@ static void tri_solve_level_sched( const sparse_matrix * const LU, const real *
 
                 levels = MAX( levels, local_level + 1 );
                 row_levels[i] = local_level;
-                level_rows[local_level * LU->n + level_rows_cnt[local_level]] = i;
+                level_rows[local_level * MAX_ROWS_PER_LEVEL + level_rows_cnt[local_level]] = i;
                 ++level_rows_cnt[local_level];
+                if( level_rows_cnt[local_level] >= MAX_ROWS_PER_LEVEL )
+                {
+                    fprintf( stderr, "Not enough space for triangular solve via level scheduling" );
+                    fprintf( stderr, " (MAX_ROWS_PER_LEVEL). Terminating...\n" );
+                    exit( INSUFFICIENT_MEMORY );
+                }
             }
         }
     }
@@ -364,7 +376,7 @@ static void tri_solve_level_sched( const sparse_matrix * const LU, const real *
             default(none) private(j, pj, local_row) shared(stderr, i, level_rows_cnt, level_rows)
             for ( j = 0; j < level_rows_cnt[i]; ++j )
             {
-                local_row = level_rows[i * LU->n + j];
+                local_row = level_rows[i * MAX_ROWS_PER_LEVEL + j];
                 x[local_row] = y[local_row];
                 for ( pj = LU->start[local_row]; pj < LU->start[local_row + 1] - 1; ++pj )
                 {
@@ -383,7 +395,7 @@ static void tri_solve_level_sched( const sparse_matrix * const LU, const real *
             default(none) private(j, pj, local_row) shared(i, level_rows_cnt, level_rows)
             for ( j = 0; j < level_rows_cnt[i]; ++j )
             {
-                local_row = level_rows[i * LU->n + j];
+                local_row = level_rows[i * MAX_ROWS_PER_LEVEL + j];
                 x[local_row] = y[local_row];
                 for ( pj = LU->start[local_row] + 1; pj < LU->start[local_row + 1]; ++pj )
                 {
@@ -787,7 +799,6 @@ int GMRES( const static_storage * const workspace, const control_params * const
                 }
             }
 
-
             workspace->h[j + 1][j] = Norm( workspace->v[j + 1], N );
             Vector_Scale( workspace->v[j + 1],
                           1. / workspace->h[j + 1][j], workspace->v[j + 1], N );
diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c
index 2746ede2..e6e63707 100644
--- a/sPuReMD/src/forces.c
+++ b/sPuReMD/src/forces.c
@@ -133,10 +133,14 @@ void Compute_NonBonded_Forces( reax_system *system, control_params *control,
 #endif
 
     if ( control->tabulate == 0)
+    {
         vdW_Coulomb_Energy( system, control, data, workspace, lists, out_control );
+    }
     else
+    {
         Tabulated_vdW_Coulomb_Energy( system, control, data, workspace,
                                       lists, out_control );
+    }
 #if defined(DEBUG_FOCUS)
     fprintf( stderr, "nonb forces - " );
 #endif
@@ -933,8 +937,13 @@ void Compute_Forces( reax_system *system, control_params *control,
 
     t_start = Get_Time( );
     if ( !control->tabulate )
+    {
         Init_Forces( system, control, data, workspace, lists, out_control );
-    else Init_Forces_Tab( system, control, data, workspace, lists, out_control );
+    }
+    else
+    {
+        Init_Forces_Tab( system, control, data, workspace, lists, out_control );
+    }
     t_elapsed = Get_Timing_Info( t_start );
     data->timing.init_forces += t_elapsed;
 #if defined(DEBUG_FOCUS)
diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h
index 3202c622..ff89296f 100644
--- a/sPuReMD/src/mytypes.h
+++ b/sPuReMD/src/mytypes.h
@@ -115,6 +115,7 @@
 
 #define MAX_ITR             10
 #define RESTART             50
+#define MAX_ROWS_PER_LEVEL  10000 /* triangular solve using level scheduling */
 
 #define ZERO           0.000000000000000e+00
 #define ALMOST_ZERO    1e-10
@@ -725,9 +726,9 @@ typedef struct
  * */
 typedef struct
 {
-    int n, m;
-    int *start;
-    int *j;
+    unsigned int n, m;
+    unsigned int *start;
+    unsigned int *j;
     real *val;
 } sparse_matrix;
 
diff --git a/sPuReMD/src/testmd.c b/sPuReMD/src/testmd.c
index 5328a5bc..c4c66453 100644
--- a/sPuReMD/src/testmd.c
+++ b/sPuReMD/src/testmd.c
@@ -191,7 +191,9 @@ int main(int argc, char* argv[])
     for ( ; data.step <= control.nsteps; data.step++ )
     {
         if ( control.T_mode )
+        {
             Temperature_Control( &control, &data, &out_control );
+        }
         Evolve( &system, &control, &data, &workspace, &lists, &out_control );
         Post_Evolve( &system, &control, &data, &workspace, &lists, &out_control );
         Output_Results(&system, &control, &data, &workspace, &lists, &out_control);
-- 
GitLab