From 9290ebc5e21222119f7a4291eca4d6f774d87268 Mon Sep 17 00:00:00 2001
From: "Kurt A. O'Hearn" <ohearnku@msu.edu>
Date: Wed, 19 Dec 2018 12:16:24 -0800
Subject: [PATCH] PuReMD-old: fix timing issues. Remove fine-grained timing of
 Init_Forces due to high overhead. Disable SAI preconditioner optimization of
 changing far nbr list and matrix formats based on preconditioner refactoring
 step (not working). Tweak to not product numeric overflow when setting up SAI
 preconditioner (pivot sampling).

---
 PuReMD/src/forces.c         | 46 +++++----------------
 PuReMD/src/integrate.c      | 80 ++++++++++++++++++-------------------
 PuReMD/src/linear_solvers.c |  2 +-
 3 files changed, 52 insertions(+), 76 deletions(-)

diff --git a/PuReMD/src/forces.c b/PuReMD/src/forces.c
index e8693c09..2eb3a19e 100644
--- a/PuReMD/src/forces.c
+++ b/PuReMD/src/forces.c
@@ -353,9 +353,6 @@ void Init_Forces( reax_system *system, control_params *control,
     int total_sum[6];
     int nt_flag;
 #endif
-    real t_start, t_cm_init, total_cm_init;
-
-    t_cm_init = 0;
 
     far_nbrs = lists[FAR_NBRS];
     bonds = lists[BONDS];
@@ -379,7 +376,6 @@ void Init_Forces( reax_system *system, control_params *control,
     renbr = (data->step - data->prev_steps) % control->reneighbor == 0;
 
 #if defined(NEUTRAL_TERRITORY)
-    t_start = MPI_Wtime();
     nt_flag = 1;
     if( renbr )
     {
@@ -421,7 +417,6 @@ void Init_Forces( reax_system *system, control_params *control,
 
     mark[0] = mark[1] = 1;
     mark[2] = mark[3] = mark[4] = mark[5] = 2;
-    t_cm_init += MPI_Wtime() - t_start;
 #endif
 
     for ( i = 0; i < system->N; ++i )
@@ -466,12 +461,10 @@ void Init_Forces( reax_system *system, control_params *control,
         ihb_top = -1;
         if ( local == 1 )
         {
-            t_start = MPI_Wtime();
             H->start[i] = Htop;
             H->entries[Htop].j = i;
             H->entries[Htop].val = sbp_i->eta;
             ++Htop;
-            t_cm_init += MPI_Wtime() - t_start;
 
             if ( control->hbond_cut > 0 )
             {
@@ -534,7 +527,6 @@ void Init_Forces( reax_system *system, control_params *control,
 
                 if ( local == 1 )
                 {
-                    t_start = MPI_Wtime();
                     /* H matrix entry */
 #if defined(NEUTRAL_TERRITORY)
                     if ( atom_j->nt_dir > 0 || (j < system->n
@@ -580,7 +572,6 @@ void Init_Forces( reax_system *system, control_params *control,
                         ++Htop;
                     }
 #endif
-                    t_cm_init += MPI_Wtime() - t_start;
 
                     /* hydrogen bond lists */
                     if ( control->hbond_cut > 0 && (ihb == 1 || ihb == 2) &&
@@ -612,7 +603,6 @@ void Init_Forces( reax_system *system, control_params *control,
 #if defined(NEUTRAL_TERRITORY)
                 else if ( local == 2 )
                 {
-                    t_start = MPI_Wtime();
                     /* H matrix entry */
                     if( ( atom_j->nt_dir != -1 && mark[atom_i->nt_dir] != mark[atom_j->nt_dir] 
                                 && ( H->format == SYM_FULL_MATRIX
@@ -645,7 +635,6 @@ void Init_Forces( reax_system *system, control_params *control,
 
                         ++Htop;
                     }
-                    t_cm_init += MPI_Wtime() - t_start;
                 }
 #endif
 
@@ -670,7 +659,6 @@ void Init_Forces( reax_system *system, control_params *control,
         }
 
         Set_End_Index( i, btop_i, bonds );
-        t_start = MPI_Wtime();
         if ( local == 1 )
         {
             H->end[i] = Htop;
@@ -691,7 +679,6 @@ void Init_Forces( reax_system *system, control_params *control,
             }
         }
 #endif
-        t_cm_init += MPI_Wtime() - t_start;
     }
 
     if ( far_nbrs->format == FULL_LIST )
@@ -725,12 +712,6 @@ void Init_Forces( reax_system *system, control_params *control,
         }
     }
 
-    MPI_Reduce(&t_cm_init, &total_cm_init, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world);
-    if( system->my_rank == MASTER_NODE )
-    {
-        data->timing.init_qeq += total_cm_init / control->nprocs;
-    }
-
 #if defined(DEBUG)
     Print_Sparse_Matrix2( system, H, NULL );
 #endif
@@ -1211,7 +1192,7 @@ void Compute_Forces( reax_system *system, control_params *control,
     MPI_Comm comm;
     int qeq_flag;
 #if defined(LOG_PERFORMANCE)
-    real t_start = 0, t_elapsed;
+    real t_start, t_end;
 
     //MPI_Barrier( mpi_data->world );
     if ( system->my_rank == MASTER_NODE )
@@ -1237,14 +1218,12 @@ void Compute_Forces( reax_system *system, control_params *control,
     //MPI_Barrier( mpi_data->world );
     if ( system->my_rank == MASTER_NODE )
     {
-        //Update_Timing_Info( &t_start, &(data->timing.init_forces) );
-        real t_end = MPI_Wtime();
+        t_end = MPI_Wtime();
         data->timing.init_forces += t_end - t_start;
         t_start = t_end;
     }
 #endif
 
-
     /********* bonded interactions ************/
     Compute_Bonded_Forces( system, control, data, workspace,
                            lists, out_control, mpi_data->world );
@@ -1253,7 +1232,9 @@ void Compute_Forces( reax_system *system, control_params *control,
     //MPI_Barrier( mpi_data->world );
     if ( system->my_rank == MASTER_NODE )
     {
-        t_start = MPI_Wtime();
+        t_end = MPI_Wtime();
+        data->timing.bonded += t_end - t_start;
+        t_start = t_end;
     }
 #endif
 #if defined(DEBUG_FOCUS)
@@ -1262,7 +1243,6 @@ void Compute_Forces( reax_system *system, control_params *control,
     MPI_Barrier( mpi_data->world );
 #endif
 
-
     /**************** qeq ************************/
 #if defined(PURE_REAX)
     if ( qeq_flag )
@@ -1272,8 +1252,9 @@ void Compute_Forces( reax_system *system, control_params *control,
     //MPI_Barrier( mpi_data->world );
     if ( system->my_rank == MASTER_NODE )
     {
-        t_elapsed = MPI_Wtime() - t_start;
-        data->timing.cm += t_elapsed;
+        t_end = MPI_Wtime();
+        data->timing.cm += t_end - t_start;
+        t_start = t_end;
     }
 #endif
 #if defined(DEBUG_FOCUS)
@@ -1282,7 +1263,6 @@ void Compute_Forces( reax_system *system, control_params *control,
 #endif
 #endif //PURE_REAX
 
-
     /********* nonbonded interactions ************/
     Compute_NonBonded_Forces( system, control, data, workspace,
                               lists, out_control, mpi_data->world );
@@ -1291,9 +1271,8 @@ void Compute_Forces( reax_system *system, control_params *control,
     //MPI_Barrier( mpi_data->world );
     if ( system->my_rank == MASTER_NODE )
     {
-        //Update_Timing_Info( &t_start, &(data->timing.nonb) );
-        real t_end = MPI_Wtime();
-        data->timing.nonb += t_end - t_start;
+        t_end = MPI_Wtime();
+        data->timing.nonb += t_end - t_start + data->timing.cm;;
         t_start = t_end;
     }
 #endif
@@ -1303,7 +1282,6 @@ void Compute_Forces( reax_system *system, control_params *control,
     MPI_Barrier( mpi_data->world );
 #endif
 
-
     /*********** total force ***************/
     Compute_Total_Force( system, control, data, workspace, lists, mpi_data );
 
@@ -1311,10 +1289,8 @@ void Compute_Forces( reax_system *system, control_params *control,
     //MPI_Barrier( mpi_data->world );
     if ( system->my_rank == MASTER_NODE )
     {
-        //Update_Timing_Info( &t_start, &(data->timing.bonded) );
-        real t_end = MPI_Wtime();
+        t_end = MPI_Wtime();
         data->timing.bonded += t_end - t_start;
-        t_start = t_end;
     }
 #endif
 #if defined(DEBUG_FOCUS)
diff --git a/PuReMD/src/integrate.c b/PuReMD/src/integrate.c
index c3b2719c..6b9d6b5a 100644
--- a/PuReMD/src/integrate.c
+++ b/PuReMD/src/integrate.c
@@ -56,16 +56,16 @@ void Velocity_Verlet_NVE( reax_system* system, control_params* control,
         /* HACK: currently required that preconditioner (re)computation step
          * and reneighbor step (i.e., (re)construct far nbr list)
          * are the same value, so use reneighbor for now */
-        if ( renbr )
-        {
-            lists[FAR_NBRS]->format = FULL_LIST;
-            workspace->H->format = SYM_FULL_MATRIX;
-        }
-        else
-        {
-            lists[FAR_NBRS]->format = HALF_LIST;
-            workspace->H->format = SYM_HALF_MATRIX;
-        }
+//        if ( renbr )
+//        {
+//            lists[FAR_NBRS]->format = FULL_LIST;
+//            workspace->H->format = SYM_FULL_MATRIX;
+//        }
+//        else
+//        {
+//            lists[FAR_NBRS]->format = HALF_LIST;
+//            workspace->H->format = SYM_HALF_MATRIX;
+//        }
     }
 
     for ( i = 0; i < system->n; i++ )
@@ -135,16 +135,16 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system* system,
         /* HACK: currently required that preconditioner (re)computation step
          * and reneighbor step (i.e., (re)construct far nbr list)
          * are the same value, so use reneighbor for now */
-        if ( renbr )
-        {
-            lists[FAR_NBRS]->format = FULL_LIST;
-            workspace->H->format = SYM_FULL_MATRIX;
-        }
-        else
-        {
-            lists[FAR_NBRS]->format = HALF_LIST;
-            workspace->H->format = SYM_HALF_MATRIX;
-        }
+//        if ( renbr )
+//        {
+//            lists[FAR_NBRS]->format = FULL_LIST;
+//            workspace->H->format = SYM_FULL_MATRIX;
+//        }
+//        else
+//        {
+//            lists[FAR_NBRS]->format = HALF_LIST;
+//            workspace->H->format = SYM_HALF_MATRIX;
+//        }
     }
 
     for ( i = 0; i < system->n; i++ )
@@ -246,16 +246,16 @@ void Velocity_Verlet_Berendsen_NVT( reax_system* system,
         /* HACK: currently required that preconditioner (re)computation step
          * and reneighbor step (i.e., (re)construct far nbr list)
          * are the same value, so use reneighbor for now */
-        if ( renbr )
-        {
-            lists[FAR_NBRS]->format = FULL_LIST;
-            workspace->H->format = SYM_FULL_MATRIX;
-        }
-        else
-        {
-            lists[FAR_NBRS]->format = HALF_LIST;
-            workspace->H->format = SYM_HALF_MATRIX;
-        }
+//        if ( renbr )
+//        {
+//            lists[FAR_NBRS]->format = FULL_LIST;
+//            workspace->H->format = SYM_FULL_MATRIX;
+//        }
+//        else
+//        {
+//            lists[FAR_NBRS]->format = HALF_LIST;
+//            workspace->H->format = SYM_HALF_MATRIX;
+//        }
     }
 
     /* velocity verlet, 1st part */
@@ -353,16 +353,16 @@ void Velocity_Verlet_Berendsen_NPT( reax_system* system,
         /* HACK: currently required that preconditioner (re)computation step
          * and reneighbor step (i.e., (re)construct far nbr list)
          * are the same value, so use reneighbor for now */
-        if ( renbr )
-        {
-            lists[FAR_NBRS]->format = FULL_LIST;
-            workspace->H->format = SYM_FULL_MATRIX;
-        }
-        else
-        {
-            lists[FAR_NBRS]->format = HALF_LIST;
-            workspace->H->format = SYM_HALF_MATRIX;
-        }
+//        if ( renbr )
+//        {
+//            lists[FAR_NBRS]->format = FULL_LIST;
+//            workspace->H->format = SYM_FULL_MATRIX;
+//        }
+//        else
+//        {
+//            lists[FAR_NBRS]->format = HALF_LIST;
+//            workspace->H->format = SYM_HALF_MATRIX;
+//        }
     }
 
     /* velocity verlet, 1st part */
diff --git a/PuReMD/src/linear_solvers.c b/PuReMD/src/linear_solvers.c
index 4a7b258e..395c6934 100644
--- a/PuReMD/src/linear_solvers.c
+++ b/PuReMD/src/linear_solvers.c
@@ -188,7 +188,7 @@ real setup_sparse_approx_inverse( reax_system *system, simulation_data *data, st
     {
         n_local += A->end[i] - A->start[i];
     }
-    s_local = (int) (12.0 * log2(n_local * nprocs));
+    s_local = (int) (12.0 * (log2(n_local) + log2(nprocs)));
     
     t_start = MPI_Wtime();
     MPI_Allreduce( &n_local, &n, 1, MPI_INT, MPI_SUM, comm );
-- 
GitLab