From a869e5a13429355163c0226736a7eda35e7e793b Mon Sep 17 00:00:00 2001
From: "Kurt A. O'Hearn" <ohearnk@msu.edu>
Date: Thu, 24 May 2018 10:26:24 -0400
Subject: [PATCH] PG-PuReMD: fix charge extrapolation. Improve memory
 management of local portion of charge matrix and reset linear system size for
 each time step.

---
 PG-PuReMD/src/allocate.c       |  27 +++++---
 PG-PuReMD/src/allocate.h       |   2 +-
 PG-PuReMD/src/charges.c        | 121 ++++++++++++++++++++-------------
 PG-PuReMD/src/forces.c         |  20 ++++--
 PG-PuReMD/src/init_md.c        |  18 ++---
 PG-PuReMD/src/io_tools.c       |  24 +++----
 PG-PuReMD/src/lin_alg.c        |  26 +++++--
 PG-PuReMD/src/nonbonded.c      |   7 +-
 PG-PuReMD/src/reax_types.h     |  12 ++--
 PG-PuReMD/src/reset_tools.c    |   5 ++
 PG-PuReMD/src/torsion_angles.c |  49 ++++---------
 11 files changed, 178 insertions(+), 133 deletions(-)

diff --git a/PG-PuReMD/src/allocate.c b/PG-PuReMD/src/allocate.c
index be5c2793..3be198f2 100644
--- a/PG-PuReMD/src/allocate.c
+++ b/PG-PuReMD/src/allocate.c
@@ -106,9 +106,9 @@ void ReAllocate_System( reax_system *system, int local_cap, int total_cap )
     system->max_hbonds = srealloc( system->max_hbonds, sizeof(int) * total_cap,
             "ReAllocate_System::system->max_hbonds" );
 
-    system->cm_entries = srealloc( system->cm_entries, sizeof(int) * total_cap,
+    system->cm_entries = srealloc( system->cm_entries, sizeof(int) * local_cap,
             "ReAllocate_System::system->cm_entries" );
-    system->max_cm_entries = srealloc( system->max_cm_entries, sizeof(int) * total_cap,
+    system->max_cm_entries = srealloc( system->max_cm_entries, sizeof(int) * local_cap,
             "ReAllocate_System::system->max_cm_entries" );
 }
 
@@ -287,13 +287,13 @@ void Allocate_Workspace( reax_system *system, control_params *control,
     switch ( control->charge_method )
     {
         case QEQ_CM:
-            system->N_cm = system->N;
+            system->n_cm = system->N;
             break;
         case EE_CM:
-            system->N_cm = system->N + 1;
+            system->n_cm = system->N + 1;
             break;
         case ACKS2_CM:
-            system->N_cm = 2 * system->N + 2;
+            system->n_cm = 2 * system->N + 2;
             break;
         default:
             fprintf( stderr, "[ERROR] Unknown charge method type. Terminating...\n" );
@@ -457,29 +457,34 @@ void Reallocate_Neighbor_List( reax_list *far_nbr_list, int n, int max_intrs )
 }
 
 
-void Allocate_Matrix( sparse_matrix *H, int n, int m )
+void Allocate_Matrix( sparse_matrix *H, int n, int n_max, int m )
 {
     H->n = n;
+    H->n_max = n_max;
     H->m = m;
 
-    H->start = smalloc( sizeof(int) * n, "Allocate_Matrix::start" );
-    H->end = smalloc( sizeof(int) * n, "Allocate_Matrix::end" );
+    H->start = smalloc( sizeof(int) * n_max, "Allocate_Matrix::start" );
+    H->end = smalloc( sizeof(int) * n_max, "Allocate_Matrix::end" );
     H->entries = smalloc( sizeof(sparse_matrix_entry) * m, "Allocate_Matrix::entries" );
 }
 
 
 void Deallocate_Matrix( sparse_matrix *H )
 {
+    H->n = 0;
+    H->n_max = 0;
+    H->m = 0;
+
     sfree( H->start, "Deallocate_Matrix::start" );
     sfree( H->end, "Deallocate_Matrix::end" );
     sfree( H->entries, "Deallocate_Matrix::entries" );
 }
 
 
-static void Reallocate_Matrix( sparse_matrix *H, int n, int m )
+static void Reallocate_Matrix( sparse_matrix *H, int n, int n_max, int m )
 {
     Deallocate_Matrix( H );
-    Allocate_Matrix( H, n, m );
+    Allocate_Matrix( H, n, n_max, m );
 }
 
 
@@ -813,7 +818,7 @@ void ReAllocate( reax_system *system, control_params *control,
                 (1024 * 1024)) );
 #endif
 
-        Reallocate_Matrix( H, system->local_cap, system->total_cm_entries );
+        Reallocate_Matrix( H, system->n, system->local_cap, system->total_cm_entries );
         Init_Matrix_Row_Indices( H, system->max_cm_entries );
         realloc->cm = FALSE;
     }
diff --git a/PG-PuReMD/src/allocate.h b/PG-PuReMD/src/allocate.h
index e7182e81..db9b865d 100644
--- a/PG-PuReMD/src/allocate.h
+++ b/PG-PuReMD/src/allocate.h
@@ -44,7 +44,7 @@ void Deallocate_Grid( grid* );
 
 void Allocate_MPI_Buffers( mpi_datatypes*, int, neighbor_proc* );
 
-void Allocate_Matrix( sparse_matrix*, int, int );
+void Allocate_Matrix( sparse_matrix*, int, int, int );
 
 void Deallocate_Matrix( sparse_matrix * );
 
diff --git a/PG-PuReMD/src/charges.c b/PG-PuReMD/src/charges.c
index 7632a957..7e809032 100644
--- a/PG-PuReMD/src/charges.c
+++ b/PG-PuReMD/src/charges.c
@@ -56,6 +56,24 @@ static void Init_Linear_Solver( reax_system *system, simulation_data *data,
     int i;
     reax_atom *atom;
 
+    /* reset size of locally owned portion of charge matrix */
+    switch ( control->charge_method )
+    {
+        case QEQ_CM:
+            system->n_cm = system->N;
+            break;
+        case EE_CM:
+            system->n_cm = system->N + 1;
+            break;
+        case ACKS2_CM:
+            system->n_cm = 2 * system->N + 2;
+            break;
+        default:
+            fprintf( stderr, "[ERROR] Unknown charge method type. Terminating...\n" );
+            exit( INVALID_INPUT );
+            break;
+    }
+
     /* initialize solution vectors for linear solves in charge method */
     switch ( control->charge_method )
     {
@@ -106,7 +124,7 @@ static void Init_Linear_Solver( reax_system *system, simulation_data *data,
                 workspace->b[system->n][0] = control->cm_q_net;
             }
 
-            for ( i = system->n + 1; i < system->N_cm; ++i )
+            for ( i = system->n + 1; i < system->n_cm; ++i )
             {
                 atom = &system->my_atoms[i];
 
@@ -140,11 +158,7 @@ static void Extrapolate_Charges_QEq( const reax_system * const system,
 
     /* spline extrapolation for s & t */
     //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer
-#ifdef _OPENMP
-    #pragma omp parallel for schedule(static) \
-        default(none) private(i, s_tmp, t_tmp)
-#endif
-    for ( i = 0; i < system->N_cm; ++i )
+    for ( i = 0; i < system->n_cm; ++i )
     {
         /* no extrapolation, previous solution as initial guess */
         if ( control->cm_init_guess_extrap1 == 0 )
@@ -167,17 +181,17 @@ static void Extrapolate_Charges_QEq( const reax_system * const system,
             s_tmp = 4.0 * (system->my_atoms[i].s[0] + system->my_atoms[i].s[2]) -
                     (6.0 * system->my_atoms[i].s[1] + system->my_atoms[i].s[3]);
         }
-        /* 4th order */
-        else if ( control->cm_init_guess_extrap1 == 4 )
-        {
-            s_tmp = 5.0 * (system->my_atoms[i].s[0] - system->my_atoms[i].s[3]) +
-                10.0 * (-1.0 * system->my_atoms[i].s[1] + system->my_atoms[i].s[2]) + system->my_atoms[i].s[4];
-        }
         else
         {
             s_tmp = 0.0;
         }
 
+        /* x is used as initial guess to solver */
+        workspace->x[i][0] = s_tmp;
+    }
+
+    for ( i = 0; i < system->n_cm; ++i )
+    {
         /* no extrapolation, previous solution as initial guess */
         if ( control->cm_init_guess_extrap2 == 0 )
         {
@@ -199,32 +213,36 @@ static void Extrapolate_Charges_QEq( const reax_system * const system,
             t_tmp = 4.0 * (system->my_atoms[i].t[0] + system->my_atoms[i].t[2]) -
                 (6.0 * system->my_atoms[i].t[1] + system->my_atoms[i].t[3]);
         }
-        /* 4th order */
-        else if ( control->cm_init_guess_extrap2 == 4 )
-        {
-            t_tmp = 5.0 * (system->my_atoms[i].t[0] - system->my_atoms[i].t[3]) +
-                10.0 * (-1.0 * system->my_atoms[i].t[1] + system->my_atoms[i].t[2]) + system->my_atoms[i].t[4];
-        }
         else
         {
             t_tmp = 0.0;
         }
 
-        system->my_atoms[i].s[4] = system->my_atoms[i].s[3];
+        /* x is used as initial guess to solver */
+        workspace->x[i][1] = t_tmp;
+    }
+}
+
+
+static void Extrapolate_Charges_QEq_Part2( const reax_system * const system,
+        const control_params * const control,
+        simulation_data * const data, storage * const workspace )
+{
+    int i;
+
+    /* spline extrapolation for s & t */
+    //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer
+    for ( i = 0; i < system->n_cm; ++i )
+    {
         system->my_atoms[i].s[3] = system->my_atoms[i].s[2];
         system->my_atoms[i].s[2] = system->my_atoms[i].s[1];
         system->my_atoms[i].s[1] = system->my_atoms[i].s[0];
-        system->my_atoms[i].s[0] = s_tmp;
-        /* x is used as initial guess to solver */
-        workspace->x[i][0] = s_tmp;
+        system->my_atoms[i].s[0] = workspace->x[i][0];
 
-        system->my_atoms[i].t[4] = system->my_atoms[i].t[3];
         system->my_atoms[i].t[3] = system->my_atoms[i].t[2];
         system->my_atoms[i].t[2] = system->my_atoms[i].t[1];
         system->my_atoms[i].t[1] = system->my_atoms[i].t[0];
-        system->my_atoms[i].t[0] = t_tmp;
-        /* x is used as initial guess to solver */
-        workspace->x[i][1] = t_tmp;
+        system->my_atoms[i].t[0] = workspace->x[i][1];
     }
 }
 
@@ -238,11 +256,7 @@ static void Extrapolate_Charges_EE( const reax_system * const system,
 
     /* spline extrapolation for s */
     //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer
-#ifdef _OPENMP
-    #pragma omp parallel for schedule(static) \
-        default(none) private(i, s_tmp)
-#endif
-    for ( i = 0; i < system->N_cm; ++i )
+    for ( i = 0; i < system->n_cm; ++i )
     {
         /* no extrapolation, previous solution as initial guess */
         if ( control->cm_init_guess_extrap1 == 0 )
@@ -265,24 +279,31 @@ static void Extrapolate_Charges_EE( const reax_system * const system,
             s_tmp = 4.0 * (system->my_atoms[i].s[0] + system->my_atoms[i].s[2]) -
                     (6.0 * system->my_atoms[i].s[1] + system->my_atoms[i].s[3]);
         }
-        /* 4th order */
-        else if ( control->cm_init_guess_extrap1 == 4 )
-        {
-            s_tmp = 5.0 * (system->my_atoms[i].s[0] - system->my_atoms[i].s[3]) +
-                10.0 * (-1.0 * system->my_atoms[i].s[1] + system->my_atoms[i].s[2]) + system->my_atoms[i].s[4];
-        }
         else
         {
             s_tmp = 0.0;
         }
 
+        workspace->x[i][0] = s_tmp;
+    }
+}
+
+
+static void Extrapolate_Charges_EE_Part2( const reax_system * const system,
+        const control_params * const control,
+        simulation_data * const data, storage * const workspace )
+{
+    int i;
+
+    /* spline extrapolation for s */
+    //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer
+    for ( i = 0; i < system->n_cm; ++i )
+    {
         system->my_atoms[i].s[4] = system->my_atoms[i].s[3];
         system->my_atoms[i].s[3] = system->my_atoms[i].s[2];
         system->my_atoms[i].s[2] = system->my_atoms[i].s[1];
         system->my_atoms[i].s[1] = system->my_atoms[i].s[0];
-        system->my_atoms[i].s[0] = s_tmp;
-        /* x is used as initial guess to solver */
-        workspace->x[i][0] = s_tmp;
+        system->my_atoms[i].s[0] = workspace->x[i][0];
     }
 }
 
@@ -435,7 +456,7 @@ static void Compute_Preconditioner_ACKS2( const reax_system * const system,
             control->cm_solver_pre_comp_type == ILUT_PAR_PC )
     {
         Hptr->entries[Hptr->start[system->N + 1] - 1].val = 1.0;
-        Hptr->entries[Hptr->start[system->N_cm] - 1].val = 1.0;
+        Hptr->entries[Hptr->start[system->n_cm] - 1].val = 1.0;
     }
     
     switch ( control->cm_solver_pre_comp_type )
@@ -480,7 +501,7 @@ static void Compute_Preconditioner_ACKS2( const reax_system * const system,
             control->cm_solver_pre_comp_type == ILUT_PAR_PC )
     {
         Hptr->entries[Hptr->start[system->N + 1] - 1].val = 0.0;
-        Hptr->entries[Hptr->start[system->N_cm] - 1].val = 0.0;
+        Hptr->entries[Hptr->start[system->n_cm] - 1].val = 0.0;
     }
 }
 
@@ -588,7 +609,7 @@ static void Setup_Preconditioner_EE( const reax_system * const system,
         case DIAG_PC:
             if ( workspace->Hdia_inv == NULL )
             {
-//                workspace->Hdia_inv = scalloc( system->N_cm, sizeof( real ),
+//                workspace->Hdia_inv = scalloc( system->n_cm, sizeof( real ),
 //                        "Setup_Preconditioner_QEq::workspace->Hdiv_inv" );
             }
             break;
@@ -655,7 +676,7 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system,
             control->cm_solver_pre_comp_type == ILUT_PAR_PC )
     {
         Hptr->entries[Hptr->start[system->N + 1] - 1].val = 1.0;
-        Hptr->entries[Hptr->start[system->N_cm] - 1].val = 1.0;
+        Hptr->entries[Hptr->start[system->n_cm] - 1].val = 1.0;
     }
 
     switch ( control->cm_solver_pre_comp_type )
@@ -697,7 +718,7 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system,
             control->cm_solver_pre_comp_type == ILUT_PAR_PC )
     {
         Hptr->entries[Hptr->start[system->N + 1] - 1].val = 0.0;
-        Hptr->entries[Hptr->start[system->N_cm] - 1].val = 0.0;
+        Hptr->entries[Hptr->start[system->n_cm] - 1].val = 0.0;
     }
 }
 
@@ -727,8 +748,8 @@ static void Calculate_Charges_QEq( const reax_system * const system,
     u = all_sum[0] / all_sum[1];
     for ( i = 0; i < system->n; ++i )
     {
-        q[i] = workspace->x[i][0] - u * workspace->x[i][1];
-        system->my_atoms[i].q = q[i];
+        system->my_atoms[i].q  = workspace->x[i][0] - u * workspace->x[i][1];
+        q[i] = system->my_atoms[i].q;
     }
 
     Dist( system, mpi_data, q, REAL_PTR_TYPE, MPI_DOUBLE );
@@ -811,6 +832,8 @@ static void QEq( reax_system * const system, control_params * const control,
 #endif
 
     Calculate_Charges_QEq( system, workspace, mpi_data );
+
+    Extrapolate_Charges_QEq_Part2( system, control, data, workspace );
 }
 
 
@@ -856,6 +879,8 @@ static void EE( reax_system * const system, control_params * const control,
     data->timing.cm_solver_iters += iters;
 
     Calculate_Charges_EE( system, workspace, mpi_data );
+
+    Extrapolate_Charges_EE_Part2( system, control, data, workspace );
 }
 
 
@@ -901,6 +926,8 @@ static void ACKS2( reax_system * const system, control_params * const control,
     data->timing.cm_solver_iters += iters;
 
     Calculate_Charges_EE( system, workspace, mpi_data );
+
+    Extrapolate_Charges_EE_Part2( system, control, data, workspace );
 }
 
 
diff --git a/PG-PuReMD/src/forces.c b/PG-PuReMD/src/forces.c
index ab26b535..b8b84e8e 100644
--- a/PG-PuReMD/src/forces.c
+++ b/PG-PuReMD/src/forces.c
@@ -801,7 +801,10 @@ void Estimate_Storages( reax_system *system, control_params *control,
     {
         system->bonds[i] = 0;
         system->hbonds[i] = 0;
-        system->cm_entries[i] = 0;
+        if ( i < system->local_cap )
+        {
+            system->cm_entries[i] = 0;
+        }
     }
 
     for ( i = 0; i < system->N; ++i )
@@ -939,13 +942,19 @@ void Estimate_Storages( reax_system *system, control_params *control,
             system->max_hbonds[ system->my_atoms[i].Hindex ] = MAX(
                     (int)(system->hbonds[ system->my_atoms[i].Hindex ] * SAFE_ZONE), MIN_HBONDS );
         }
-        system->max_cm_entries[i] = MAX( (int)(system->cm_entries[i] * SAFE_ZONE), MIN_CM_ENTRIES );
+        if ( i < system->local_cap )
+        {
+            system->max_cm_entries[i] = MAX( (int)(system->cm_entries[i] * SAFE_ZONE), MIN_CM_ENTRIES );
+        }
     }
     for ( i = system->N; i < system->total_cap; ++i )
     {
         system->max_bonds[i] = MIN_BONDS;
         system->max_hbonds[i] = MIN_HBONDS;
-        system->max_cm_entries[i] = MIN_CM_ENTRIES;
+        if ( i < system->local_cap )
+        {
+            system->max_cm_entries[i] = MIN_CM_ENTRIES;
+        }
     }
 
     /* reductions to get totals */
@@ -968,7 +977,10 @@ void Estimate_Storages( reax_system *system, control_params *control,
 
         system->total_bonds += system->max_bonds[i];
         system->total_hbonds += system->max_hbonds[i];
-        system->total_cm_entries += system->max_cm_entries[i];
+        if ( i < system->local_cap )
+        {
+            system->total_cm_entries += system->max_cm_entries[i];
+        }
         system->total_thbodies += SQR( system->max_bonds[i] / 2.0 );
     }
 
diff --git a/PG-PuReMD/src/init_md.c b/PG-PuReMD/src/init_md.c
index 3505c09c..5e93f254 100644
--- a/PG-PuReMD/src/init_md.c
+++ b/PG-PuReMD/src/init_md.c
@@ -179,24 +179,24 @@ void Init_System( reax_system *system, control_params *control,
     system->Hcap = MAX( system->n * SAFER_ZONE, MIN_CAP );
 
     /* list management */
-    system->far_nbrs = (int *) smalloc( sizeof(int) * system->total_cap,
+    system->far_nbrs = smalloc( sizeof(int) * system->total_cap,
             "ReAllocate_System::system->far_nbrs" );
-    system->max_far_nbrs = (int *) smalloc( sizeof(int) * system->total_cap,
+    system->max_far_nbrs = smalloc( sizeof(int) * system->total_cap,
             "ReAllocate_System::system->max_far_nbrs" );
 
-    system->bonds = (int *) smalloc( sizeof(int) * system->total_cap,
+    system->bonds = smalloc( sizeof(int) * system->total_cap,
             "ReAllocate_System::system->bonds" );
-    system->max_bonds = (int *) smalloc( sizeof(int) * system->total_cap,
+    system->max_bonds = smalloc( sizeof(int) * system->total_cap,
             "ReAllocate_System::system->max_bonds" );
 
-    system->hbonds = (int *) smalloc( sizeof(int) * system->total_cap,
+    system->hbonds = smalloc( sizeof(int) * system->total_cap,
             "ReAllocate_System::system->hbonds" );
-    system->max_hbonds = (int *) smalloc( sizeof(int) * system->total_cap,
+    system->max_hbonds = smalloc( sizeof(int) * system->total_cap,
             "ReAllocate_System::system->max_hbonds" );
 
-    system->cm_entries = (int *) smalloc( sizeof(int) * system->total_cap,
+    system->cm_entries = smalloc( sizeof(int) * system->local_cap,
             "ReAllocate_System::system->cm_entries" );
-    system->max_cm_entries = (int *) smalloc( sizeof(int) * system->total_cap,
+    system->max_cm_entries = smalloc( sizeof(int) * system->local_cap,
             "ReAllocate_System::max_cm_entries->max_hbonds" );
     
 #if defined(DEBUG_FOCUS)
@@ -640,7 +640,7 @@ void Init_Lists( reax_system *system, control_params *control,
     
     Estimate_Storages( system, control, lists );
     
-    Allocate_Matrix( &workspace->H, system->local_cap, system->total_cm_entries );
+    Allocate_Matrix( &workspace->H, system->n, system->local_cap, system->total_cm_entries );
     Init_Matrix_Row_Indices( &workspace->H, system->max_cm_entries );
 
     if ( control->hbond_cut > 0.0 )
diff --git a/PG-PuReMD/src/io_tools.c b/PG-PuReMD/src/io_tools.c
index 8efa3695..b619a671 100644
--- a/PG-PuReMD/src/io_tools.c
+++ b/PG-PuReMD/src/io_tools.c
@@ -829,16 +829,16 @@ void Print_Far_Neighbors( reax_system *system, reax_list **lists,
 
 void Print_Sparse_Matrix( reax_system *system, sparse_matrix *A )
 {
-    int i, j;
+    int i, pj;
 
     for ( i = 0; i < A->n; ++i )
     {
-        for ( j = A->start[i]; j < A->end[i]; ++j )
+        for ( pj = A->start[i]; pj < A->end[i]; ++pj )
         {
             fprintf( stderr, "%d %d %.15e\n",
-                     system->my_atoms[i].orig_id,
-                     system->my_atoms[A->entries[j].j].orig_id,
-                     A->entries[j].val );
+                    system->my_atoms[i].orig_id,
+                    system->my_atoms[A->entries[pj].j].orig_id,
+                    A->entries[pj].val );
         }
     }
 }
@@ -846,19 +846,19 @@ void Print_Sparse_Matrix( reax_system *system, sparse_matrix *A )
 
 void Print_Sparse_Matrix2( reax_system *system, sparse_matrix *A, char *fname )
 {
-    int i, j;
+    int i, pj;
     FILE *f;
     
     f = sfopen( fname, "w", "Print_Sparse_Matrix2::f" );
 
     for ( i = 0; i < A->n; ++i )
     {
-        for ( j = A->start[i]; j < A->end[i]; ++j )
+        for ( pj = A->start[i]; pj < A->end[i]; ++pj )
         {
             fprintf( f, "%d %d %.15e\n",
-                     system->my_atoms[i].orig_id,
-                     system->my_atoms[A->entries[j].j].orig_id,
-                     A->entries[j].val );
+                    system->my_atoms[i].orig_id,
+                    system->my_atoms[A->entries[pj].j].orig_id,
+                    A->entries[pj].val );
         }
     }
 
@@ -914,7 +914,7 @@ void Print_Linear_System( reax_system *system, control_params *control,
 
     for ( i = 0; i < system->n; i++ )
     {
-        ai = &(system->my_atoms[i]);
+        ai = &system->my_atoms[i];
         fprintf( out, "%6d%2d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n",
                  ai->renumber, ai->type, ai->x[0], ai->x[1], ai->x[2],
                  workspace->s[i], workspace->b_s[i],
@@ -922,7 +922,7 @@ void Print_Linear_System( reax_system *system, control_params *control,
     }
     sfclose( out, "Print_Linear_System::out" );
 
-    /* print QEq coef matrix */
+    /* print charge matrix */
     sprintf( fname, "%s.p%dH%d", control->sim_name, system->my_rank, step );
     Print_Symmetric_Sparse( system, &workspace->H, fname ); //MATRIX CHANGES
 
diff --git a/PG-PuReMD/src/lin_alg.c b/PG-PuReMD/src/lin_alg.c
index bef005a0..89643320 100644
--- a/PG-PuReMD/src/lin_alg.c
+++ b/PG-PuReMD/src/lin_alg.c
@@ -45,18 +45,24 @@ static void dual_Sparse_MatVec( const sparse_matrix * const A,
 
     for ( i = 0; i < N; ++i )
     {
-        b[i][0] = 0;
-        b[i][1] = 0;
+        b[i][0] = 0.0;
+        b[i][1] = 0.0;
     }
 
     /* perform multiplication */
     for ( i = 0; i < A->n; ++i )
     {
         si = A->start[i];
+#if defined(HALF_LIST)
         b[i][0] += A->entries[si].val * x[i][0];
         b[i][1] += A->entries[si].val * x[i][1];
+#endif
 
+#if defined(HALF_LIST)
         for ( k = si + 1; k < A->end[i]; ++k )
+#else
+        for ( k = si; k < A->end[i]; ++k )
+#endif
         {
             j = A->entries[k].j;
             H = A->entries[k].val;
@@ -314,28 +320,34 @@ const void Sparse_MatVec( const sparse_matrix * const A, const real * const x,
         real * const b, const int N )
 {
     int i, j, k, si;
-    real H;
+    real val;
 
     for ( i = 0; i < N; ++i )
     {
-        b[i] = 0;
+        b[i] = 0.0;
     }
 
     for ( i = 0; i < A->n; ++i )
     {
         si = A->start[i];
 
+#if defined(HALF_LIST)
         b[i] += A->entries[si].val * x[i];
+#endif
 
+#if defined(HALF_LIST)
         for ( k = si + 1; k < A->end[i]; ++k )
+#else
+        for ( k = si; k < A->end[i]; ++k )
+#endif
         {
             j = A->entries[k].j;
-            H = A->entries[k].val;
+            val = A->entries[k].val;
 
-            b[i] += H * x[j];
+            b[i] += val * x[j];
 #if defined(HALF_LIST)
             //if( j < A->n ) // comment out for tryQEq
-            b[j] += H * x[i];
+            b[j] += val * x[i];
 #endif
         }
     }
diff --git a/PG-PuReMD/src/nonbonded.c b/PG-PuReMD/src/nonbonded.c
index ce03d4f0..d5db14f8 100644
--- a/PG-PuReMD/src/nonbonded.c
+++ b/PG-PuReMD/src/nonbonded.c
@@ -116,10 +116,10 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control,
                 dTap += workspace->Tap[1] / r_ij;
 
                 /* vdWaals Calculations */
+                /* shielding */
                 if ( system->reax_param.gp.vdw_type == 1
                         || system->reax_param.gp.vdw_type == 3 )
                 {
-                    // shielding
                     powr_vdW1 = POW(r_ij, p_vdW1);
                     powgi_vdW1 = POW( 1.0 / twbp->gamma_w, p_vdW1);
 
@@ -136,7 +136,8 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control,
                     CEvd = dTap * e_vdW -
                            Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) * dfn13;
                 }
-                else  // no shielding
+                /* no shielding */
+                else
                 {
                     exp1 = EXP( twbp->alpha * (1.0 - r_ij / twbp->r_vdW) );
                     exp2 = EXP( 0.5 * twbp->alpha * (1.0 - r_ij / twbp->r_vdW) );
@@ -148,10 +149,10 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control,
                            Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2);
                 }
 
+                /* innner wall */
                 if ( system->reax_param.gp.vdw_type == 2
                         || system->reax_param.gp.vdw_type == 3 )
                 {
-                    // innner wall
                     e_core = twbp->ecore * EXP(twbp->acore * (1.0 - (r_ij / twbp->rcore)));
                     data->my_en.e_vdW += Tap * e_core;
 
diff --git a/PG-PuReMD/src/reax_types.h b/PG-PuReMD/src/reax_types.h
index f6c875fc..793a4b49 100644
--- a/PG-PuReMD/src/reax_types.h
+++ b/PG-PuReMD/src/reax_types.h
@@ -76,7 +76,7 @@
 //#define OLD_BOUNDARIES
 //#define MIDPOINT_BOUNDARIES
 /* build far neighbors list as a half-list */
-#define HALF_LIST
+//#define HALF_LIST
 
 #define SUCCESS (1)
 #define FAILURE (0)
@@ -1248,8 +1248,8 @@ struct reax_system
     int N;
     /* num. atoms within simulation */
     int bigN;
-    /* dimension of sparse charge method matrix */
-    int N_cm;
+    /* dimension of locally owned part of sparse charge matrix */
+    int n_cm;
     /* num. hydrogen atoms */
     int numH;
     /* num. hydrogen atoms (GPU) */
@@ -1946,8 +1946,10 @@ struct sparse_matrix_entry
  */
 struct sparse_matrix
 {
-    /* number of rows */
+    /* number of rows active for this processor */
     int n;
+    /* max. number of rows active for this processor */
+    int n_max;
     /* number of nonzeros (NNZ) ALLOCATED */
     int m;
     /* row start pointer (last element contains ACTUAL NNZ) */
@@ -2089,7 +2091,7 @@ struct storage
     /**/
     real *v;
 
-    /* CG storage */
+    /* CG, SDM storage */
     /**/
     real *r;
     /**/
diff --git a/PG-PuReMD/src/reset_tools.c b/PG-PuReMD/src/reset_tools.c
index 79386be5..c2fb5a05 100644
--- a/PG-PuReMD/src/reset_tools.c
+++ b/PG-PuReMD/src/reset_tools.c
@@ -211,6 +211,11 @@ void Reset_Lists( reax_system *system, control_params *control,
                 Set_End_Index( i, Start_Index( i, hbond_list ), hbond_list );
             }
         }
+
+        for ( i = 0; i < system->local_cap; ++i )
+        {
+            workspace->H.end[i] = workspace->H.start[i];
+        }
     }
 }
 
diff --git a/PG-PuReMD/src/torsion_angles.c b/PG-PuReMD/src/torsion_angles.c
index b0c85e57..d8ce2ed8 100644
--- a/PG-PuReMD/src/torsion_angles.c
+++ b/PG-PuReMD/src/torsion_angles.c
@@ -58,8 +58,9 @@ static real Calculate_Omega( rvec dvec_ij, real r_ij, rvec dvec_jk, real r_jk,
     cos_jkl = COS( p_jkl->theta );
 
     /* omega */
-    unnorm_cos_omega = -1.0 * rvec_Dot( dvec_ij, dvec_jk ) * rvec_Dot( dvec_jk, dvec_kl ) +
-        SQR( r_jk ) *  rvec_Dot( dvec_ij, dvec_kl );
+    unnorm_cos_omega = -1.0 * rvec_Dot( dvec_ij, dvec_jk )
+        * rvec_Dot( dvec_jk, dvec_kl ) + SQR( r_jk )
+        * rvec_Dot( dvec_ij, dvec_kl );
 
     rvec_Cross( cross_jk_kl, dvec_jk, dvec_kl );
     unnorm_sin_omega = -1.0 * r_jk * rvec_Dot( dvec_ij, cross_jk_kl );
@@ -87,9 +88,9 @@ static real Calculate_Omega( rvec dvec_ij, real r_ij, rvec dvec_jk, real r_jk,
         poem = 1e-20;
     }
 
-    tel  = SQR( r_ij ) + SQR( r_jk ) + SQR( r_kl ) - SQR( r_li ) -
-           2.0 * ( r_ij * r_jk * cos_ijk - r_ij * r_kl * cos_ijk * cos_jkl +
-                   r_jk * r_kl * cos_jkl );
+    tel = SQR( r_ij ) + SQR( r_jk ) + SQR( r_kl ) - SQR( r_li )
+        - 2.0 * ( r_ij * r_jk * cos_ijk - r_ij * r_kl * cos_ijk
+                * cos_jkl + r_jk * r_kl * cos_jkl );
 
     arg  = tel / poem;
     if ( arg >  1.0 )
@@ -101,26 +102,6 @@ static real Calculate_Omega( rvec dvec_ij, real r_ij, rvec dvec_jk, real r_jk,
         arg = -1.0;
     }
 
-    /* fprintf( out_control->etor,
-       "%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f\n",
-       htra, htrb, htrc, hthd, hthe, hnra, hnrc, hnhd, hnhe );
-       fprintf( out_control->etor, "%12.6f%12.6f%12.6f\n",
-       dvec_ij[0]/r_ij, dvec_ij[1]/r_ij, dvec_ij[2]/r_ij );
-       fprintf( out_control->etor, "%12.6f%12.6f%12.6f\n",
-       -dvec_jk[0]/r_jk, -dvec_jk[1]/r_jk, -dvec_jk[2]/r_jk );
-       fprintf( out_control->etor, "%12.6f%12.6f%12.6f\n",
-       -dvec_kl[0]/r_kl, -dvec_kl[1]/r_kl, -dvec_kl[2]/r_kl );
-       fprintf( out_control->etor, "%12.6f%12.6f%12.6f%12.6f\n",
-       r_li, dvec_li[0], dvec_li[1], dvec_li[2] );
-       fprintf( out_control->etor, "%12.6f%12.6f%12.6f\n",
-       poem, tel, arg ); */
-    /* fprintf( out_control->etor, "%12.6f%12.6f%12.6f\n",
-       -p_ijk->dcos_dk[0]/sin_ijk, -p_ijk->dcos_dk[1]/sin_ijk,
-       -p_ijk->dcos_dk[2]/sin_ijk );
-       fprintf( out_control->etor, "%12.6f%12.6f%12.6f\n",
-       -p_jkl->dcos_dk[0]/sin_jkl, -p_jkl->dcos_dk[1]/sin_jkl,
-       -p_jkl->dcos_dk[2]/sin_jkl );*/
-
     if ( sin_ijk >= 0 && sin_ijk <= MIN_SINE )
     {
         sin_ijk = MIN_SINE;
@@ -170,7 +151,7 @@ void Torsion_Angles( reax_system *system, control_params *control,
         simulation_data *data, storage *workspace,
         reax_list **lists, output_controls *out_control )
 {
-    int i, j, k, l, pi, pj, pk, pl, pij, plk, natoms;
+    int i, j, k, l, pi, pj, pk, pl, pij, plk;
     int type_i, type_j, type_k, type_l;
     int start_j, end_j, start_k, end_k;
     int start_pj, end_pj, start_pk, end_pk;
@@ -209,7 +190,6 @@ void Torsion_Angles( reax_system *system, control_params *control,
     reax_list *bond_list;
     reax_list *thb_list;
 
-    natoms = system->n;
     num_frb_intrs = 0;
     p_tor2 = system->reax_param.gp.l[23];
     p_tor3 = system->reax_param.gp.l[24];
@@ -218,7 +198,7 @@ void Torsion_Angles( reax_system *system, control_params *control,
     bond_list = lists[BONDS];
     thb_list = lists[THREE_BODIES];
 
-    for ( j = 0; j < natoms; ++j )
+    for ( j = 0; j < system->n; ++j )
     {
         type_j = system->my_atoms[j].type;
         Delta_j = workspace->Delta_boc[j];
@@ -236,8 +216,9 @@ void Torsion_Angles( reax_system *system, control_params *control,
             /* see if there are any 3-body interactions involving j and k
              * where j is the central atom. Otherwise there is no point in
              * trying to form a 4-body interaction out of this neighborhood */
-            if ( system->my_atoms[j].orig_id < system->my_atoms[k].orig_id &&
-                    bo_jk->BO > control->thb_cut && Num_Entries(pk, thb_list) )
+            if ( system->my_atoms[j].orig_id < system->my_atoms[k].orig_id
+                    && bo_jk->BO > control->thb_cut
+                    && Num_Entries(pk, thb_list) )
             {
                 start_k = Start_Index( k, bond_list );
                 end_k = End_Index( k, bond_list );
@@ -397,7 +378,7 @@ void Torsion_Angles( reax_system *system, control_params *control,
                                     CEtors9 = fn10 * sin_ijk * sin_jkl *
                                               (0.5 * fbp->V1 - 2.0 * fbp->V2 * exp_tor1 * cos_omega +
                                                1.5 * fbp->V3 * (cos2omega + 2.0 * SQR(cos_omega)));
-                                    /* end  of torsion energy */
+                                    /* end of torsion energy */
 
                                     /* 4-body conjugation energy */
                                     fn12 = exp_cot2_ij * exp_cot2_jk * exp_cot2_kl;
@@ -527,14 +508,14 @@ void Torsion_Angles( reax_system *system, control_params *control,
                                     /* fprintf( out_control->etor, "%12.6f%12.6f%12.6f%12.6f\n",
                                        fbp->V1, fbp->V2, fbp->V3, fbp->p_tor1 );*/
 
-                                    fprintf(out_control->etor,
+                                    fprintf( out_control->etor,
                                             //"%6d%6d%6d%6d%24.15e%24.15e%24.15e%24.15e\n",
                                             "%6d%6d%6d%6d%12.4f%12.4f%12.4f%12.4f\n",
                                             system->my_atoms[i].orig_id, system->my_atoms[j].orig_id,
                                             system->my_atoms[k].orig_id, system->my_atoms[l].orig_id,
                                             RAD2DEG(omega), BOA_jk, e_tor, data->my_en.e_tor );
 
-                                    fprintf(out_control->econ,
+                                    fprintf( out_control->econ,
                                             //"%6d%6d%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n",
                                             "%6d%6d%6d%6d%12.4f%12.4f%12.4f%12.4f%12.4f%12.4f\n",
                                             system->my_atoms[i].orig_id, system->my_atoms[j].orig_id,
@@ -558,7 +539,7 @@ void Torsion_Angles( reax_system *system, control_params *control,
                                     rvec_ScaledAdd( workspace->f_tor[j],
                                                     CEtors7, p_ijk->dcos_dj );
 
-				rvec_ScaledAdd( workspace->f_tor[k],
+                                    rvec_ScaledAdd( workspace->f_tor[k],
                                                     CEtors7, p_ijk->dcos_di );
 
                                     rvec_ScaledAdd( workspace->f_tor[j],
-- 
GitLab