From a7c6e0e53bb45e292c68429582371be5c1fd87be Mon Sep 17 00:00:00 2001
From: "Kurt A. O'Hearn" <ohearnku@msu.edu>
Date: Mon, 21 Nov 2016 14:21:03 -0500
Subject: [PATCH] PG-PuReMD: more code clean-up. Update build system. Update
 template config file.

---
 PG-PuReMD/Makefile.am        |  2 +-
 PG-PuReMD/src/allocate.c     |  3 ++
 PG-PuReMD/src/cuda_forces.cu | 17 ++++---
 PG-PuReMD/src/cuda_utils.cu  | 98 ++++++++++++++++++------------------
 PG-PuReMD/src/cuda_utils.h   | 23 +++++----
 PG-PuReMD/src/ffield.c       |  3 +-
 PG-PuReMD/src/init_md.c      | 10 ++--
 PG-PuReMD/src/integrate.c    | 55 +++++++++++---------
 PG-PuReMD/src/list.c         | 94 +++++++++++++++++++++++-----------
 PG-PuReMD/src/validation.cu  | 15 +++++-
 environ/parallel_control     |  2 +-
 11 files changed, 195 insertions(+), 127 deletions(-)

diff --git a/PG-PuReMD/Makefile.am b/PG-PuReMD/Makefile.am
index 3aa47517..706195a0 100644
--- a/PG-PuReMD/Makefile.am
+++ b/PG-PuReMD/Makefile.am
@@ -30,7 +30,7 @@ bin_pg_puremd_SOURCES = src/allocate.c src/basic_comm.c src/ffield.c src/grid.c
 	src/qEq.c src/bond_orders.c src/multi_body.c src/bonds.c src/valence_angles.c \
 	src/hydrogen_bonds.c src/torsion_angles.c src/nonbonded.c src/forces.c \
 	src/integrate.c src/init_md.c src/parallelreax.c
-include_HEADERS = src/reax_defs.h src/reax_types.h \
+include_HEADERS = src/reax_types.h \
         src/allocate.h src/basic_comm.h src/ffield.h src/grid.h src/list.h \
 	src/lookup.h src/io_tools.h src/reset_tools.h src/restart.h src/random.h \
 	src/tool_box.h src/traj.h src/analyze.h src/box.h src/system_props.h \
diff --git a/PG-PuReMD/src/allocate.c b/PG-PuReMD/src/allocate.c
index f80a13b9..80b87c13 100644
--- a/PG-PuReMD/src/allocate.c
+++ b/PG-PuReMD/src/allocate.c
@@ -391,6 +391,7 @@ int Allocate_Workspace( reax_system *system, control_params *control,
 void Reallocate_Neighbor_List( reax_list *far_nbrs, int n, int num_intrs )
 {
     Delete_List( far_nbrs);
+
     if (!Make_List( n, num_intrs, TYP_FAR_NEIGHBOR, far_nbrs))
     {
         fprintf(stderr, "Problem in initializing far nbrs list. Terminating!\n");
@@ -398,10 +399,12 @@ void Reallocate_Neighbor_List( reax_list *far_nbrs, int n, int num_intrs )
     }
 }
 
+
 #ifdef HAVE_CUDA
 void Cuda_Reallocate_Neighbor_List( reax_list *far_nbrs, int n, int num_intrs )
 {
     Dev_Delete_List( far_nbrs);
+
     if (!Dev_Make_List( n, num_intrs, TYP_FAR_NEIGHBOR, far_nbrs))
     {
         fprintf(stderr, "Problem in initializing far nbrs list. Terminating!\n");
diff --git a/PG-PuReMD/src/cuda_forces.cu b/PG-PuReMD/src/cuda_forces.cu
index b1b1737d..cec6c0eb 100644
--- a/PG-PuReMD/src/cuda_forces.cu
+++ b/PG-PuReMD/src/cuda_forces.cu
@@ -247,8 +247,9 @@ void Cuda_Estimate_Storages(reax_system *system, control_params *control,
          (control_params *)control->d_control_params, *(*dev_lists + FAR_NBRS), system->reax_param.num_atom_types,
          system->n, system->N, system->Hcap, system->total_cap, 
          d_Htop, d_num_3body, d_bond_top, d_hb_top );
-    cudaThreadSynchronize ();
-    cudaCheckError ();
+
+    cudaThreadSynchronize();
+    cudaCheckError();
 
     copy_host_device( Htop, d_Htop, sizeof (int), cudaMemcpyDeviceToHost, "Htop");
     copy_host_device( num_3body, d_num_3body, sizeof (int), cudaMemcpyDeviceToHost, "num_3body");
@@ -287,18 +288,19 @@ void Cuda_Estimate_Storages(reax_system *system, control_params *control,
     }
     system->max_hbonds = max_hbonds * SAFER_ZONE;
 
-//#if defined(DEBUG)
+#if defined(DEBUG)
     fprintf (stderr, " TOTAL DEVICE BOND COUNT: %d \n", bond_count);
     fprintf (stderr, " TOTAL DEVICE HBOND COUNT: %d \n", hbond_count);
     fprintf (stderr, " TOTAL DEVICE SPARSE COUNT: %d \n", *Htop);
     fprintf (stderr, "p:%d --> Bonds(%d, %d) HBonds (%d, %d) *******\n", 
             system->my_rank, min_bonds, max_bonds, min_hbonds, max_hbonds);
-//#endif
+#endif
 
     ker_init_system_atoms <<<blocks, ST_BLOCK_SIZE>>>
         (system->d_my_atoms, system->N, d_hb_top, d_bond_top );
-    cudaThreadSynchronize ();
-    cudaCheckError ();
+
+    cudaThreadSynchronize();
+    cudaCheckError();
 }
 
 
@@ -950,8 +952,11 @@ int Cuda_Validate_Lists (reax_system *system, storage *workspace, reax_list **li
 
     //update the current step max_sp_entries;
     realloc->Htop = max_sp_entries;
+
+#if defined(DEBUG)
     fprintf (stderr, "p:%d - Cuda_Reallocate: Total H matrix entries: %d, cap: %d, used: %d \n", 
             system->my_rank, dev_workspace->H.n, dev_workspace->H.m, total_sp_entries);
+#endif
 
     if (total_sp_entries >= dev_workspace->H.m) {
         fprintf (stderr, "p:%d - **ran out of space for sparse matrix: step: %d, allocated: %d, used: %d \n", 
diff --git a/PG-PuReMD/src/cuda_utils.cu b/PG-PuReMD/src/cuda_utils.cu
index d8164bf1..2aa87574 100644
--- a/PG-PuReMD/src/cuda_utils.cu
+++ b/PG-PuReMD/src/cuda_utils.cu
@@ -1,33 +1,36 @@
 #include "cuda_utils.h"
 
 
-extern "C" void cuda_malloc (void **ptr, int size, int mem_set, char *msg) {
+extern "C" void cuda_malloc(void **ptr, int size, int mem_set, const char *msg)
+{
 
     cudaError_t retVal = cudaSuccess;
 
-    retVal = cudaMalloc (ptr, size);
+    retVal = cudaMalloc( ptr, size );
 
-    if (retVal != cudaSuccess)
+    if( retVal != cudaSuccess )
     {
-        fprintf (stderr, "Failed to allocate memory on device for the res: %s...  exiting with code: %d size: %d \n", 
-                msg, retVal, size);
+        fprintf( stderr, "Failed to allocate memory on device for the res: %s...  exiting with code: %d size: %d \n", 
+                msg, retVal, size );
         exit (-1);
     }  
 
-    if (mem_set)
+    if( mem_set )
     {
-        retVal = cudaMemset (*ptr, 0, size);
+        retVal = cudaMemset( *ptr, 0, size );
 
-        if (retVal != cudaSuccess) {
-            fprintf (stderr, "Failed to memset memory on device for resource %s\n", 
-                    msg);
-            exit (-1);
+        if( retVal != cudaSuccess )
+        {
+            fprintf( stderr, "Failed to memset memory on device for resource %s\n", 
+                    msg );
+            exit( -1 );
         }
     }  
 }
 
 
-extern "C" void cuda_free (void *ptr, char *msg) {
+extern "C" void cuda_free(void *ptr, const char *msg)
+{
 
     cudaError_t retVal = cudaSuccess;
 
@@ -36,106 +39,101 @@ extern "C" void cuda_free (void *ptr, char *msg) {
         return;
     }  
 
-    retVal = cudaFree (ptr);
+    retVal = cudaFree( ptr );
 
-    if (retVal != cudaSuccess)
+    if( retVal != cudaSuccess )
     {
-        fprintf (stderr, "Failed to release memory on device for res %s... exiting with code %d -- Address %ld\n", 
-                msg, retVal, (long int) ptr);
+        fprintf( stderr, "Failed to release memory on device for res %s... exiting with code %d -- Address %ld\n", 
+                msg, retVal, (long int) ptr );
         return;
     }  
 }
 
 
-extern "C" void cuda_memset (void *ptr, int data, size_t count, char *msg){
+extern "C" void cuda_memset(void *ptr, int data, size_t count, const char *msg){
     cudaError_t retVal = cudaSuccess;
 
-    retVal = cudaMemset (ptr, data, count);
+    retVal = cudaMemset( ptr, data, count );
 
-    if (retVal != cudaSuccess)
+    if( retVal != cudaSuccess )
     {
-        fprintf (stderr, "Failed to memset memory on device for %s, cuda code %d\n", 
-                msg, retVal);
-        exit (-1);
+        fprintf( stderr, "Failed to memset memory on device for %s, cuda code %d\n", 
+                msg, retVal );
+        exit( -1 );
     }
 }
 
 
-extern "C" void copy_host_device (void *host, void *dev, int size, enum cudaMemcpyKind dir, char *msg)
+extern "C" void copy_host_device(void *host, void *dev, int size, enum cudaMemcpyKind dir, const char *msg)
 {
     cudaError_t retVal = cudaErrorNotReady;
 
-    if (dir == cudaMemcpyHostToDevice)
+    if( dir == cudaMemcpyHostToDevice )
     {
-        retVal = cudaMemcpy (dev, host, size, cudaMemcpyHostToDevice);
+        retVal = cudaMemcpy( dev, host, size, cudaMemcpyHostToDevice );
     }
     else
     {
-        retVal = cudaMemcpy (host, dev, size, cudaMemcpyDeviceToHost);
+        retVal = cudaMemcpy( host, dev, size, cudaMemcpyDeviceToHost );
     }
 
-    if (retVal != cudaSuccess)
+    if( retVal != cudaSuccess )
     {
-        fprintf (stderr, "could not copy resource %s from host to device: reason %d \n",
-                msg, retVal);
-        exit (-1);
+        fprintf( stderr, "could not copy resource %s from host to device: reason %d \n",
+                msg, retVal );
+        exit( -1 );
     }
 }
 
 
-extern "C" void copy_device (void *dest, void *src, int size, char *msg)
+extern "C" void copy_device(void *dest, void *src, int size, const char *msg)
 {
     cudaError_t retVal = cudaErrorNotReady;
 
-    retVal = cudaMemcpy (dest, src, size, cudaMemcpyDeviceToDevice);
-    if (retVal != cudaSuccess) {
-        fprintf (stderr, "could not copy resource %s from device to device: reason %d \n",
-                msg, retVal);
-        exit (-1);
+    retVal = cudaMemcpy( dest, src, size, cudaMemcpyDeviceToDevice );
+    if( retVal != cudaSuccess )
+    {
+        fprintf( stderr, "could not copy resource %s from device to device: reason %d \n",
+                msg, retVal );
+        exit( -1 );
     }
 }
 
 
-extern "C" void compute_blocks ( int *blocks, int *block_size, int count )
+extern "C" void compute_blocks( int *blocks, int *block_size, int count )
 {
     *block_size = CUDA_BLOCK_SIZE;
     *blocks = (int) CEIL((double) count / CUDA_BLOCK_SIZE);
 }
 
 
-extern "C" void compute_matvec_blocks ( int *blocks, int count )
+extern "C" void compute_matvec_blocks( int *blocks, int count )
 {
 
     *blocks = (int) CEIL((double) count * MATVEC_KER_THREADS_PER_ROW / MATVEC_BLOCK_SIZE);
 }
 
 
-extern "C" void compute_nearest_pow_2 (int blocks, int *result)
+extern "C" void compute_nearest_pow_2(int blocks, int *result)
 {
 
   *result = (int) EXP2( CEIL( LOG2((double) blocks) ) );
 }
 
 
-void print_info ()
+extern "C" void print_device_mem_usage()
 {
     size_t total, free;
 
-    cudaMemGetInfo (&free, &total);
+    cudaMemGetInfo( &free, &total );
 
-    if ( cudaGetLastError () != cudaSuccess )
+    if ( cudaGetLastError() != cudaSuccess )
     {
-        fprintf (stderr, "Error on the memory call \n");
+        fprintf( stderr, "Error on the memory call \n" );
         return;
     }
 
-    fprintf (stderr, "Total %ld Mb %ld gig %ld , free %ld, Mb %ld , gig %ld \n", 
+    fprintf( stderr, "Total %ld Mb %ld gig %ld , free %ld, Mb %ld , gig %ld \n", 
             total, total/(1024*1024), total/ (1024*1024*1024), 
             free, free/(1024*1024), free/ (1024*1024*1024) );
 }
-
-
-extern "C" void print_device_mem_usage ()
-{
-    print_info ();
-}
diff --git a/PG-PuReMD/src/cuda_utils.h b/PG-PuReMD/src/cuda_utils.h
index 8f29667e..f4c6ae2e 100644
--- a/PG-PuReMD/src/cuda_utils.h
+++ b/PG-PuReMD/src/cuda_utils.h
@@ -8,22 +8,22 @@
 
 #include "reax_types.h"
 
+
 #ifdef __cplusplus
 extern "C"  {
 #endif
 
-void compute_blocks (int *, int *, int);
-void compute_nearest_pow_2 (int blocks, int *result);
-void compute_matvec_blocks (int *, int);
-
+void compute_blocks(int *, int *, int);
+void compute_nearest_pow_2(int blocks, int *result);
+void compute_matvec_blocks(int *, int);
 
-void cuda_malloc (void **, int , int , char *);
-void cuda_free (void *, char *);
-void cuda_memset (void *, int , size_t , char *);
-void copy_host_device (void *, void *, int , enum cudaMemcpyKind, char *);
-void copy_device (void *, void *, int , char *);
+void cuda_malloc(void **, int , int , const char *);
+void cuda_free(void *, const char *);
+void cuda_memset(void *, int , size_t , const char *);
+void copy_host_device(void *, void *, int , enum cudaMemcpyKind, const char *);
+void copy_device(void *, void *, int , const char *);
 
-void print_device_mem_usage ();
+void print_device_mem_usage();
 
 #define cudaCheckError()    __cudaCheckError( __FILE__, __LINE__ )
 inline void __cudaCheckError( const char *file, const int line )
@@ -31,7 +31,7 @@ inline void __cudaCheckError( const char *file, const int line )
     cudaError err = cudaGetLastError();
     if ( cudaSuccess != err )
     {
-        fprintf (stderr, "Failed .. %s:%d -- gpu erro code %d\n", file, line, err );
+        fprintf( stderr, "Failed .. %s:%d -- gpu erro code %d\n", file, line, err );
         exit( -1 );
     }
 
@@ -44,6 +44,7 @@ inline void __cudaCheckError( const char *file, const int line )
        exit( -1 );
     }
     */
+
     return;
 }
 
diff --git a/PG-PuReMD/src/ffield.c b/PG-PuReMD/src/ffield.c
index 9584dd72..a260e628 100644
--- a/PG-PuReMD/src/ffield.c
+++ b/PG-PuReMD/src/ffield.c
@@ -342,6 +342,7 @@ char Read_Force_Field( char *ffield_file, reax_interaction *reax,
 
     /* Equate vval3 to valf for first-row elements (25/10/2004) */
     for ( i = 0; i < reax->num_atom_types; i++ )
+    {
         if ( reax->sbp[i].mass < 21 &&
                 reax->sbp[i].valency_val != reax->sbp[i].valency_boc )
         {
@@ -349,7 +350,7 @@ char Read_Force_Field( char *ffield_file, reax_interaction *reax,
                      reax->sbp[i].name );
             reax->sbp[i].valency_val = reax->sbp[i].valency_boc;
         }
-
+    }
 
     /* next line is number of two body combination and some comments */
     fgets(s, MAX_LINE, fp);
diff --git a/PG-PuReMD/src/init_md.c b/PG-PuReMD/src/init_md.c
index 8be04fd2..f2df3ef7 100644
--- a/PG-PuReMD/src/init_md.c
+++ b/PG-PuReMD/src/init_md.c
@@ -955,9 +955,10 @@ int  Cuda_Init_Lists( reax_system *system, control_params *control,
     //MATRIX CHANGES
     //workspace->L = NULL;
     //workspace->U = NULL;
+
+#if defined(DEBUG_FOCUS)
     fprintf (stderr, "p:%d - allocated H matrix: max_entries: %d, cap: %d \n",
              system->my_rank, system->max_sparse_entries, dev_workspace->H.m);
-#if defined(DEBUG_FOCUS)
     fprintf( stderr, "p%d: allocated H matrix: Htop=%d, space=%dMB\n",
              system->my_rank, Htop,
              (int)(Htop * sizeof(sparse_matrix_entry) / (1024 * 1024)) );
@@ -995,9 +996,10 @@ int  Cuda_Init_Lists( reax_system *system, control_params *control,
             MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY );
         }
 
+#if defined(DEBUG_FOCUS)
         fprintf (stderr, "**** Total HBonds allocated --> %d total_cap: %d per atom: %d, max_hbonds: %d \n",
                  total_hbonds, system->total_cap, (total_hbonds / system->total_cap), system->max_hbonds );
-
+#endif
 
         //TODO
         //Cuda_Init_HBond_Indices (hb_top, system->n);
@@ -1007,7 +1009,6 @@ int  Cuda_Init_Lists( reax_system *system, control_params *control,
         //THIS IS COMMENTED OUT - CHANGE ORIGINAL
         /****/
 
-
 #if defined(DEBUG_FOCUS)
         fprintf( stderr, "p%d: allocated hbonds: total_hbonds=%d, space=%dMB\n",
                  system->my_rank, total_hbonds,
@@ -1024,8 +1025,11 @@ int  Cuda_Init_Lists( reax_system *system, control_params *control,
         total_bonds += MAX (bond_top[i] * 4, MIN_BONDS);
     }
     bond_cap = MAX( total_bonds, MIN_CAP * MIN_BONDS );
+
+#if defined(DEBUG)
     fprintf (stderr, "**** Total Bonds allocated --> %d total_cap: %d per atom: %d, max_bonds: %d \n",
              bond_cap, system->total_cap, (bond_cap / system->total_cap), system->max_bonds );
+#endif
 
 
     /***************/
diff --git a/PG-PuReMD/src/integrate.c b/PG-PuReMD/src/integrate.c
index 706db126..79bd1f2e 100644
--- a/PG-PuReMD/src/integrate.c
+++ b/PG-PuReMD/src/integrate.c
@@ -77,7 +77,9 @@ void Velocity_Verlet_NVE( reax_system* system, control_params* control,
     Comm_Atoms( system, control, data, workspace, lists, mpi_data, renbr );
     Reset( system, control, data, workspace, lists );
     if ( renbr )
+    {
         Generate_Neighbor_Lists( system, data, workspace, lists );
+    }
     Compute_Forces(system, control, data, workspace, lists, out_control, mpi_data);
 
     for ( i = 0; i < system->n; i++ )
@@ -94,14 +96,9 @@ void Velocity_Verlet_NVE( reax_system* system, control_params* control,
 }
 
 
-
 void Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system* system,
-        control_params* control,
-        simulation_data *data,
-        storage *workspace,
-        reax_list **lists,
-        output_controls *out_control,
-        mpi_datatypes *mpi_data )
+        control_params* control, simulation_data *data, storage *workspace,
+        reax_list **lists, output_controls *out_control, mpi_datatypes *mpi_data )
 {
     int  i, itr, steps, renbr;
     real inv_m, coef_v;
@@ -116,6 +113,7 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system* system,
     fprintf( stderr, "p%d @ step%d\n", system->my_rank, data->step );
     MPI_Barrier( MPI_COMM_WORLD );
 #endif
+
     dt = control->dt;
     dt_sqr = SQR(dt);
     therm = &( data->therm );
@@ -130,8 +128,10 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system* system,
         rvec_Add( system->my_atoms[i].x, dx );
         rvec_Copy( atom->f_old, atom->f );
     }
+
     /* Compute xi(t + dt) */
     therm->xi += ( therm->v_xi * dt + 0.5 * dt_sqr * therm->G_xi );
+
 #if defined(DEBUG_FOCUS)
     fprintf(stderr, "p%d @ step%d: verlet1 done\n", system->my_rank, data->step);
     MPI_Barrier( MPI_COMM_WORLD );
@@ -141,7 +141,9 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system* system,
     Comm_Atoms( system, control, data, workspace, lists, mpi_data, renbr );
     Reset( system, control, data, workspace, lists );
     if ( renbr )
+    {
         Generate_Neighbor_Lists( system, data, workspace, lists );
+    }
     Compute_Forces( system, control, data, workspace, lists,
                     out_control, mpi_data );
 
@@ -196,13 +198,9 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system* system,
 /* uses Berendsen-type coupling for both T and P.
    All box dimensions are scaled by the same amount,
    there is no change in the angles between axes. */
-void Velocity_Verlet_Berendsen_NVT( reax_system* system,
-                                    control_params* control,
-                                    simulation_data *data,
-                                    storage *workspace,
-                                    reax_list **lists,
-                                    output_controls *out_control,
-                                    mpi_datatypes *mpi_data )
+void Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* control,
+        simulation_data *data, storage *workspace, reax_list **lists,
+        output_controls *out_control, mpi_datatypes *mpi_data )
 {
     int i, steps, renbr;
     real inv_m, dt, lambda;
@@ -236,7 +234,9 @@ void Velocity_Verlet_Berendsen_NVT( reax_system* system,
 
     ReAllocate( system, control, data, workspace, lists, mpi_data );
     if ( renbr )
+    {
         Update_Grid( system, control, mpi_data->world );
+    }
     Comm_Atoms( system, control, data, workspace, lists, mpi_data, renbr );
     Reset( system, control, data, workspace, lists );
     if ( renbr )
@@ -254,6 +254,7 @@ void Velocity_Verlet_Berendsen_NVT( reax_system* system,
         /* Compute v(t + dt) */
         rvec_ScaledAdd( atom->v, 0.5 * dt * -F_CONV * inv_m, atom->f );
     }
+
 #if defined(DEBUG_FOCUS)
     fprintf(stderr, "p%d @ step%d: verlet2 done\n", system->my_rank, data->step);
     MPI_Barrier( MPI_COMM_WORLD );
@@ -263,9 +264,13 @@ void Velocity_Verlet_Berendsen_NVT( reax_system* system,
     Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D );
     lambda = 1.0 + (dt / control->Tau_T) * (control->T / data->therm.T - 1.0);
     if ( lambda < MIN_dT )
+    {
         lambda = MIN_dT;
+    }
     else if (lambda > MAX_dT )
+    {
         lambda = MAX_dT;
+    }
     lambda = SQRT( lambda );
 
     /* Scale velocities and positions at t+dt */
@@ -345,8 +350,9 @@ void Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system,
 
     if ( renbr )
     {
-
+#if defined(DEBUG)
         t_over_start  = Get_Time ();
+#endif
 
         nbr_indices = (int *) host_scratch;
         memset (nbr_indices, 0, sizeof (int) * system->N);
@@ -423,9 +429,11 @@ void Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system,
         Cuda_Init_Bond_Indices (bond_top, system->N, bond_cap);
         */
 
+#if defined(DEBUG)
         t_over_elapsed  = Get_Timing_Info (t_over_start);
         fprintf (stderr, "p%d --> Overhead (Step-%d) %f \n",
                  system->my_rank, data->step, t_over_elapsed);
+#endif
     }
 
     //Compute_Forces( system, control, data, workspace,
@@ -434,7 +442,6 @@ void Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system,
                          lists, out_control, mpi_data );
 
     /* velocity verlet, 2nd part */
-    //t_over_start = Get_Time ();
     bNVT_update_velocity_part2 (system, dt);
 
 #if defined(DEBUG_FOCUS)
@@ -448,9 +455,13 @@ void Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system,
 
     lambda = 1.0 + (dt / control->Tau_T) * (control->T / data->therm.T - 1.0);
     if ( lambda < MIN_dT )
+    {
         lambda = MIN_dT;
+    }
     else if (lambda > MAX_dT )
+    {
         lambda = MAX_dT;
+    }
     lambda = SQRT( lambda );
 
     /* Scale velocities and positions at t+dt */
@@ -471,13 +482,9 @@ void Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system,
 /* uses Berendsen-type coupling for both T and P.
    All box dimensions are scaled by the same amount,
    there is no change in the angles between axes. */
-void Velocity_Verlet_Berendsen_NPT( reax_system* system,
-                                    control_params* control,
-                                    simulation_data *data,
-                                    storage *workspace,
-                                    reax_list **lists,
-                                    output_controls *out_control,
-                                    mpi_datatypes *mpi_data )
+void Velocity_Verlet_Berendsen_NPT( reax_system* system, control_params* control,
+        simulation_data *data, storage *workspace, reax_list **lists,
+        output_controls *out_control, mpi_datatypes *mpi_data )
 {
     int i, steps, renbr;
     real inv_m, dt;
@@ -511,7 +518,9 @@ void Velocity_Verlet_Berendsen_NPT( reax_system* system,
 
     ReAllocate( system, control, data, workspace, lists, mpi_data );
     if ( renbr )
+    {
         Update_Grid( system, control, mpi_data->world );
+    }
     Comm_Atoms( system, control, data, workspace, lists, mpi_data, renbr );
     Reset( system, control, data, workspace, lists );
     if ( renbr )
diff --git a/PG-PuReMD/src/list.c b/PG-PuReMD/src/list.c
index 09a38e59..695e8423 100644
--- a/PG-PuReMD/src/list.c
+++ b/PG-PuReMD/src/list.c
@@ -29,31 +29,44 @@
 #include "reax_tool_box.h"
 #endif
 
-void Print_List(reax_list* list){
-	//printf("List_Print\n");
-	int i;
-        printf("START INDICES \n");
-	for (i=0;i<list->n;i++){
-		printf("%d \n",list->index[i]);
-	}
-	printf("END INDICES \n");
-	for (i=0;i<list->n;i++){
-		printf("%d \n", list->end_index[i]);
-	}
+
+void Print_List(reax_list* list)
+{
+    //printf("List_Print\n");
+    int i;
+
+    printf("START INDICES \n");
+    for( i=0; i<list->n; i++ )
+    {
+        printf("%d \n",list->index[i]);
+    }
+
+    printf("END INDICES \n");
+    for ( i=0; i<list->n; i++ )
+    {
+        printf("%d \n", list->end_index[i]);
+    }
 
 }
+
+
 /************* allocate list space ******************/
 int Make_List(int n, int num_intrs, int type, reax_list *l)
 {
-    l->allocated = 1;
+    int ret = SUCCESS;
 
+    l->allocated = 1;
     l->n = n;
     l->num_intrs = num_intrs;
 
-    l->index = (int*) smalloc( n * sizeof(int), "list:index" );
-    l->end_index = (int*) smalloc( n * sizeof(int), "list:end_index" );
-//printf("SUCCESS\n");
+    if( (l->index = (int*) smalloc( n * sizeof(int), "list:index" )) == NULL
+        ||  (l->end_index = (int*) smalloc( n * sizeof(int), "list:end_index" )) == NULL )
+    {
+        ret = FAILURE;
+    }
+
     l->type = type;
+
 #if defined(DEBUG_FOCUS)
     fprintf( stderr, "list: n=%d num_intrs=%d type=%d\n", n, num_intrs, type );
 #endif
@@ -61,38 +74,59 @@ int Make_List(int n, int num_intrs, int type, reax_list *l)
     switch (l->type)
     {
     case TYP_VOID:
-        l->select.v = (void*) smalloc( l->num_intrs * sizeof(void*), "list:v" );
+        if( (l->select.v = (void*) smalloc( l->num_intrs * sizeof(void*), "list:v" )) == NULL )
+        {
+            ret = FAILURE;
+        }
         break;
 
     case TYP_THREE_BODY:
-        l->select.three_body_list = (three_body_interaction_data*)
-                                    smalloc( l->num_intrs * sizeof(three_body_interaction_data),
-                                             "list:three_bodies" );
+        if( (l->select.three_body_list = (three_body_interaction_data*)
+            smalloc( l->num_intrs * sizeof(three_body_interaction_data),
+            "list:three_bodies" )) == NULL )
+        {
+            ret = FAILURE;
+        }
         break;
 
     case TYP_BOND:
-        l->select.bond_list = (bond_data*)
-                              smalloc( l->num_intrs * sizeof(bond_data), "list:bonds" );
+        if( (l->select.bond_list = (bond_data*)
+            smalloc( l->num_intrs * sizeof(bond_data), "list:bonds" )) == NULL )
+        {
+            ret = FAILURE;
+        }
         break;
 
     case TYP_DBO:
-        l->select.dbo_list = (dbond_data*)
-                             smalloc( l->num_intrs * sizeof(dbond_data), "list:dbonds" );
+        if( (l->select.dbo_list = (dbond_data*)
+            smalloc( l->num_intrs * sizeof(dbond_data), "list:dbonds" )) == NULL )
+        {
+            ret = FAILURE;
+        }
         break;
 
     case TYP_DDELTA:
-        l->select.dDelta_list = (dDelta_data*)
-                                smalloc( l->num_intrs * sizeof(dDelta_data), "list:dDeltas" );
+        if( (l->select.dDelta_list = (dDelta_data*)
+            smalloc( l->num_intrs * sizeof(dDelta_data), "list:dDeltas" )) == NULL )
+        {
+            ret = FAILURE;
+        }
         break;
 
     case TYP_FAR_NEIGHBOR:
-        l->select.far_nbr_list = (far_neighbor_data*)
-                                 smalloc( l->num_intrs * sizeof(far_neighbor_data), "list:far_nbrs" );
+        if( (l->select.far_nbr_list = (far_neighbor_data*)
+            smalloc( l->num_intrs * sizeof(far_neighbor_data), "list:far_nbrs" )) == NULL )
+        {
+            ret = FAILURE;
+        }
         break;
 
     case TYP_HBOND:
-        l->select.hbond_list = (hbond_data*)
-                               smalloc( l->num_intrs * sizeof(hbond_data), "list:hbonds" );
+        if( (l->select.hbond_list = (hbond_data*)
+            smalloc( l->num_intrs * sizeof(hbond_data), "list:hbonds" )) == NULL )
+        {
+            ret = FAILURE;
+        }
         break;
 
     default:
@@ -100,7 +134,7 @@ int Make_List(int n, int num_intrs, int type, reax_list *l)
         MPI_Abort( MPI_COMM_WORLD, INVALID_INPUT );
     }
 
-    return SUCCESS;
+    return ret;
 }
 
 
diff --git a/PG-PuReMD/src/validation.cu b/PG-PuReMD/src/validation.cu
index 2bffc36f..b614fc89 100644
--- a/PG-PuReMD/src/validation.cu
+++ b/PG-PuReMD/src/validation.cu
@@ -1,4 +1,3 @@
-
 #include "validation.h"
 #include "cuda_utils.h"
 #include "list.h"
@@ -7,6 +6,7 @@
 #include "index_utils.h"
 #include "vector.h"
 
+
 bool check_zero (real p1, real p2)
 {
     if (abs (p1 - p2) >= GPU_TOLERANCE)
@@ -15,6 +15,7 @@ bool check_zero (real p1, real p2)
         return false;
 }
 
+
 bool check_zero (rvec p1, rvec p2)
 {
 
@@ -25,6 +26,7 @@ bool check_zero (rvec p1, rvec p2)
     else return false;
 }
 
+
 bool check_zero_rvec2 (rvec2 p1, rvec2 p2)
 {
 
@@ -34,6 +36,7 @@ bool check_zero_rvec2 (rvec2 p1, rvec2 p2)
     else return false;
 }
 
+
 bool check_same (ivec p1, ivec p2)
 {
     if ( (p1[0] == p2[0]) || (p1[1] == p2[1]) || (p1[2] == p2[2]) )
@@ -42,6 +45,7 @@ bool check_same (ivec p1, ivec p2)
         return false;
 }
 
+
 void print_bond_data (bond_order_data *s)
 {
     /*   
@@ -1669,22 +1673,30 @@ int print_device_rvec2 (rvec2 *b, int n)
     return print_host_rvec2 (a, n);
 }
 
+
 int print_host_array (real *a, int n)
 {
 
     for (int i = 0; i < n; i++)
+    {
         fprintf (stderr," a[%d] = %f \n", i, a[i]);
+    }
     fprintf(stderr, " ----------------------------------\n");
+
     return SUCCESS;
 }
 
+
 int print_device_array (real *a, int n)
 {
     real *b = (real *) host_scratch;
     copy_host_device (b, a, sizeof (real) * n, cudaMemcpyDeviceToHost, "real");
     print_host_array (b, n);
+
+    return SUCCESS;
 }
 
+
 int check_zeros_host (rvec2 *host, int n, char *msg)
 {
     int count, count1;
@@ -1699,6 +1711,7 @@ int check_zeros_host (rvec2 *host, int n, char *msg)
     return 1;
 }
 
+
 int check_zeros_device (rvec2 *device, int n, char *msg)
 {
     rvec2 *a = (rvec2 *) host_scratch;    
diff --git a/environ/parallel_control b/environ/parallel_control
index bf37d538..d69d3eae 100644
--- a/environ/parallel_control
+++ b/environ/parallel_control
@@ -1,5 +1,5 @@
 simulation_name          water.6.notab.111      ! output files will carry this name + their specific ext.
-ensemble_type            0                      ! 0: NVE, 1: Berendsen NVT, 2: Nose-Hoover NVT(under testing), 3: semi-isotropic NPT, 4: isotropic NPT, 5: anisotropic NPT (under development)
+ensemble_type            1                      ! 0: NVE, 1: Berendsen NVT, 2: Nose-Hoover NVT(under testing), 3: semi-isotropic NPT, 4: isotropic NPT, 5: anisotropic NPT (under development)
 nsteps                   100                    ! number of simulation steps
 dt                       0.25                   ! time step in fs
 proc_by_dim              1 1 1                  ! distribution of processors by dimensions
-- 
GitLab