From 20a75e0f3eb106f47627ff4f01c9d1f4edca2cb2 Mon Sep 17 00:00:00 2001
From: "Kurt A. O'Hearn" <ohearnku@msu.edu>
Date: Thu, 22 Jun 2017 08:39:27 -0400
Subject: [PATCH] PG-PuReMD: add hydrogen bonding.

---
 PG-PuReMD/src/allocate.c             |  11 +-
 PG-PuReMD/src/cuda_allocate.cu       |  27 +-
 PG-PuReMD/src/cuda_bond_orders.cu    |  25 +-
 PG-PuReMD/src/cuda_copy.cu           |  68 ++++--
 PG-PuReMD/src/cuda_forces.cu         | 353 +++++++++++++--------------
 PG-PuReMD/src/cuda_forces.h          |   2 +-
 PG-PuReMD/src/cuda_hydrogen_bonds.cu | 140 ++++++-----
 PG-PuReMD/src/cuda_neighbors.cu      | 163 +++++++++++--
 PG-PuReMD/src/cuda_neighbors.h       |   2 +-
 PG-PuReMD/src/cuda_reset_tools.cu    | 123 ++--------
 PG-PuReMD/src/cuda_reset_tools.h     |   3 +
 PG-PuReMD/src/cuda_validation.cu     |   6 +-
 PG-PuReMD/src/ffield.c               |  97 +++-----
 PG-PuReMD/src/forces.c               | 111 ++++++---
 PG-PuReMD/src/hydrogen_bonds.c       | 158 ++++++------
 PG-PuReMD/src/index_utils.h          |   8 +-
 PG-PuReMD/src/init_md.c              |  89 ++-----
 PG-PuReMD/src/integrate.c            |  21 +-
 PG-PuReMD/src/io_tools.c             |  81 +++++-
 PG-PuReMD/src/io_tools.h             |   4 +-
 PG-PuReMD/src/parallelreax.c         |  41 ++--
 PG-PuReMD/src/reax_types.h           |  37 +--
 PG-PuReMD/src/reset_tools.c          |   4 +-
 PG-PuReMD/src/traj.c                 | 138 +++++++----
 PG-PuReMD/src/traj.h                 |  30 ++-
 25 files changed, 958 insertions(+), 784 deletions(-)

diff --git a/PG-PuReMD/src/allocate.c b/PG-PuReMD/src/allocate.c
index c1811b52..2a4e48f3 100644
--- a/PG-PuReMD/src/allocate.c
+++ b/PG-PuReMD/src/allocate.c
@@ -83,7 +83,7 @@ void DeAllocate_Workspace( control_params *control, storage *workspace )
 {
     int i;
 
-    if ( !workspace->allocated )
+    if ( workspace->allocated == FALSE )
     {
         return;
     }
@@ -204,13 +204,6 @@ void DeAllocate_Workspace( control_params *control, storage *workspace )
     sfree( workspace->id_all, "id_all" );
     sfree( workspace->f_all, "f_all" );
 #endif
-
-    /* hbond storage */
-    //sfree( workspace->Hindex, "Hindex" );
-    //sfree( workspace->num_bonds );
-    //sfree( workspace->num_hbonds );
-    //sfree( workspace->hash, "hash" );
-    //sfree( workspace->rev_hash, "rev_hash" );
 }
 
 
@@ -409,7 +402,7 @@ void Reallocate_HBonds_List( reax_system *system, reax_list *hbonds )
         if ( (id = system->my_atoms[i].Hindex) >= 0 )
         {
             system->my_atoms[i].num_hbonds = MAX( Num_Entries(id, hbonds) * SAFER_ZONE,
-                                                  MIN_HBONDS );
+                    MIN_HBONDS );
             total_hbonds += system->my_atoms[i].num_hbonds;
         }
     }
diff --git a/PG-PuReMD/src/cuda_allocate.cu b/PG-PuReMD/src/cuda_allocate.cu
index c16f17a6..dcf99114 100644
--- a/PG-PuReMD/src/cuda_allocate.cu
+++ b/PG-PuReMD/src/cuda_allocate.cu
@@ -162,6 +162,7 @@ void dev_alloc_system( reax_system *system )
     cuda_malloc( (void **) &system->d_my_atoms,
             system->total_cap * sizeof(reax_atom),
             TRUE, "system:d_my_atoms" );
+    cuda_malloc( (void **) &system->d_numH, sizeof(int), TRUE, "system:d_numH" );
 
     /* list management */
     cuda_malloc( (void **) &system->d_far_nbrs,
@@ -625,6 +626,7 @@ void Cuda_ReAllocate( reax_system *system, control_params *control,
             realloc->far_nbrs = FALSE;
         }
     }
+    fprintf( stderr, "    [CUDA_REALLOCATE::far nbrs]\n" );
 
     /* charge coef matrix */
     //if( nflag || realloc->Htop >= system->max_sparse_entries * DANGER_ZONE ) {
@@ -671,27 +673,26 @@ void Cuda_ReAllocate( reax_system *system, control_params *control,
         //workspace->U = NULL;
         realloc->Htop = 0;
     }
+    fprintf( stderr, "    [CUDA_REALLOCATE::charge matrix\n" );
 
     /* hydrogen bonds list */
-    // FIX - 4 - Added additional check here for hydrogen Bond fix
-    if ( control->hbond_cut > 0 && system->numH > 0 )
+    if ( control->hbond_cut > 0.0 && system->numH > 0 )
     {
 
-        if ( Nflag == TRUE || realloc->hbonds )
+        if ( Nflag == TRUE || realloc->hbonds == TRUE )
         {
-            fprintf( stderr, "p:%d - *** Reallocating Hbonds *** Step:%d\n", system->my_rank, data->step );
-            //MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY );
-
-            Cuda_Reallocate_HBonds_List( (*dev_lists) + HBONDS, system->total_cap, realloc->num_hbonds );
-//            Cuda_Init_HBond_Indices( system );
-            realloc->hbonds = 0;
-
 #if defined(DEBUG_FOCUS)
             fprintf( stderr, "p%d: reallocating hbonds: total_hbonds=%d space=%dMB\n",
-                    system->my_rank, ret, (int)(ret * sizeof(hbond_data) / (1024 * 1024)) );
+                    system->my_rank, system->total_hbonds,
+                    (int)(system->total_hbonds * sizeof(hbond_data) / (1024 * 1024)) );
 #endif
+
+            Cuda_Reallocate_HBonds_List( (*dev_lists) + HBONDS, system->total_cap, system->total_hbonds );
+            Cuda_Init_HBond_Indices( system );
+            realloc->hbonds = FALSE;
         }
     }
+    fprintf( stderr, "    [CUDA_REALLOCATE::hbonds\n" );
 
     /* bonds list */
     if ( Nflag == TRUE || realloc->bonds == TRUE )
@@ -706,6 +707,7 @@ void Cuda_ReAllocate( reax_system *system, control_params *control,
         Cuda_Init_Bond_Indices( system );
         realloc->bonds = FALSE;
     }
+    fprintf( stderr, "    [CUDA_REALLOCATE::bonds\n" );
 
     /* 3-body list */
     if ( Nflag == TRUE || realloc->num_3body > 0 )
@@ -721,6 +723,7 @@ void Cuda_ReAllocate( reax_system *system, control_params *control,
                 (*dev_lists + BONDS)->num_intrs, system->total_thbodies );
         realloc->num_3body = -1;
     }
+    fprintf( stderr, "    [CUDA_REALLOCATE::thbody\n" );
 
     /* grid */
     if ( renbr && realloc->gcell_atoms > -1 )
@@ -753,6 +756,7 @@ void Cuda_ReAllocate( reax_system *system, control_params *control,
         //dev_alloc_grid_cell_atoms (system, realloc->gcell_atoms);
         realloc->gcell_atoms = -1;
     }
+    fprintf( stderr, "    [CUDA_REALLOCATE::grid\n" );
 
     /* mpi buffers */
     // we have to be at a renbring step -
@@ -825,6 +829,7 @@ void Cuda_ReAllocate( reax_system *system, control_params *control,
         Deallocate_MPI_Buffers( mpi_data );
         Allocate_MPI_Buffers( mpi_data, system->est_recv, system->my_nbrs, msg );
     }
+    fprintf( stderr, "    [CUDA_REALLOCATE::MPI buffers\n" );
 
 #if defined(DEBUG_FOCUS)
     fprintf( stderr, "p%d @ step%d: reallocate done\n",
diff --git a/PG-PuReMD/src/cuda_bond_orders.cu b/PG-PuReMD/src/cuda_bond_orders.cu
index 9910fad3..b0ee399f 100644
--- a/PG-PuReMD/src/cuda_bond_orders.cu
+++ b/PG-PuReMD/src/cuda_bond_orders.cu
@@ -784,24 +784,28 @@ CUDA_DEVICE void Cuda_dbond_to_Forces_postprocess( int i, reax_atom *atoms,
 }
 
 
-CUDA_GLOBAL void ker_total_forces_postprocess( reax_atom *my_atoms,
+CUDA_GLOBAL void k_total_forces_postprocess( reax_atom *my_atoms,
         reax_list p_bonds, storage p_workspace, int N )
 {
-    int i = blockIdx.x * blockDim.x + threadIdx.x;
+    int i;
+    reax_list *bonds;
+    storage *workspace;
+
+    i = blockIdx.x * blockDim.x + threadIdx.x;
 
     if ( i >= N )
     {
         return;
     }
 
-    reax_list *bonds = &( p_bonds );
-    storage *workspace = &( p_workspace );
+    bonds = &p_bonds;
+    workspace = &p_workspace;
 
     Cuda_dbond_to_Forces_postprocess( i, my_atoms, bonds, workspace );
 }
 
 
-CUDA_GLOBAL void ker_total_forces( storage p_workspace, reax_list p_bonds, 
+CUDA_GLOBAL void k_total_forces( storage p_workspace, reax_list p_bonds, 
         control_params *control, simulation_data *data, rvec *data_ext_press,
         int N )
 {
@@ -846,7 +850,7 @@ void Cuda_Total_Forces( reax_system *system, control_params *control,
 
     blocks = system->N / DEF_BLOCK_SIZE + 
         ((system->N % DEF_BLOCK_SIZE == 0) ? 0 : 1);
-    ker_total_forces <<< blocks, DEF_BLOCK_SIZE >>>
+    k_total_forces <<< blocks, DEF_BLOCK_SIZE >>>
         ( *dev_workspace, *(*dev_lists + BONDS), 
           (control_params *) control->d_control_params, 
           (simulation_data *)data->d_simulation_data, 
@@ -869,14 +873,14 @@ void Cuda_Total_Forces( reax_system *system, control_params *control,
     }
 
     //do the post processing for the atomic forces here
-    ker_total_forces_postprocess  <<< blocks, DEF_BLOCK_SIZE >>>
+    k_total_forces_postprocess  <<< blocks, DEF_BLOCK_SIZE >>>
         ( system->d_my_atoms, *(*dev_lists + BONDS), *dev_workspace, system->N );
     cudaThreadSynchronize( ); 
     cudaCheckError( ); 
 }
 
 
-CUDA_GLOBAL void ker_total_forces_pure( reax_atom *my_atoms, int n, 
+CUDA_GLOBAL void k_total_forces_pure( reax_atom *my_atoms, int n, 
         storage p_workspace )
 {
     int i;
@@ -889,7 +893,7 @@ CUDA_GLOBAL void ker_total_forces_pure( reax_atom *my_atoms, int n,
         return;
     }
 
-    workspace = &( p_workspace );
+    workspace = &p_workspace;
 
     rvec_Copy( my_atoms[i].f, workspace->f[i] );
 }
@@ -901,7 +905,8 @@ void Cuda_Total_Forces_PURE( reax_system *system, storage *workspace )
 
     blocks = system->n / DEF_BLOCK_SIZE + 
         ((system->n % DEF_BLOCK_SIZE == 0) ? 0 : 1);
-    ker_total_forces_pure <<< blocks, DEF_BLOCK_SIZE >>>
+
+    k_total_forces_pure <<< blocks, DEF_BLOCK_SIZE >>>
         ( system->d_my_atoms, system->n, *dev_workspace);
     cudaThreadSynchronize( ); 
     cudaCheckError( ); 
diff --git a/PG-PuReMD/src/cuda_copy.cu b/PG-PuReMD/src/cuda_copy.cu
index a28dbeb4..a3bfca30 100644
--- a/PG-PuReMD/src/cuda_copy.cu
+++ b/PG-PuReMD/src/cuda_copy.cu
@@ -5,6 +5,7 @@
 #include "vector.h"
 
 
+/* Copy grid info from host to device */
 void Sync_Grid( grid *host, grid *device )
 {
     int total;
@@ -47,59 +48,62 @@ void Sync_Grid( grid *host, grid *device )
 }
 
 
+/* Copy atom info from host to device */
 void Sync_Atoms( reax_system *sys )
 {
     //TODO METIN FIX, coredump on his machine
-    //copy_host_device( sys->my_atoms, sys->d_my_atoms, sizeof(reax_atom) * sys->total_cap, cudaMemcpyHostToDevice, "system:my_atoms" );
+//    copy_host_device( sys->my_atoms, sys->d_my_atoms, sizeof(reax_atom) * sys->total_cap,
+//            cudaMemcpyHostToDevice, "Sync_Atoms::system->my_atoms" );
 
 #if defined(__CUDA_DEBUG_LOG__)
     fprintf( stderr, "p:%d - Synching atoms: n: %d N: %d, total_cap: %d \n", 
             sys->my_rank, sys->n, sys->N, sys->total_cap );
 #endif
 
-    copy_host_device( sys->my_atoms, sys->d_my_atoms, sizeof(reax_atom) *
-            sys->N, cudaMemcpyHostToDevice, "system:my_atoms" );
+    copy_host_device( sys->my_atoms, sys->d_my_atoms, sizeof(reax_atom) * sys->N,
+            cudaMemcpyHostToDevice, "Sync_Atoms::system->my_atoms" );
     //TODO METIN FIX, coredump on his machine
 }
 
 
+/* Copy atomic system info from host to device */
 void Sync_System( reax_system *sys )
 {
-    //fprintf (stderr, "p:%d - trying to copy atoms : %d \n", sys->my_rank, sys->local_cap);
     Sync_Atoms( sys );
 
     copy_host_device( &sys->my_box, sys->d_my_box, sizeof(simulation_box),
-            cudaMemcpyHostToDevice, "system:my_box" );
+            cudaMemcpyHostToDevice, "Sync_System::system->my_box" );
 
     copy_host_device( &sys->my_ext_box, sys->d_my_ext_box,
             sizeof(simulation_box), cudaMemcpyHostToDevice,
-            "system:my_ext_box" );
+            "Sync_System::system->my_ext_box" );
 
     copy_host_device( sys->reax_param.sbp, sys->reax_param.d_sbp,
             sizeof(single_body_parameters) * sys->reax_param.num_atom_types,
-            cudaMemcpyHostToDevice, "system:sbp" );
+            cudaMemcpyHostToDevice, "Sync_System::system->sbp" );
     copy_host_device( sys->reax_param.tbp, sys->reax_param.d_tbp,
             sizeof(two_body_parameters) * POW(sys->reax_param.num_atom_types, 2),
-            cudaMemcpyHostToDevice, "system:tbp" );
+            cudaMemcpyHostToDevice, "Sync_System::system->tbp" );
     copy_host_device( sys->reax_param.thbp, sys->reax_param.d_thbp,
             sizeof(three_body_header) * POW(sys->reax_param.num_atom_types, 3),
-            cudaMemcpyHostToDevice, "system:thbh" );
+            cudaMemcpyHostToDevice, "Sync_System::system->thbh" );
     copy_host_device( sys->reax_param.hbp, sys->reax_param.d_hbp,
             sizeof(hbond_parameters) * POW(sys->reax_param.num_atom_types, 3),
-            cudaMemcpyHostToDevice, "system:hbond" );
+            cudaMemcpyHostToDevice, "Sync_System::system->hbond" );
     copy_host_device( sys->reax_param.fbp, sys->reax_param.d_fbp, 
             sizeof(four_body_header) * POW(sys->reax_param.num_atom_types, 4),
-            cudaMemcpyHostToDevice, "system:four_header" );
+            cudaMemcpyHostToDevice, "Sync_System::system->four_header" );
 
     copy_host_device( sys->reax_param.gp.l, sys->reax_param.d_gp.l,
             sizeof(real) * sys->reax_param.gp.n_global, cudaMemcpyHostToDevice,
-            "system:global_parameters" );
+            "Sync_System::system->global_parameters" );
 
     sys->reax_param.d_gp.n_global = sys->reax_param.gp.n_global; 
     sys->reax_param.d_gp.vdw_type = sys->reax_param.gp.vdw_type; 
 }
 
 
+/* Copy atom info from device to host */
 void Output_Sync_Atoms( reax_system *sys )
 {
     copy_host_device( sys->my_atoms, sys->d_my_atoms, sizeof(reax_atom) *
@@ -107,6 +111,7 @@ void Output_Sync_Atoms( reax_system *sys )
 }
 
 
+/* Copy simulation data from device to host */
 void Output_Sync_Simulation_Data( simulation_data *host, simulation_data *dev )
 {
     copy_host_device( &host->my_en, &dev->my_en, sizeof(energy_data), 
@@ -120,36 +125,49 @@ void Output_Sync_Simulation_Data( simulation_data *host, simulation_data *dev )
 }
 
 
+/* Copy interaction lists from device to host */
 void Output_Sync_Lists( reax_list *host, reax_list *device, int type )
 {
-    //fprintf (stderr, " Trying to copy *%d* list from device to host \n", type);
-
-    //list is already allocated -- discard it first
-    //if (host->n > 0)
-    //if (host->allocated > 0)
-    //  Delete_List (host);
+#if defined(DEBUG)
+    fprintf( stderr, " Trying to copy *%d* list from device to host \n", type );
+#endif
 
-    //memory is allocated on the host
-    //Make_List(device->n, device->num_intrs, type, host);
+//    if ( host->allocated == TRUE )
+//    {
+//        Delete_List( host );
+//    }
+//    Make_List( device->n, device->num_intrs, type, host );
 
     copy_host_device( host->index, device->index, sizeof(int) * device->n,
-            cudaMemcpyDeviceToHost, "output_sync_list:list:index" );
+            cudaMemcpyDeviceToHost, "Output_Sync_Lists::list->index" );
     copy_host_device( host->end_index, device->end_index, sizeof(int) *
-            device->n, cudaMemcpyDeviceToHost, "output_sync:list:end_index" );
+            device->n, cudaMemcpyDeviceToHost, "Output_Sync_Lists::list->end_index" );
 
-    switch (type)
+    switch ( type )
     {   
+        case TYP_FAR_NEIGHBOR:
+            copy_host_device( host->select.far_nbr_list, device->select.far_nbr_list,
+                    sizeof(far_neighbor_data) * device->num_intrs,
+                    cudaMemcpyDeviceToHost, "Output_Sync_Lists::far_neighbor_list" );
+            break;
+
         case TYP_BOND:
             copy_host_device( host->select.bond_list, device->select.bond_list,
                     sizeof(bond_data) * device->num_intrs,
-                    cudaMemcpyDeviceToHost, "bond_list" );
+                    cudaMemcpyDeviceToHost, "Output_Sync_Lists::bond_list" );
+            break;
+
+        case TYP_HBOND:
+            copy_host_device( host->select.hbond_list, device->select.hbond_list,
+                    sizeof(hbond_data) * device->num_intrs,
+                    cudaMemcpyDeviceToHost, "Output_Sync_Lists::hbond_list" );
             break;
 
         case TYP_THREE_BODY:
             copy_host_device( host->select.three_body_list,
                     device->select.three_body_list,
                     sizeof(three_body_interaction_data )* device->num_intrs,
-                    cudaMemcpyDeviceToHost, "three_body_list" );
+                    cudaMemcpyDeviceToHost, "Output_Sync_Lists::three_body_list" );
             break;
 
         default:
diff --git a/PG-PuReMD/src/cuda_forces.cu b/PG-PuReMD/src/cuda_forces.cu
index c8e4ebe4..67009b7c 100644
--- a/PG-PuReMD/src/cuda_forces.cu
+++ b/PG-PuReMD/src/cuda_forces.cu
@@ -32,7 +32,7 @@ CUDA_GLOBAL void k_estimate_storages( reax_atom *my_atoms,
         control_params *control, reax_list far_nbrs, 
         int num_atom_types, int n, int N, int Hcap, int total_cap, int *Htop,
         int *bonds, int *max_bonds, int *realloc_bonds,
-        int *hbonds )
+        int *hbonds, int *max_hbonds, int *realloc_hbonds )
 {
     int i, j, pj; 
     int start_i, end_i;
@@ -68,15 +68,17 @@ CUDA_GLOBAL void k_estimate_storages( reax_atom *my_atoms,
             local = TRUE;
             cutoff = control->nonb_cut;
             atomicAdd( Htop, 1 );
-            ihb = sbp_i->p_hbond;
+//            ihb = sbp_i->p_hbond;
         }   
         else
         {
             local = FALSE;
             cutoff = control->bond_cut;
-            ihb = -1; 
+//            ihb = NON_H_BONDING_ATOM; 
         } 
 
+        ihb = NON_H_BONDING_ATOM; 
+
         for ( pj = start_i; pj < end_i; ++pj )
         { 
             nbr_pj = &( far_nbrs.select.far_nbr_list[pj] );
@@ -89,17 +91,19 @@ CUDA_GLOBAL void k_estimate_storages( reax_atom *my_atoms,
                 sbp_j = &(sbp[type_j]);
                 ihb = sbp_i->p_hbond;
                 jhb = sbp_j->p_hbond;
-                if ( control->hbond_cut > 0.1 
-                        && nbr_pj->d <= control->hbond_cut 
-                        && ihb == 2 && jhb == 1 && j < n && i > n )
+
+                /* atom i: H bonding, ghost
+                 * atom j: H atom, native */
+                if ( control->hbond_cut > 0.0 && nbr_pj->d <= control->hbond_cut 
+                        && ihb == H_BONDING_ATOM && jhb == H_ATOM && i >= n && j < n )
                 {
                     atomicAdd( hbonds + i, 1 );
                 }
 
-                if ( i >= n )
-                {
-                    ihb = -1;
-                }
+//                if ( i >= n )
+//                {
+//                    ihb = NON_H_BONDING_ATOM;
+//                }
             }
 
             if ( nbr_pj->d <= cutoff )
@@ -107,7 +111,7 @@ CUDA_GLOBAL void k_estimate_storages( reax_atom *my_atoms,
                 type_j = my_atoms[j].type;
                 r_ij = nbr_pj->d;
                 sbp_j = &(sbp[type_j]);
-                twbp = &(tbp[index_tbp (type_i,type_j,num_atom_types)]);
+                twbp = &(tbp[ index_tbp(type_i ,type_j, num_atom_types) ]);
 
                 if ( local == TRUE )
                 {
@@ -120,15 +124,21 @@ CUDA_GLOBAL void k_estimate_storages( reax_atom *my_atoms,
                         atomicAdd( Htop, 1 );
                     }
 
-                    if ( control->hbond_cut > 0.1 && (ihb == 1 || ihb == 2) &&
+                    /* atom i: H atom OR H bonding atom, native */
+                    if ( control->hbond_cut > 0.0 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) &&
                             nbr_pj->d <= control->hbond_cut )
                     {
                         jhb = sbp_j->p_hbond;
-                        if( ihb == 1 && jhb == 2 )
+
+                        /* atom i: H atom, native
+                         * atom j: H bonding atom */
+                        if( ihb == H_ATOM && jhb == H_BONDING_ATOM )
                         {
                             atomicAdd( hbonds + i, 1 );
                         }
-                        else if( ihb == 2 && jhb == 1 && j < n )
+                        /* atom i: H bonding atom, native
+                         * atom j: H atom, native */
+                        else if( ihb == H_BONDING_ATOM && jhb == H_ATOM && j < n )
                         {
                             atomicAdd( hbonds + i, 1 );
                         }
@@ -186,20 +196,20 @@ CUDA_GLOBAL void k_estimate_storages( reax_atom *my_atoms,
     else
     {
         bonds[i] = MIN_BONDS;
-        hbonds[i] = MIN_HBONDS;
+        hbonds[i] = 0;
     }
 
     if ( bonds[i] > max_bonds[i] )
     {
-        max_bonds[i] = MAX( (int)(bonds[i] * SAFE_ZONE), MIN_BONDS );
+        max_bonds[i] = MAX( (int)(bonds[i] * 2), MIN_BONDS );
         *realloc_bonds = TRUE;
     }
 
-//    if ( hbonds[i] > max_hbonds[i] )
-//    {
-//        max_hbonds[i] = MAX( (int)(hbonds[i] * SAFE_ZONE), MIN_HBONDS );
-//        *realloc_hbonds = TRUE;
-//    }
+    if ( hbonds[i] > max_hbonds[i] )
+    {
+        max_hbonds[i] = MAX( (int)(hbonds[i] * SAFE_ZONE), MIN_HBONDS );
+        *realloc_hbonds = TRUE;
+    }
 }
 
 
@@ -227,14 +237,12 @@ CUDA_GLOBAL void k_init_system_atoms( reax_atom *my_atoms, int N,
 
 
 int Cuda_Estimate_Storages( reax_system *system, control_params *control, 
-        reax_list **lists, int *Htop, int *hb_top, int step )
+        reax_list **lists, int *Htop, int step )
 {
     int i, ret, ret_bonds, ret_hbonds;
     int blocks = 0;
-    int *d_Htop, *d_hb_top;
+    int *d_Htop;
     int *tmp = (int*) scratch;
-    int hbond_count = 0;
-    int max_hbonds = 0, min_hbonds = 999999;
 
     ret = SUCCESS;
 
@@ -250,11 +258,8 @@ int Cuda_Estimate_Storages( reax_system *system, control_params *control,
             "Cuda_Estimate_Storages::d_hbonds" );
  
     d_Htop = tmp; 
-    d_hb_top = d_Htop + 1;
     cuda_memset( d_Htop, 0, sizeof(int), 
             "Cuda_Estimate_Storages::dHtop" );
-    cuda_memset( d_hb_top, 0, system->total_cap * sizeof(int), 
-            "Cuda_Estimate_Storages::d_hb_top" );
    
     blocks = (int)CEIL( (real)system->total_cap / ST_BLOCK_SIZE );
 
@@ -264,14 +269,12 @@ int Cuda_Estimate_Storages( reax_system *system, control_params *control,
           *(*dev_lists + FAR_NBRS), system->reax_param.num_atom_types,
           system->n, system->N, system->Hcap, system->total_cap, d_Htop,
           system->d_bonds, system->d_max_bonds, system->d_realloc_bonds,
-          d_hb_top );
+          system->d_hbonds, system->d_max_hbonds, system->d_realloc_hbonds );
     cudaThreadSynchronize( );
     cudaCheckError( );
 
     copy_host_device( Htop, d_Htop, sizeof(int),
             cudaMemcpyDeviceToHost, "Htop");
-    copy_host_device( hb_top, d_hb_top, sizeof(int) * system->total_cap,
-            cudaMemcpyDeviceToHost, "hb_top");
 
     /* check reallocation flags on device */
     copy_host_device( &ret_bonds, system->d_realloc_bonds, sizeof(int), 
@@ -294,55 +297,40 @@ int Cuda_Estimate_Storages( reax_system *system, control_params *control,
         ret = FAILURE;
     }
 
-//    if ( ret_hbonds == TRUE )
-//    {
-//        Cuda_Reduction_Sum( system->d_max_hbonds, system->d_total_hbonds,
-//                system->total_cap );
-//
-//        copy_host_device( &(system->total_hbonds), system->d_total_hbonds, sizeof(int), 
-//                cudaMemcpyDeviceToHost, "Cuda_Estimate_Storages::d_total_hbonds" );
-//
-//        fprintf( stderr, "system->total_hbonds = %d\n", system->total_bonds );
-//
-//        if ( step > 0 )
-//        {
-//            dev_workspace->realloc.hbonds = TRUE;
-//        }
-//        ret = FAILURE;
-//    }
-
-    //TODO: change
-    for ( i = 0; i < system->N; i++ )
+    if ( system->numH > 0 && control->hbond_cut > 0.0 && ret_hbonds == TRUE )
     {
-        if ( hb_top[i] >= max_hbonds )
+        Cuda_Reduction_Sum( system->d_max_hbonds, system->d_total_hbonds,
+                system->total_cap );
+
+        copy_host_device( &(system->total_hbonds), system->d_total_hbonds, sizeof(int), 
+                cudaMemcpyDeviceToHost, "Cuda_Estimate_Storages::d_total_hbonds" );
+
+        if ( step > 0 )
         {
-            max_hbonds = hb_top[i];
+            dev_workspace->realloc.hbonds = TRUE;
         }
-        if ( hb_top[i] <= min_hbonds )
+        ret = FAILURE;
+    }
+    else
+    {
+        /* if number of hydrogen atoms is 0, disable hydrogen bond functionality */
+        if ( system->numH == 0 && step == 0 )
         {
-            min_hbonds = hb_top[i];
+            fprintf( stderr, "WARNING: DISABLING HYDROGEN BONDS\n" );
+            control->hbond_cut = 0.0;
+            k_disable_hydrogen_bonding <<< 1, 1 >>> ( (control_params *)control->d_control_params );
         }
-
-        hbond_count += hb_top[i];
     }
-    system->max_hbonds = max_hbonds * SAFER_ZONE;
 
 #if defined(DEBUG)
     fprintf( stderr, "p:%d -->\n", system->my_rank );
     fprintf( stderr, " TOTAL DEVICE BOND COUNT: %d \n", system->total_bonds );
-    fprintf( stderr, " TOTAL DEVICE HBOND COUNT: %d \n", hbond_count );
+    fprintf( stderr, " TOTAL DEVICE HBOND COUNT: %d \n", system->total_hbonds );
     fprintf( stderr, " TOTAL DEVICE SPARSE COUNT: %d \n", *Htop );
 #endif
 
-    /* if number of hydrogen atoms is 0, disable hydrogen bond functionality */
-    if ( hbond_count == 0 )
-    {
-        control->hbond_cut = 0.0;
-        k_disable_hydrogen_bonding <<< 1, 1 >>> ( (control_params *)control->d_control_params );
-    }
-
     k_init_system_atoms <<< blocks, ST_BLOCK_SIZE >>>
-        ( system->d_my_atoms, system->N, d_hb_top, system->d_bonds );
+        ( system->d_my_atoms, system->N, system->d_hbonds, system->d_bonds );
     cudaThreadSynchronize( );
     cudaCheckError( );
 
@@ -613,11 +601,48 @@ int Cuda_Estimate_Storage_Sparse_Matrix( reax_system *system, control_params *co
 }
 
 
+CUDA_GLOBAL void k_print_hbond_info( reax_atom *my_atoms, single_body_parameters *sbp, 
+        control_params *control, reax_list hbonds, int N )
+{
+    int i;
+    int type_i;
+    int ihb, ihb_top;
+    single_body_parameters *sbp_i;
+    reax_atom *atom_i;
+
+    i = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if ( i >= N )
+    {
+        return;
+    }
+
+    atom_i = &(my_atoms[i]);
+    type_i = atom_i->type;
+    sbp_i = &(sbp[type_i]);
+
+    if ( control->hbond_cut > 0.0 )
+    {
+        ihb = sbp_i->p_hbond;
+        if ( ihb == H_ATOM  || ihb == H_BONDING_ATOM )
+        {
+            ihb_top = Dev_Start_Index( atom_i->Hindex, &hbonds );
+        }
+        else
+        {
+            ihb_top = -1;
+        }
+    }
+
+    printf( "atom %6d: ihb = %2d, ihb_top = %2d\n", i, ihb, ihb_top );
+}
+
+
 CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp, 
         two_body_parameters *tbp, storage workspace, control_params *control, 
         reax_list far_nbrs, reax_list bonds, reax_list hbonds, 
         LR_lookup_table *t_LR, int n, int N, int num_atom_types, 
-        int max_sparse_entries, int renbr, int max_hbonds )
+        int max_sparse_entries, int renbr )
 {
     int i, j, pj;
     int start_i, end_i;
@@ -649,7 +674,6 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp
     start_i = Dev_Start_Index( i, &far_nbrs );
     end_i = Dev_End_Index( i, &far_nbrs );
     btop_i = Dev_Start_Index( i, &bonds );
-
     sbp_i = &(sbp[type_i]);
 
     if ( i < n )
@@ -669,9 +693,8 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp
         workspace.bond_mark[i] = 1000;
     }
 
-    ihb = -1;
+    ihb = NON_H_BONDING_ATOM;
     ihb_top = -1;
-    //CHANGE ORIGINAL
     H->start[i] = Htop;
 
     if ( local == TRUE )
@@ -680,18 +703,14 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp
         H->entries[Htop].val = sbp_i->eta;
         ++Htop;
     }
-    //CHANGE ORIGINAL
 
     if ( control->hbond_cut > 0.0 )
     {
         ihb = sbp_i->p_hbond;
-        //CHANGE ORIGINAL
-        if ( ihb == 1  || ihb == 2 )
+
+        if ( ihb == H_ATOM || ihb == H_BONDING_ATOM )
         {
-            //CHANGE ORIGINAL
-            //ihb_top = Dev_Start_Index( atom_i->Hindex, &hbonds );
-            ihb_top = i * max_hbonds;
-            Dev_Set_Start_Index( atom_i->Hindex, ihb_top, &hbonds );
+            ihb_top = Dev_Start_Index( atom_i->Hindex, &hbonds );
         }
         else
         {
@@ -705,6 +724,7 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp
         nbr_pj = &( far_nbrs.select.far_nbr_list[pj] );
         j = nbr_pj->nbr;
         atom_j = &(my_atoms[j]);
+
         if ( renbr )
         {
             if ( nbr_pj->d <= cutoff )
@@ -764,12 +784,15 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp
         }
         if ( flag2 == TRUE )
         {
-            ihb = sbp_i->p_hbond;
             type_j = atom_j->type;
             sbp_j = &(sbp[type_j]);
+            ihb = sbp_i->p_hbond;
             jhb = sbp_j->p_hbond;
+
+            /* atom i: H bonding, ghost
+             * atom j: H atom, native */
             if ( control->hbond_cut > 0.0 && nbr_pj->d <= control->hbond_cut
-                    && ihb == 2 && jhb == 1 && i >= n && j < n ) 
+                    && ihb == H_BONDING_ATOM && jhb == H_ATOM && i >= n && j < n ) 
             {
                 hbonds.select.hbond_list[ihb_top].nbr = j;
                 hbonds.select.hbond_list[ihb_top].scl = -1;
@@ -848,14 +871,17 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp
                 //bool condition = !((i >= n) && (j >= n));
 
                 /* hydrogen bond lists */
-                if ( control->hbond_cut > 0 && (ihb == 1 || ihb == 2) &&
-                        nbr_pj->d <= control->hbond_cut // && i < j
-                        )
+                if ( control->hbond_cut > 0.0 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) &&
+                        nbr_pj->d <= control->hbond_cut )
                 {
                     jhb = sbp_j->p_hbond;
-                    if ( ihb == 1 && jhb == 2 )
+
+                    /* atom i: H atom, native
+                     * atom j: H bonding atom */
+                    if ( ihb == H_ATOM && jhb == H_BONDING_ATOM )
                     {
                         hbonds.select.hbond_list[ihb_top].nbr = j;
+
                         if ( i < j )
                         {
                             hbonds.select.hbond_list[ihb_top].scl = 1;
@@ -872,7 +898,9 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp
 
                         ++ihb_top;
                     }
-                    else if ( ihb == 2 && jhb == 1 && j < n )
+                    /* atom i: H bonding atom, native
+                     * atom j: H atom, native */
+                    else if ( ihb == H_BONDING_ATOM && jhb == H_ATOM && j < n )
                     {
                         //jhb_top = End_Index( atom_j->Hindex, hbonds );
                         hbonds.select.hbond_list[ihb_top].nbr = j;
@@ -884,9 +912,6 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp
                         rvec_MakeZero( hbonds.select.hbond_list[ihb_top].hb_f );
 
                         ++ihb_top;
-
-                        //Set_End_Index( atom_j->Hindex, jhb_top+1, hbonds );
-                        //++num_hbonds;
                     }
                 }
             }
@@ -900,26 +925,28 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp
                 //num_bonds += 2;
                 ++btop_i;
 
-                /* Need to do later... since i and j are parallel
-                   if( workspace->bond_mark[j] > workspace->bond_mark[i] + 1 )
-                   workspace->bond_mark[j] = workspace->bond_mark[i] + 1;
-                   else if( workspace->bond_mark[i] > workspace->bond_mark[j] + 1 ) {
-                   workspace->bond_mark[i] = workspace->bond_mark[j] + 1;
-                   }
-                 */
+                /* Need to do later... since i and j are parallel */
+//                if( workspace->bond_mark[j] > workspace->bond_mark[i] + 1 )
+//                {
+//                    workspace->bond_mark[j] = workspace->bond_mark[i] + 1;
+//                }
+//                else if( workspace->bond_mark[i] > workspace->bond_mark[j] + 1 )
+//                {
+//                    workspace->bond_mark[i] = workspace->bond_mark[j] + 1;
+//                }
             }
         }
     }
 
     Dev_Set_End_Index( i, btop_i, &bonds );
-    //    if( local == TRUE ) {
     H->end[i] = Htop;
-    //   }
-    //CHANGE ORIGINAL
-    if ( ( ihb == 1 || ihb == 2 ) && ihb_top > 0 && control->hbond_cut > 0.0 )
-    {
-        Dev_Set_End_Index( atom_i->Hindex, ihb_top, &hbonds );
-    }
+//    if( local == TRUE )
+//    {
+        if ( control->hbond_cut > 0.0 && ihb_top > 0 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) )
+        {
+            Dev_Set_End_Index( atom_i->Hindex, ihb_top, &hbonds );
+        }
+//    }
     //} Commented for cuda kernel
 }
 
@@ -998,13 +1025,13 @@ CUDA_GLOBAL void New_fix_sym_hbond_indices( reax_atom *my_atoms, reax_list hbond
     }
 
     i = warp_id;
-    j = start + lane_id;
     start = Dev_Start_Index( my_atoms[i].Hindex, &hbonds );
     end = Dev_End_Index( my_atoms[i].Hindex, &hbonds );
+    j = start + lane_id;
 
     while ( j < end )
     {
-        ihbond = &( hbonds.select.hbond_list [j] );
+        ihbond = &( hbonds.select.hbond_list[j] );
         nbr = ihbond->nbr;
 
         nbrstart = Dev_Start_Index( my_atoms[nbr].Hindex, &hbonds );
@@ -1012,7 +1039,7 @@ CUDA_GLOBAL void New_fix_sym_hbond_indices( reax_atom *my_atoms, reax_list hbond
 
         for ( k = nbrstart; k < nbrend; k++ )
         {
-            jhbond = &( hbonds.select.hbond_list [k] );
+            jhbond = &( hbonds.select.hbond_list[k] );
 
             if ( jhbond->nbr == i )
             {
@@ -1040,8 +1067,9 @@ CUDA_GLOBAL void k_update_bonds( reax_atom *my_atoms, reax_list bonds, int n )
         return;
     }
 
-    my_atoms[i].num_bonds = 
-        MAX( Dev_Num_Entries(i, &bonds) * 2, MIN_BONDS );
+//    my_atoms[i].num_bonds = 
+//        MAX( Dev_Num_Entries(i, &bonds) * 2, MIN_BONDS );
+    my_atoms[i].num_bonds = Dev_Num_Entries( i, &bonds );
 }
 
 
@@ -1058,8 +1086,9 @@ CUDA_GLOBAL void k_update_hbonds( reax_atom *my_atoms, reax_list hbonds, int n )
     }
 
     Hindex = my_atoms[i].Hindex;
-    my_atoms[i].num_hbonds = 
-        MAX( Dev_Num_Entries(Hindex, &hbonds) * SAFER_ZONE, MIN_HBONDS );
+//    my_atoms[i].num_hbonds = 
+//        MAX( Dev_Num_Entries(Hindex, &hbonds) * SAFER_ZONE, MIN_HBONDS );
+    my_atoms[i].num_hbonds = Dev_Num_Entries( Hindex, &hbonds );
 }
 ////////////////////////
 ////////////////////////
@@ -1076,10 +1105,8 @@ int Cuda_Validate_Lists( reax_system *system, storage *workspace,
     int *thbody;
     reax_list *bonds, *hbonds;
     reallocate_data *realloc;
-    int max_sp_entries, num_hbonds, num_bonds;
+    int max_sp_entries;
     int total_sp_entries;
-    int max_hbonds;
-    real *spad = (real *) scratch;
 
     realloc = &( dev_workspace->realloc );
     blocks = system->n / DEF_BLOCK_SIZE + 
@@ -1097,8 +1124,7 @@ int Cuda_Validate_Lists( reax_system *system, storage *workspace,
     if ( control->hbond_cut > 0.0 && system->numH > 0 )
     {
         k_update_hbonds <<< blocks, DEF_BLOCK_SIZE >>>
-            (system->d_my_atoms, *(*lists + HBONDS), 
-             system->n);
+            ( system->d_my_atoms, *(*lists + HBONDS), system->n );
         cudaThreadSynchronize( );
         cudaCheckError( );
     }
@@ -1166,68 +1192,6 @@ int Cuda_Validate_Lists( reax_system *system, storage *workspace,
     }
     fprintf( stderr, "        [sparse_matrix: %d]\n", ret );
 
-    /* validate Hbonds list */
-    num_hbonds = 0;
-    // FIX - 4 - added additional check here
-    if ( numH > 0 && control->hbond_cut > 0.0 )
-    {
-        hbonds = *lists + HBONDS;
-        memset( host_scratch, 0, 2 * hbonds->n * sizeof(int) );
-        index = (int *) host_scratch;
-        end_index = index + hbonds->n;
-
-        copy_host_device( index, hbonds->index, hbonds->n * sizeof(int), 
-                cudaMemcpyDeviceToHost, "hbonds:index" );
-        copy_host_device( end_index, hbonds->end_index, hbonds->n * sizeof(int), 
-                cudaMemcpyDeviceToHost, "hbonds:end_index" );
-
-        /*
-           for (i = 0; i < N-1; i++) {
-           Hindex = my_atoms [i].Hindex;
-           if (Hindex > -1) 
-           comp = index [Hindex + 1];
-           else
-           comp = hbonds->num_intrs;
-
-           if (end_index [Hindex] > comp) {
-           fprintf(stderr,"step%d-atom:%d hbondchk failed: H=%d start(H)=%d end(H)=%d str(H+1)=%d\n",
-           step, i, Hindex, index[Hindex], end_index[Hindex], comp );
-           return FAILURE;
-           }
-
-           num_hbonds += MAX( (end_index [Hindex] - index [Hindex]) * 2, MIN_HBONDS * 2);
-           }
-           if (end_index [my_atoms[i].Hindex] > hbonds->num_intrs) {
-           fprintf(stderr,"step%d-atom:%d hbondchk failed: H=%d start(H)=%d end(H)=%d num_intrs=%d\n",
-           step, i, Hindex, index[Hindex], end_index[Hindex], hbonds->num_intrs);
-           return FAILURE;
-           }
-
-           num_hbonds += MIN( (end_index [my_atoms[i].Hindex] - index [my_atoms[i].Hindex]) * 2, 
-           2 * MIN_HBONDS);
-           num_hbonds = MAX( num_hbonds, MIN_CAP*MIN_HBONDS );
-           realloc->num_hbonds = num_hbonds;
-         */
-
-        max_hbonds = 0;
-        for ( i = 0; i < system->N; i++ )
-        {
-            if ( end_index[i] - index[i] >= system->max_hbonds )
-            {
-                //TODO: update
-//                fprintf( stderr, "step%d-hbondchk failed: i=%d start(i)=%d end(i)=%d max_hbonds=%d\n",
-//                        step, i, index[i], end_index[i], system->max_hbonds );
-//                return FAILURE;
-            }
-            if ( end_index[i] - index[i] >= max_hbonds )
-            {
-                max_hbonds = end_index[i] - index[i];
-            }
-        }
-        realloc->num_hbonds = max_hbonds;
-    }
-    fprintf( stderr, "        [hbonds: %d]\n", ret );
-
     /* 3bodies list: since a more accurate estimate of the num.
      * of three body interactions requires that bond orders have
      * been computed, delay validation until for computation */
@@ -1304,13 +1268,10 @@ int Cuda_Init_Forces( reax_system *system, control_params *control,
         simulation_data *data, storage *workspace,
         reax_list **lists, output_controls *out_control ) 
 {
-    int i, ret, Htop, *hb_top;
+    int i, ret, Htop;
     int blocks, hblocks;
 
-    hb_top = (int*) host_scratch;
-
-    ret = Cuda_Estimate_Storages( system, control, dev_lists, &Htop,
-            hb_top, data->step );
+    ret = Cuda_Estimate_Storages( system, control, dev_lists, &Htop, data->step );
 
     if ( ret == SUCCESS )
     {
@@ -1332,6 +1293,13 @@ int Cuda_Init_Forces( reax_system *system, control_params *control,
 //            ( system->d_my_atoms, *(*dev_lists + FAR_NBRS), *(*dev_lists + BONDS),
 //              dev_workspace->total_bond_order, system->N );
 //        cudaThreadSynchronize( );
+//        cudaCheckError( );
+
+//        k_print_hbond_info <<< blocks, DEF_BLOCK_SIZE >>>
+//            ( system->d_my_atoms, system->reax_param.d_sbp,
+//              (control_params *)control->d_control_params,
+//              *(*dev_lists + HBONDS), system->N );
+//        cudaThreadSynchronize( );
 //        cudaCheckError( );
 
         k_init_forces <<< blocks, DEF_BLOCK_SIZE >>>
@@ -1342,7 +1310,7 @@ int Cuda_Init_Forces( reax_system *system, control_params *control,
               *(*dev_lists + HBONDS), d_LR, system->n,
               system->N, system->reax_param.num_atom_types,
               system->max_sparse_entries, (((data->step-data->prev_steps) %
-                      control->reneighbor) == 0), system->max_hbonds );
+                      control->reneighbor) == 0) );
         cudaThreadSynchronize( );
         cudaCheckError( );
 
@@ -1358,7 +1326,7 @@ int Cuda_Init_Forces( reax_system *system, control_params *control,
         if ( control->hbond_cut > 0 && system->numH > 0 )
         {
             /* make hbond_list symmetric */
-            hblocks = (system->N * HB_KER_SYM_THREADS_PER_ATOM) / HB_SYM_BLOCK_SIZE + 
+            hblocks = (system->N * HB_KER_SYM_THREADS_PER_ATOM / HB_SYM_BLOCK_SIZE) + 
                 ((((system->N * HB_KER_SYM_THREADS_PER_ATOM) % HB_SYM_BLOCK_SIZE) == 0) ? 0 : 1);
 
             New_fix_sym_hbond_indices <<< hblocks, HB_BLOCK_SIZE >>>
@@ -1397,7 +1365,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control,
         simulation_data *data, storage *workspace, 
         reax_list **lists, output_controls *out_control )
 {
-    int i, hbs, hnbrs_bl, ret;
+    int i, hbs, hnbrs_blocks, ret;
     int *thbody;
     static int compute_bonded_part1 = FALSE;
     real t_start, t_elapsed;
@@ -1446,6 +1414,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control,
                 cudaGetLastError( ), t_elapsed );
         fprintf( stderr, "Cuda_Calculate_Bond_Orders Done... \n" );
 #endif
+        fprintf( stderr, "      [CUDA_COMPUTE_BONDED_FORCES:BO]\n" );
 
         /* 2. Bond Energy Interactions */
         t_start = Get_Time( );
@@ -1469,6 +1438,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control,
                 cudaGetLastError( ), t_elapsed );
         fprintf( stderr, "Cuda_Bond_Energy Done... \n" );
 #endif
+        fprintf( stderr, "      [CUDA_COMPUTE_BONDED_FORCES:Bond E]\n" );
 
         /* 3. Atom Energy Interactions */
         t_start = Get_Time( );
@@ -1512,6 +1482,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control,
                 cudaGetLastError( ), t_elapsed );
         fprintf( stderr, "test_LonePair_postprocess Done... \n");
 #endif
+        fprintf( stderr, "      [CUDA_COMPUTE_BONDED_FORCES:Atom E]\n" );
 
         compute_bonded_part1 = TRUE;
     }
@@ -1589,6 +1560,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control,
                 t_elapsed );
         fprintf( stderr, "Three_Body_Interactions Done... \n" );
 #endif
+        fprintf( stderr, "      [CUDA_COMPUTE_BONDED_FORCES:Valence Angles]\n" );
 
         /* 5. Torsion Angles Interactions */
         t_start = Get_Time( );
@@ -1642,6 +1614,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control,
                 cudaGetLastError( ), t_elapsed );
         fprintf( stderr, " Four_Body_ Done... \n");
 #endif
+        fprintf( stderr, "      [CUDA_COMPUTE_BONDED_FORCES:Torsion]\n" );
 
         /* 6. Hydrogen Bonds Interactions */
         // FIX - 4 - Added additional check here
@@ -1651,12 +1624,12 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control,
             cuda_memset( spad, 0,
                     2 * sizeof(real) * system->n + sizeof(rvec) * system->n * 2, "scratch" );
 
-            hbs = ((system->n * HB_KER_THREADS_PER_ATOM)/ HB_BLOCK_SIZE) + 
+            hbs = (system->n * HB_KER_THREADS_PER_ATOM / HB_BLOCK_SIZE) + 
                 (((system->n * HB_KER_THREADS_PER_ATOM) % HB_BLOCK_SIZE) == 0 ? 0 : 1);
 
+//            Cuda_Hydrogen_Bonds <<< BLOCKS, BLOCK_SIZE >>>
             Cuda_Hydrogen_Bonds_MT <<< hbs, HB_BLOCK_SIZE, 
                     HB_BLOCK_SIZE * (2 * sizeof(real) + 2 * sizeof(rvec)) >>>
-//            Cuda_Hydrogen_Bonds <<< BLOCKS, BLOCK_SIZE>>>
                     ( system->d_my_atoms, system->reax_param.d_sbp,
                       system->reax_param.d_hbp, system->reax_param.d_gp,
                       (control_params *) control->d_control_params,
@@ -1665,6 +1638,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control,
                       spad, (rvec *) (spad + 2 * system->n) );
             cudaThreadSynchronize( );
             cudaCheckError( );
+            fprintf( stderr, "      [CUDA_COMPUTE_BONDED_FORCES:Hbonds]\n" );
 
             /* reduction for E_HB */
             Cuda_Reduction_Sum( spad,
@@ -1692,17 +1666,20 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control,
                    *(*dev_lists + BONDS), system->N );
             cudaThreadSynchronize( );
             cudaCheckError( );
+            fprintf( stderr, "      [CUDA_COMPUTE_BONDED_FORCES:Hbonds_PostProcess]\n" );
 
             /* post process step2 */
+            hnbrs_blocks = (system->N * HB_POST_PROC_KER_THREADS_PER_ATOM / HB_POST_PROC_BLOCK_SIZE) +
+                (((system->N * HB_POST_PROC_KER_THREADS_PER_ATOM) % HB_POST_PROC_BLOCK_SIZE) == 0 ? 0 : 1);
+
 //            Cuda_Hydrogen_Bonds_HNbrs <<< system->N, 32, 32 * sizeof(rvec) >>>
 //                ( system->d_my_atoms, *dev_workspace, *(*dev_lists + HBONDS) );
-            hnbrs_bl = ((system->N * HB_POST_PROC_KER_THREADS_PER_ATOM)/ HB_POST_PROC_BLOCK_SIZE) +
-                (((system->N * HB_POST_PROC_KER_THREADS_PER_ATOM) % HB_POST_PROC_BLOCK_SIZE) == 0 ? 0 : 1);
-            Cuda_Hydrogen_Bonds_HNbrs_BL <<< hnbrs_bl, HB_POST_PROC_BLOCK_SIZE, 
+            Cuda_Hydrogen_Bonds_HNbrs_BL <<< hnbrs_blocks, HB_POST_PROC_BLOCK_SIZE, 
                     HB_POST_PROC_BLOCK_SIZE * sizeof(rvec) >>>
                 ( system->d_my_atoms, *dev_workspace, *(*dev_lists + HBONDS), system->N );
             cudaThreadSynchronize( );
             cudaCheckError( );
+            fprintf( stderr, "      [CUDA_COMPUTE_BONDED_FORCES:Hbonds_HNbrs]\n" );
 
             t_elapsed = Get_Timing_Info( t_start );
 
diff --git a/PG-PuReMD/src/cuda_forces.h b/PG-PuReMD/src/cuda_forces.h
index f0340a83..a4ae2655 100644
--- a/PG-PuReMD/src/cuda_forces.h
+++ b/PG-PuReMD/src/cuda_forces.h
@@ -11,7 +11,7 @@ extern "C" {
 
 
 int Cuda_Estimate_Storages( reax_system *, control_params *, reax_list **,
-        int *, int *, int );
+        int *, int );
 
 int Cuda_Estimate_Storage_Three_Body( reax_system *, control_params *,
         int, reax_list **, int *, int * );
diff --git a/PG-PuReMD/src/cuda_hydrogen_bonds.cu b/PG-PuReMD/src/cuda_hydrogen_bonds.cu
index 3adafa09..89682a39 100644
--- a/PG-PuReMD/src/cuda_hydrogen_bonds.cu
+++ b/PG-PuReMD/src/cuda_hydrogen_bonds.cu
@@ -19,10 +19,11 @@
   <http://www.gnu.org/licenses/>.
   ----------------------------------------------------------------------*/
 
+#include "cuda_hydrogen_bonds.h"
+
 #include "reax_types.h"
 #include "index_utils.h"
 
-#include "cuda_hydrogen_bonds.h"
 #include "cuda_valence_angles.h"
 #include "cuda_helpers.h"
 #include "cuda_list.h"
@@ -41,13 +42,11 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter
     int start_j, end_j, hb_start_j, hb_end_j;
     int hblist[MAX_BONDS];
     int itr, top;
-    int num_hb_intrs;
     ivec rel_jk;
     real r_ij, r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2;
     real e_hb, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3;
     rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk;
     rvec dvec_jk, force, ext_press;
-    // rtensor temp_rtensor, total_rtensor;
     hbond_parameters *hbp;
     bond_order_data *bo_ij;
     bond_data *pbond_ij;
@@ -56,6 +55,9 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter
     bond_data *bond_list;
     hbond_data *hbond_list, *hbond_jk;
     storage *workspace;
+#if defined(DEBUG)
+    int num_hb_intrs;
+#endif
 
     j = blockIdx.x * blockDim.x + threadIdx.x;
 
@@ -66,28 +68,29 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter
 
     bonds = &( p_bonds );
     bond_list = bonds->select.bond_list;
-    hbonds = & ( p_hbonds );
+    hbonds = &( p_hbonds );
     hbond_list = hbonds->select.hbond_list;
     workspace = &( p_workspace );
+#if defined(DEBUG)
     num_hb_intrs = 0;
+#endif
 
     /* loops below discover the Hydrogen bonds between i-j-k triplets.
-       here j is H atom and there has to be some bond between i and j.
-       Hydrogen bond is between j and k.
-       so in this function i->X, j->H, k->Z when we map 
-       variables onto the ones in the handout.*/
+     * here j is H atom and there has to be some bond between i and j.
+     * Hydrogen bond is between j and k.
+     * so in this function i->X, j->H, k->Z when we map 
+     * variables onto the ones in the handout.*/
     //for( j = 0; j < system->n; ++j )
-    /* j has to be of type H */
-    if ( sbp[ my_atoms[j].type ].p_hbond == 1 )
+    if ( sbp[ my_atoms[j].type ].p_hbond == H_ATOM )
     {
-        /*set j's variables */
         type_j = my_atoms[j].type;
-        start_j = Dev_Start_Index(j, bonds);
-        end_j = Dev_End_Index(j, bonds);
+        start_j = Dev_Start_Index( j, bonds );
+        end_j = Dev_End_Index( j, bonds );
         hb_start_j = Dev_Start_Index( my_atoms[j].Hindex, hbonds );
         hb_end_j = Dev_End_Index( my_atoms[j].Hindex, hbonds );
 
         top = 0;
+        /* search bonded atoms to atom j (i.e., hydrogen atom) for potential hydrogen bonding */
         for( pi = start_j; pi < end_j; ++pi )
         {
             pbond_ij = &( bond_list[pi] );
@@ -95,16 +98,17 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter
             bo_ij = &(pbond_ij->bo_data);
             type_i = my_atoms[i].type;
 
-            if( sbp[type_i].p_hbond == 2 && 
+            if( sbp[type_i].p_hbond == H_BONDING_ATOM && 
                     bo_ij->BO >= HB_THRESHOLD )
             {
                 hblist[top++] = pi;
             }
         }
 
-        // fprintf( stderr, "j: %d, top: %d, hb_start_j: %d, hb_end_j:%d\n", 
-        //          j, top, hb_start_j, hb_end_j );
+//        fprintf( stderr, "j: %d, top: %d, hb_start_j: %d, hb_end_j:%d\n",
+//                j, top, hb_start_j, hb_end_j );
 
+        /* for each hbond of atom j */
         for ( pk = hb_start_j; pk < hb_end_j; ++pk )
         {
             /* set k's varibles */
@@ -114,9 +118,10 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter
             r_jk = nbr_jk->d;
             rvec_Scale( dvec_jk, hbond_list[pk].scl, nbr_jk->dvec );
 
-            hbond_jk = &( hbond_list [pk] );
-            rvec_MakeZero (hbond_jk->hb_f);
+            hbond_jk = &( hbond_list[pk] );
+            rvec_MakeZero( hbond_jk->hb_f );
 
+            /* find matching hbond to atom k */
             for ( itr = 0; itr < top; ++itr )
             {
                 pi = hblist[itr];
@@ -129,7 +134,10 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter
                     type_i = my_atoms[i].type;
                     r_ij = pbond_ij->d;         
                     hbp = &(d_hbp[ index_hbp (type_i,type_j,type_k,num_atom_types) ]);
+
+#if defined(DEBUG)
                     ++num_hb_intrs;
+#endif
 
                     Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk,
                             &theta, &cos_theta );
@@ -139,7 +147,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter
                             &dcos_theta_dk );
 
                     /* hydrogen bond energy */
-                    sin_theta2 = SIN( theta/2.0 );
+                    sin_theta2 = SIN( theta / 2.0 );
                     sin_xhz4 = SQR(sin_theta2);
                     sin_xhz4 *= sin_xhz4;
                     cos_xhz1 = ( 1.0 - cos_theta );
@@ -169,7 +177,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter
                     {
                         // dcos terms
                         //rvec_ScaledAdd( workspace->f[i], +CEhb2, dcos_theta_di ); 
-                        //atomic_rvecScaledAdd (workspace->f[i], +CEhb2, dcos_theta_di );
+                        //atomic_rvecScaledAdd( workspace->f[i], +CEhb2, dcos_theta_di );
                         rvec_ScaledAdd( pbond_ij->hb_f, +CEhb2, dcos_theta_di ); 
 
                         rvec_ScaledAdd( workspace->f[j], +CEhb2, dcos_theta_dj );
@@ -188,12 +196,12 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter
                     else
                     {
                         /* for pressure coupling, terms that are not related to bond order
-                           derivatives are added directly into pressure vector/tensor */
+                         * derivatives are added directly into pressure vector/tensor */
                         rvec_Scale( force, +CEhb2, dcos_theta_di ); // dcos terms
                         //rvec_Add( workspace->f[i], force );
                         rvec_Add( pbond_ij->hb_f, force );
                         rvec_iMultiply( ext_press, pbond_ij->rel_box, force );
-                        rvec_ScaledAdd( data_ext_press [j], 1.0, ext_press );
+                        rvec_ScaledAdd( data_ext_press[j], 1.0, ext_press );
 
                         rvec_ScaledAdd( workspace->f[j], +CEhb2, dcos_theta_dj );
 
@@ -206,7 +214,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter
                         // dr terms
                         rvec_ScaledAdd( workspace->f[j], -CEhb3/r_jk, dvec_jk ); 
 
-                        rvec_Scale( force, CEhb3/r_jk, dvec_jk );
+                        rvec_Scale( force, CEhb3 / r_jk, dvec_jk );
                         //rvec_Add( workspace->f[k], force );
                         rvec_Add( hbond_jk->hb_f, force );
                         rvec_iMultiply( ext_press, rel_jk, force );
@@ -267,20 +275,19 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame
     rvec *sh_atomf = (rvec *)(sh_cdbo + blockDim.x);
     rvec *sh_hf = (rvec *) (sh_atomf + blockDim.x);
 #endif
-    int __THREADS_PER_ATOM__, thread_id, warp_id, lane_id; 
-    int  i, j, k, pi, pk;
-    int  type_i, type_j, type_k;
-    int  start_j, end_j, hb_start_j, hb_end_j;
-    int  hblist[MAX_BONDS];
-    int  itr, top;
-    int  num_hb_intrs;
+    int __THREADS_PER_ATOM__, thread_id, group_id, lane_id; 
+    int i, j, k, pi, pk;
+    int type_i, type_j, type_k;
+    int start_j, end_j, hb_start_j, hb_end_j;
+    //TODO: re-write and remove
+    int hblist[MAX_BONDS];
+    int itr, top;
     int loopcount, count;
     ivec rel_jk;
     real r_ij, r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2;
     real e_hb, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3;
     rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk;
     rvec dvec_jk, force, ext_press;
-    // rtensor temp_rtensor, total_rtensor;
     hbond_parameters *hbp;
     bond_order_data *bo_ij;
     bond_data *pbond_ij;
@@ -289,24 +296,29 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame
     bond_data *bond_list;
     hbond_data *hbond_list, *hbond_jk;
     storage *workspace;
+#if defined(DEBUG)
+    int num_hb_intrs;
+#endif
 
     __THREADS_PER_ATOM__ = HB_KER_THREADS_PER_ATOM;
     thread_id = blockIdx.x * blockDim.x + threadIdx.x;
-    warp_id = thread_id / __THREADS_PER_ATOM__;
+    group_id = thread_id / __THREADS_PER_ATOM__;
     lane_id = thread_id & (__THREADS_PER_ATOM__ - 1); 
 
-    if ( warp_id >= n )
+    if ( group_id >= n )
     {
         return;
     }
 
-    num_hb_intrs = 0;
     workspace = &( p_workspace );
     bonds = &( p_bonds );
     bond_list = bonds->select.bond_list;
-    hbonds = & ( p_hbonds );
+    hbonds = &( p_hbonds );
     hbond_list = hbonds->select.hbond_list;
-    j = warp_id;
+    j = group_id;
+#if defined(DEBUG)
+    num_hb_intrs = 0;
+#endif
 
     /* loops below discover the Hydrogen bonds between i-j-k triplets.
        here j is H atom and there has to be some bond between i and j.
@@ -316,7 +328,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame
     //for( j = 0; j < system->n; ++j )
 
 #if defined( __SM_35__)
-    sh_hb  = 0;
+    sh_hb = 0;
     rvec_MakeZero( sh_atomf );
 #else
     sh_hb[threadIdx.x] = 0;
@@ -324,7 +336,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame
 #endif
 
     /* j has to be of type H */
-    if ( sbp[ my_atoms[j].type ].p_hbond == 1 )
+    if ( sbp[ my_atoms[j].type ].p_hbond == H_ATOM )
     {
         /* set j's variables */
         type_j = my_atoms[j].type;
@@ -341,7 +353,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame
             bo_ij = &(pbond_ij->bo_data);
             type_i = my_atoms[i].type;
 
-            if ( sbp[type_i].p_hbond == 2 && 
+            if ( sbp[type_i].p_hbond == H_BONDING_ATOM && 
                     bo_ij->BO >= HB_THRESHOLD )
             {
                 hblist[top++] = pi;
@@ -396,7 +408,10 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame
                     type_i = my_atoms[i].type;
                     r_ij = pbond_ij->d;         
                     hbp = &(d_hbp[ index_hbp(type_i,type_j,type_k,num_atom_types) ]);
+
+#if defined(DEBUG)
                     ++num_hb_intrs;
+#endif
 
                     Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk,
                             &theta, &cos_theta );
@@ -405,7 +420,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame
                             &dcos_theta_di, &dcos_theta_dj, &dcos_theta_dk );
 
                     /* hydrogen bond energy */
-                    sin_theta2 = SIN( theta/2.0 );
+                    sin_theta2 = SIN( theta / 2.0 );
                     sin_xhz4 = SQR(sin_theta2);
                     sin_xhz4 *= sin_xhz4;
                     cos_xhz1 = ( 1.0 - cos_theta );
@@ -598,10 +613,10 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_PostProcess( reax_atom *atoms,
         storage p_workspace, reax_list p_bonds, int N )
 {
     int i, pj;
-    storage *workspace = &( p_workspace );
+    storage *workspace;
     bond_data *pbond;
     bond_data *sym_index_bond;
-    reax_list *bonds = &p_bonds;
+    reax_list *bonds;
 
     i = blockIdx.x * blockDim.x + threadIdx.x;
 
@@ -610,10 +625,13 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_PostProcess( reax_atom *atoms,
         return;
     }
 
+    workspace = &p_workspace;
+    bonds = &p_bonds;
+
     for ( pj = Dev_Start_Index(i, bonds); pj < Dev_End_Index(i, bonds); ++pj )
     {
         pbond = &(bonds->select.bond_list[pj]);
-        sym_index_bond = &( bonds->select.bond_list[ pbond->sym_index ] );
+        sym_index_bond = &( bonds->select.bond_list[pbond->sym_index] );
 
         //rvec_Add( atoms[i].f, sym_index_bond->hb_f );
         rvec_Add( workspace->f[i], sym_index_bond->hb_f );
@@ -631,11 +649,13 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_HNbrs( reax_atom *atoms,
 #endif
     int i, pj,j;
     int start, end;
-    storage *workspace = &( p_workspace );
+    storage *workspace;
     hbond_data *nbr_pj, *sym_index_nbr;
-    reax_list *hbonds = &p_hbonds;
+    reax_list *hbonds;
 
     i = blockIdx.x;
+    workspace = &p_workspace;
+    hbonds = &p_hbonds;
 
     start = Dev_Start_Index( i, hbonds );
     end = Dev_End_Index( i, hbonds );
@@ -710,25 +730,33 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_HNbrs_BL( reax_atom *atoms,
 {
 #if defined(__SM_35__)
     rvec __f;
+    int s;
 #else
     extern __shared__ rvec __f[];
 #endif
     int i, pj,j;
     int start, end;
-    storage *workspace = &( p_workspace );
+    storage *workspace;
     hbond_data *nbr_pj, *sym_index_nbr;
-    reax_list *hbonds = &p_hbonds;
-    int __THREADS_PER_ATOM__ = HB_POST_PROC_KER_THREADS_PER_ATOM;
-    int thread_id = blockIdx.x * blockDim.x + threadIdx.x;
-    int warp_id = thread_id / __THREADS_PER_ATOM__;
-    int lane_id = thread_id & (__THREADS_PER_ATOM__ -1); 
+    reax_list *hbonds;
+    int __THREADS_PER_ATOM__;
+    int thread_id;
+    int group_id;
+    int lane_id; 
 
-    if ( warp_id >= N )
+    __THREADS_PER_ATOM__ = HB_POST_PROC_KER_THREADS_PER_ATOM;
+    thread_id = blockIdx.x * blockDim.x + threadIdx.x;
+    group_id = thread_id / __THREADS_PER_ATOM__;
+    lane_id = thread_id & (__THREADS_PER_ATOM__ - 1);
+
+    if ( group_id >= N )
     {
         return;
     }
 
-    i = warp_id;
+    workspace = &( p_workspace );
+    hbonds = &p_hbonds;
+    i = group_id;
     start = Dev_Start_Index( i, hbonds );
     end = Dev_End_Index( i, hbonds );
     pj = start + lane_id;
@@ -745,16 +773,16 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_HNbrs_BL( reax_atom *atoms,
 
         sym_index_nbr = &(hbonds->select.hbond_list[ nbr_pj->sym_index ]);
 #if defined(__SM_35__)
-        rvec_Add ( __f, sym_index_nbr->hb_f );
+        rvec_Add( __f, sym_index_nbr->hb_f );
 #else
-        rvec_Add ( __f[threadIdx.x], sym_index_nbr->hb_f );
+        rvec_Add( __f[threadIdx.x], sym_index_nbr->hb_f );
 #endif
 
         pj += __THREADS_PER_ATOM__;
     }
 
 #if defined(__SM_35__)
-    for ( int s = __THREADS_PER_ATOM__ >> 1; s >= 1; s/=2 )
+    for ( s = __THREADS_PER_ATOM__ >> 1; s >= 1; s /= 2 )
     {
         __f[0] += shfl( __f[0], s );
         __f[1] += shfl( __f[1], s );
diff --git a/PG-PuReMD/src/cuda_neighbors.cu b/PG-PuReMD/src/cuda_neighbors.cu
index 18af354b..54f6d021 100644
--- a/PG-PuReMD/src/cuda_neighbors.cu
+++ b/PG-PuReMD/src/cuda_neighbors.cu
@@ -344,7 +344,8 @@ CUDA_GLOBAL void k_mt_generate_neighbor_lists( reax_atom *my_atoms,
                 {
                     //do leader selection here
                     leader = -1;
-                    for ( ll = my_bucket *__THREADS_PER_ATOM__; ll < (my_bucket)*__THREADS_PER_ATOM__ + __THREADS_PER_ATOM__; ll++ )
+                    for ( ll = my_bucket *__THREADS_PER_ATOM__;
+                            ll < (my_bucket) * __THREADS_PER_ATOM__ + __THREADS_PER_ATOM__; ll++ )
                     {
                         if ( tnbr[ll] )
                         {
@@ -541,7 +542,7 @@ CUDA_GLOBAL void k_estimate_neighbors( reax_atom *my_atoms,
             ivec_Copy( nbrs_x, g.nbrs_x[index_grid_nbrs(i, j, k, itr, &g)] );
 
             if ( //(g.str[index_grid_3d(i, j, k, &g)] <= g.str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)]) &&  
-                    Dev_DistSqr_to_Special_Point(g.nbrs_cp[index_grid_nbrs(i, j, k, itr, &g)],atom1->x) <= cutoff ) 
+                    Dev_DistSqr_to_Special_Point(g.nbrs_cp[index_grid_nbrs(i, j, k, itr, &g)], atom1->x) <= cutoff ) 
             {
                 /* pick up another atom from the neighbor cell */
                 for ( m = g.str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)]; 
@@ -637,8 +638,8 @@ int Cuda_Estimate_Neighbors( reax_system *system, int step )
 
     k_estimate_neighbors <<< blocks, DEF_BLOCK_SIZE >>>
         ( system->d_my_atoms, system->my_ext_box, system->d_my_grid,
-          system->n, system->N, system->total_cap, system->d_far_nbrs,
-          system->d_max_far_nbrs, system->d_realloc_far_nbrs );
+          system->n, system->N, system->total_cap,
+          system->d_far_nbrs, system->d_max_far_nbrs, system->d_realloc_far_nbrs );
     cudaThreadSynchronize( );
     cudaCheckError( );
 
@@ -680,6 +681,103 @@ CUDA_GLOBAL void k_init_end_index( int * intr_cnt, int *indices, int *end_indice
 }
 
 
+CUDA_GLOBAL void k_setup_hindex( reax_atom *my_atoms, int N )
+{
+    int i;
+
+    i = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if ( i >= N )
+    {
+        return;
+    }
+
+    my_atoms[i].Hindex = i;
+}
+
+
+CUDA_GLOBAL void k_setup_hindex_part1( reax_atom *my_atoms, int * hindex, int n )
+{
+    int i;
+
+    i = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if ( i >= n )
+    {
+        return;
+    }
+
+    hindex[i] = my_atoms[i].Hindex;
+}
+
+
+CUDA_GLOBAL void k_setup_hindex_part2( reax_atom *my_atoms, int * hindex, int n )
+{
+    int i;
+
+    i = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if ( i >= n )
+    {
+        return;
+    }
+
+    if ( hindex[i + 1] - hindex[i] > 0 )
+    {
+        my_atoms[i].Hindex = hindex[i];
+    }
+    else
+    {
+        my_atoms[i].Hindex = -1;
+    }
+}
+
+
+CUDA_GLOBAL void k_init_hbond_indices( reax_atom * atoms, single_body_parameters *sbp,
+        int *hbonds, int *max_hbonds, int *indices, int *end_indices, int N )
+{
+    int i, hindex, my_hbonds;
+
+    i = blockIdx.x * blockDim.x  + threadIdx.x;
+
+    if ( i >= N )
+    {
+        return;
+    }
+
+    hindex = atoms[i].Hindex;
+
+    if ( sbp[ atoms[i].type ].p_hbond == H_ATOM || 
+            sbp[ atoms[i].type ].p_hbond == H_BONDING_ATOM )
+    {
+        my_hbonds = hbonds[i];
+        indices[hindex] = max_hbonds[i];
+        end_indices[hindex] = indices[hindex] + hbonds[i];
+    }
+    else
+    {
+        my_hbonds = 0;
+        indices[hindex] = 0;
+        end_indices[hindex] = 0;
+    }
+    atoms[i].num_hbonds = my_hbonds;
+
+//    hindex = atoms[i].Hindex;
+//
+//    if ( hindex >= 0 )
+//    {
+//        my_hbonds = hbonds[i];
+//        indices[hindex] = max_hbonds[i];
+//        end_indices[hindex] = indices[hindex] + hbonds[i];
+//    }
+//    else
+//    {
+//        my_hbonds = 0;
+//    }
+//    atoms[i].num_hbonds = my_hbonds;
+}
+
+
 /* Initialize indices for far neighbors list post reallocation
  *
  * system: atomic system info. */
@@ -688,10 +786,10 @@ void Cuda_Init_Neighbor_Indices( reax_system *system )
     int blocks;
     reax_list *far_nbrs = *dev_lists + FAR_NBRS;
 
-    /* init indinces */
+    /* init indices */
     Cuda_Scan_Excl_Sum( system->d_max_far_nbrs, far_nbrs->index, system->total_cap );
 
-    /* init end_indinces */
+    /* init end_indices */
     blocks = system->N / DEF_BLOCK_SIZE + 
         ((system->N % DEF_BLOCK_SIZE == 0) ? 0 : 1);
     k_init_end_index <<< blocks, DEF_BLOCK_SIZE >>>
@@ -701,23 +799,48 @@ void Cuda_Init_Neighbor_Indices( reax_system *system )
 }
 
 
-void Cuda_Init_HBond_Indices( int *indices, int entries )
+void Cuda_Init_HBond_Indices( reax_system *system )
 {
-    int i;
+    int blocks;
+    int *temp;
     reax_list *hbonds = *dev_lists + HBONDS;
 
-    for ( i = 1 ; i < entries; i++ )
-    {
-        indices[i] += indices[i - 1];
-    }
+    temp = (int *) scratch;
+//    cuda_memset( temp, 0, 2 * (system->N + 1) * sizeof(int), 
+//            "Cuda_Init_HBond_Indices::temp" );
+
+    /* init Hindices */
+    blocks = system->N / DEF_BLOCK_SIZE + 
+        ((system->N % DEF_BLOCK_SIZE == 0) ? 0 : 1);
+
+    k_setup_hindex <<< blocks, DEF_BLOCK_SIZE >>>
+        ( system->d_my_atoms, system->N );
+    cudaThreadSynchronize( );
+    cudaCheckError( );
 
-//    Cuda_Scan_Excl_Sum( indices, hbonds->index, entries );
-//    //TODO: kernel to compute end_index[i] += index[i]
+//    blocks = system->n / DEF_BLOCK_SIZE + 
+//        ((system->n % DEF_BLOCK_SIZE == 0) ? 0 : 1);
+//
+//    k_setup_hindex_part1 <<< blocks, DEF_BLOCK_SIZE >>>
+//        ( system->d_my_atoms, temp, system->n );
+//    cudaThreadSynchronize( );
+//    cudaCheckError( );
+//
+//    Cuda_Scan_Excl_Sum( temp, temp + system->n + 1, system->n + 1 );
+//
+//    k_setup_hindex_part2 <<< blocks, DEF_BLOCK_SIZE >>>
+//        ( system->d_my_atoms, temp + system->n + 1, system->n );
+//    cudaThreadSynchronize( );
+//    cudaCheckError( );
+
+    /* init indices and end_indices */
+    Cuda_Scan_Excl_Sum( system->d_max_hbonds, temp, system->total_cap );
 
-    copy_host_device( indices, hbonds->index + 1, (entries - 1) * sizeof(int), 
-            cudaMemcpyHostToDevice, "dev_hbonds:index" );
-    copy_host_device( indices, hbonds->end_index, entries * sizeof(int), 
-            cudaMemcpyHostToDevice, "dev_hbonds:end_index" );
+    k_init_hbond_indices <<< blocks, DEF_BLOCK_SIZE >>>
+        ( system->d_my_atoms, system->reax_param.d_sbp, system->d_hbonds, temp, 
+          hbonds->index, hbonds->end_index, system->N );
+    cudaThreadSynchronize( );
+    cudaCheckError( );
 }
 
 
@@ -726,10 +849,10 @@ void Cuda_Init_Bond_Indices( reax_system *system )
     int blocks;
     reax_list *bonds = *dev_lists + BONDS;
 
-    /* init indinces */
+    /* init indices */
     Cuda_Scan_Excl_Sum( system->d_max_bonds, bonds->index, system->total_cap );
 
-    /* init end_indinces */
+    /* init end_indices */
     blocks = system->N / DEF_BLOCK_SIZE + 
         ((system->N % DEF_BLOCK_SIZE == 0) ? 0 : 1);
     k_init_end_index <<< blocks, DEF_BLOCK_SIZE >>>
diff --git a/PG-PuReMD/src/cuda_neighbors.h b/PG-PuReMD/src/cuda_neighbors.h
index 1a1cd72b..d6b0e1d7 100644
--- a/PG-PuReMD/src/cuda_neighbors.h
+++ b/PG-PuReMD/src/cuda_neighbors.h
@@ -16,7 +16,7 @@ int Cuda_Estimate_Neighbors( reax_system *, int );
 
 void Cuda_Init_Neighbor_Indices( reax_system * );
 
-void Cuda_Init_HBond_Indices( int *, int );
+void Cuda_Init_HBond_Indices( reax_system * );
 
 void Cuda_Init_Bond_Indices( reax_system * );
 
diff --git a/PG-PuReMD/src/cuda_reset_tools.cu b/PG-PuReMD/src/cuda_reset_tools.cu
index 47d03674..27cb4580 100644
--- a/PG-PuReMD/src/cuda_reset_tools.cu
+++ b/PG-PuReMD/src/cuda_reset_tools.cu
@@ -1,34 +1,13 @@
 
 #include "cuda_reset_tools.h"
 
-#include "cuda_utils.h"
 #include "cuda_list.h"
+#include "cuda_utils.h"
+#include "cuda_reduction.h"
 
 #include "reset_tools.h"
 
 
-CUDA_GLOBAL void k_reset_hbond_list( reax_atom *my_atoms, 
-        reax_list hbonds, int N )
-{
-    int Hindex;
-    int i;
-    
-    i = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if ( i >= N )
-    {
-        return;
-    }
-
-    Hindex = my_atoms[i].Hindex;
-
-    if ( Hindex > 1 )
-    {
-        Dev_Set_End_Index( Hindex, Dev_Start_Index (Hindex, &hbonds), &hbonds );
-    }
-}
-
-
 extern "C"
 {
 
@@ -45,7 +24,8 @@ void Cuda_Reset_Workspace( reax_system *system, storage *workspace )
 }
 
 
-CUDA_GLOBAL void k_reset_hindex( reax_atom *my_atoms, int N )
+CUDA_GLOBAL void k_reset_hindex( reax_atom *my_atoms, single_body_parameters *sbp,
+        int * hindex, int N )
 {
     int i;
 
@@ -56,95 +36,44 @@ CUDA_GLOBAL void k_reset_hindex( reax_atom *my_atoms, int N )
         return;
     }
 
+    if ( sbp[ my_atoms[i].type ].p_hbond == H_ATOM ||
+      sbp[ my_atoms[i].type ].p_hbond == H_BONDING_ATOM )
+    {
+        hindex[i] = 1;
+    }
+    else
+    {
+        hindex[i] = 0;
+    }
+
+//    my_atoms[i].Hindex = hindex[i];
     my_atoms[i].Hindex = i;
 }
 
 
 void Cuda_Reset_Atoms( reax_system* system, control_params *control )
 {
-    int i;
-    reax_atom *atom;
     int blocks;
+    int *hindex;
 
-    /*
-       if( control->hbond_cut > 0 ) 
-    //TODO
-    for( i = 0; i < system->N; ++i ) { 
-    atom = &(system->my_atoms[i]);
-    //if( system->reax_param.sbp[ atom->type ].p_hbond == 1 ) 
-    atom->Hindex = system->numH++;
-    //else atom->Hindex = -1; 
-    }   
-    //TODO
-     */
-    ////////////////////////////////
-    ////////////////////////////////
-    ////////////////////////////////
-    ////////////////////////////////
-    // FIX - 3 - Commented out this line for Hydrogen Bond fix
-    // FIX - HBOND ISSUE
-    // FIX - HBOND ISSUE
-    // FIX - HBOND ISSUE
-    // COMMENTED OUT THIS LINE BELOW
-    //system->numH = system->N;
-    // FIX - HBOND ISSUE
-    // FIX - HBOND ISSUE
-    // FIX - HBOND ISSUE
-    ////////////////////////////////
-    ////////////////////////////////
-    ////////////////////////////////
-    ////////////////////////////////
-    ////////////////////////////////
+    hindex = (int *) scratch;
+    cuda_memset( scratch, 0, system->N * sizeof(int),
+           "Cuda_Reset_Atoms::scratch" );
 
     blocks = system->N / DEF_BLOCK_SIZE + 
         ((system->N % DEF_BLOCK_SIZE == 0 ) ? 0 : 1);
+
     k_reset_hindex <<< blocks, DEF_BLOCK_SIZE >>>
-        ( system->d_my_atoms, system->N );
+        ( system->d_my_atoms, system->reax_param.d_sbp, hindex + 1, system->N );
     cudaThreadSynchronize( );
     cudaCheckError( );
-}
 
+    Cuda_Reduction_Sum( hindex, system->d_numH, system->N );
 
-int Cuda_Reset_Neighbor_Lists( reax_system *system, control_params *control,
-        storage *workspace, reax_list **lists )
-{
-    int total_hbonds;
-    reax_list *hbonds;
-    int blocks;
+    copy_host_device( &(system->numH), system->d_numH, sizeof(int), 
+            cudaMemcpyDeviceToHost, "Cuda_Reset_Atoms::d_numH" );
 
-    //HBonds processing
-    //FIX - 4 - Added additional check
-    if ( control->hbond_cut > 0.0 && system->numH > 0 )
-    {
-        hbonds = (*dev_lists) + HBONDS;
-        total_hbonds = 0;
-
-        /* reset start-end indexes */
-        //TODO
-        blocks = system->N / DEF_BLOCK_SIZE + 
-            ((system->N % DEF_BLOCK_SIZE == 0 ) ? 0 : 1);
-        k_reset_hbond_list <<< blocks, DEF_BLOCK_SIZE >>>
-            ( system->d_my_atoms, *(*dev_lists + HBONDS), system->N );
-        cudaThreadSynchronize( );
-        cudaCheckError( );
-
-        //TODO compute the total hbonds here
-        total_hbonds = 0;
-
-        /* is reallocation needed? */
-        if ( total_hbonds >= hbonds->num_intrs * DANGER_ZONE )
-        {
-            workspace->realloc.hbonds = 1;
-            if ( total_hbonds >= hbonds->num_intrs )
-            {
-                fprintf( stderr, "p%d: not enough space for hbonds! total=%d allocated=%d\n",
-                        system->my_rank, total_hbonds, hbonds->num_intrs );
-                return FAILURE;
-            }
-        }
-    }
-
-    return SUCCESS;
+    system->Hcap = MAX( system->numH * SAFER_ZONE, MIN_CAP );
 }
 
 
@@ -162,8 +91,6 @@ void Cuda_Reset( reax_system *system, control_params *control,
 
     Cuda_Reset_Workspace( system, workspace );
 
-    Cuda_Reset_Neighbor_Lists( system, control, workspace, lists );
-
 #if defined(DEBUG_FOCUS)
     fprintf( stderr, "p%d @ step%d: reset done\n", system->my_rank, data->step );
     MPI_Barrier( MPI_COMM_WORLD );
diff --git a/PG-PuReMD/src/cuda_reset_tools.h b/PG-PuReMD/src/cuda_reset_tools.h
index 4477a188..f158afec 100644
--- a/PG-PuReMD/src/cuda_reset_tools.h
+++ b/PG-PuReMD/src/cuda_reset_tools.h
@@ -10,9 +10,12 @@ extern "C"  {
 
 
 void Cuda_Reset_Workspace( reax_system *, storage * );
+
 void Cuda_Reset_Atoms( reax_system *, control_params * );
+
 int  Cuda_Reset_Neighbor_Lists( reax_system *, control_params *,
         storage *, reax_list ** );
+
 void Cuda_Reset( reax_system*, control_params*, simulation_data*,
         storage*, reax_list** );
 
diff --git a/PG-PuReMD/src/cuda_validation.cu b/PG-PuReMD/src/cuda_validation.cu
index 89bb698d..830b889c 100644
--- a/PG-PuReMD/src/cuda_validation.cu
+++ b/PG-PuReMD/src/cuda_validation.cu
@@ -461,7 +461,7 @@ int validate_hbonds( reax_system *system, storage *workspace,
     sym_count = 0;
     for (int i = 0; i < system->n; i++) {
 
-        if ( system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond == 1 )
+        if ( system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond == H_ATOM )
         {
             count += End_Index (i, hbonds) - Start_Index (i, hbonds);
             dev_count += d_end [i] - d_start[i];
@@ -485,7 +485,7 @@ int validate_hbonds( reax_system *system, storage *workspace,
     sym_count = 0;
 
     for (int i = system->n; i < system->N; i++) {
-        //if (system->reax_param.sbp[ system->my_atoms[i].type].p_hbond == 2)
+        //if (system->reax_param.sbp[ system->my_atoms[i].type].p_hbond == H_BONDING_ATOM )
         {
             sym_count += d_end[i] - d_start[i];
         }
@@ -504,7 +504,7 @@ int validate_hbonds( reax_system *system, storage *workspace,
            d_end[d_index] - d_start[d_index]);
          */
 
-        if ( system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond != 1 )
+        if ( system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond != H_ATOM )
         {
             /*
                int x;
diff --git a/PG-PuReMD/src/ffield.c b/PG-PuReMD/src/ffield.c
index 019d5cd5..a00e7511 100644
--- a/PG-PuReMD/src/ffield.c
+++ b/PG-PuReMD/src/ffield.c
@@ -64,7 +64,7 @@ char Read_Force_Field( char *ffield_file, reax_interaction *reax,
 
     /* reading the number of global parameters */
     n = atoi(tmp[0]);
-    if (n < 1)
+    if ( n < 1 )
     {
         fprintf( stderr, "WARNING: number of globals in ffield file is 0!\n" );
         return 1;
@@ -98,80 +98,41 @@ char Read_Force_Field( char *ffield_file, reax_interaction *reax,
     fgets(s, MAX_LINE, fp);
 
     /* Allocating structures in reax_interaction */
-    /*
-    reax->sbp = (single_body_parameters*)
-      scalloc( reax->num_atom_types, sizeof(single_body_parameters), "sbp" );
-    reax->tbp = (two_body_parameters**)
-      scalloc( reax->num_atom_types, sizeof(two_body_parameters*), "tbp" );
-    reax->thbp= (three_body_header***)
-      scalloc( reax->num_atom_types, sizeof(three_body_header**), "thbp" );
-    reax->hbp = (hbond_parameters***)
-      scalloc( reax->num_atom_types, sizeof(hbond_parameters**), "hbp" );
-    reax->fbp = (four_body_header****)
-      scalloc( reax->num_atom_types, sizeof(four_body_header***), "fbp" );
-    tor_flag  = (char****)
-      scalloc( reax->num_atom_types, sizeof(char***), "tor_flag" );
-
-    for( i = 0; i < reax->num_atom_types; i++ ) {
-      reax->tbp[i] = (two_body_parameters*)
-        scalloc( reax->num_atom_types, sizeof(two_body_parameters), "tbp[i]" );
-      reax->thbp[i]= (three_body_header**)
-        scalloc( reax->num_atom_types, sizeof(three_body_header*), "thbp[i]" );
-      reax->hbp[i] = (hbond_parameters**)
-        scalloc( reax->num_atom_types, sizeof(hbond_parameters*), "hbp[i]" );
-      reax->fbp[i] = (four_body_header***)
-        scalloc( reax->num_atom_types, sizeof(four_body_header**), "fbp[i]" );
-      tor_flag[i]  = (char***)
-        scalloc( reax->num_atom_types, sizeof(char**), "tor_flag[i]" );
-
-      for( j = 0; j < reax->num_atom_types; j++ ) {
-        reax->thbp[i][j]= (three_body_header*)
-    scalloc( reax->num_atom_types, sizeof(three_body_header), "thbp[i,j]" );
-        reax->hbp[i][j] = (hbond_parameters*)
-    scalloc( reax->num_atom_types, sizeof(hbond_parameters), "hbp[i,j]" );
-        reax->fbp[i][j] = (four_body_header**)
-    scalloc( reax->num_atom_types, sizeof(four_body_header*), "fbp[i,j]" );
-        tor_flag[i][j]  = (char**)
-    scalloc( reax->num_atom_types, sizeof(char*), "tor_flag[i,j]" );
-
-        for (k=0; k < reax->num_atom_types; k++) {
-    reax->fbp[i][j][k] = (four_body_header*)
-      scalloc(reax->num_atom_types, sizeof(four_body_header), "fbp[i,j,k]");
-    tor_flag[i][j][k]  = (char*)
-      scalloc( reax->num_atom_types, sizeof(char), "tor_flag[i,j,k]" );
-        }
-      }
-    }
-    */
-
     __N = reax->num_atom_types;
 
     reax->sbp = (single_body_parameters*)
-                calloc( reax->num_atom_types, sizeof(single_body_parameters) );
+        scalloc( reax->num_atom_types, sizeof(single_body_parameters),
+                "Read_Force_Field::reax->sbp" );
 
     reax->tbp = (two_body_parameters*)
-                calloc( pow (reax->num_atom_types, 2), sizeof(two_body_parameters) );
+        scalloc( POW(reax->num_atom_types, 2), sizeof(two_body_parameters),
+              "Read_Force_Field::reax->tbp" );
 
     reax->thbp = (three_body_header*)
-                 calloc( pow (reax->num_atom_types, 3), sizeof(three_body_header) );
+        scalloc( POW(reax->num_atom_types, 3), sizeof(three_body_header),
+              "Read_Force_Field::reax->thbp" );
+
     reax->hbp = (hbond_parameters*)
-                calloc( pow (reax->num_atom_types, 3), sizeof(hbond_parameters) );
+        scalloc( POW(reax->num_atom_types, 3), sizeof(hbond_parameters),
+              "Read_Force_Field::reax->hbp" );
 
     reax->fbp = (four_body_header*)
-                calloc( pow (reax->num_atom_types, 4), sizeof(four_body_header) );
+        scalloc( POW(reax->num_atom_types, 4), sizeof(four_body_header),
+              "Read_Force_Field::reax->fbp" );
 
-    tor_flag  = (char*)
-                calloc( pow (reax->num_atom_types, 4), sizeof(char) );
+    tor_flag  = (char*) scalloc( POW(reax->num_atom_types, 4), sizeof(char),
+           "Read_Force_Field::tor_flag" );
 
-    // vdWaals type: 1: Shielded Morse, no inner-wall
-    //               2: inner wall, no shielding
-    //               3: inner wall+shielding
+    /* vdWaals type:
+     * 1: Shielded Morse, no inner-wall
+     * 2: inner wall, no shielding
+     * 3: inner wall+shielding */
     reax->gp.vdw_type = 0;
 
     /* reading single atom parameters */
     /* there are 4 lines of each single atom parameters in ff files. these
-       parameters later determine some of the pair and triplet parameters using
-       combination rules. */
+     * parameters later determine some of the pair and triplet parameters using
+     * combination rules. */
     for ( i = 0; i < reax->num_atom_types; i++ )
     {
         /* line one */
@@ -183,7 +144,9 @@ char Read_Force_Field( char *ffield_file, reax_interaction *reax,
             reax->sbp[i].name[j] = toupper( tmp[0][j] );
         }
 
-        //fprintf( stderr, "Atom Name in the force field : %s \n", reax->sbp[i].name);
+#if defined(DEBUG_FOCUS)
+        fprintf( stderr, "Atom Name in the force field : %s \n", reax->sbp[i].name );
+#endif
 
         val = atof(tmp[1]);
         reax->sbp[i].r_s = val;
@@ -221,7 +184,7 @@ char Read_Force_Field( char *ffield_file, reax_interaction *reax,
         val = atof(tmp[6]);
         reax->sbp[i].eta = 2.0 * val;
         val = atof(tmp[7]);
-        reax->sbp[i].p_hbond = (int) val;
+        reax->sbp[i].p_hbond = (int)val;
 
         /* line 3 */
         fgets( s, MAX_LINE, fp );
@@ -645,12 +608,12 @@ char Read_Force_Field( char *ffield_file, reax_interaction *reax,
     }
 
     /* 4-body parameters are entered in compact form. i.e. 0-X-Y-0
-       correspond to any type of pair of atoms in 1 and 4
-       position. However, explicit X-Y-Z-W takes precedence over the
-       default description.
-       supports multi-well potentials (upto MAX_4BODY_PARAM in mytypes.h)
-       IMPORTANT: for now, directions on how to read multi-entries from ffield
-       is not clear */
+     * correspond to any type of pair of atoms in 1 and 4
+     * position. However, explicit X-Y-Z-W takes precedence over the
+     * default description.
+     * supports multi-well potentials (upto MAX_4BODY_PARAM in mytypes.h)
+     * IMPORTANT: for now, directions on how to read multi-entries from ffield
+     * is not clear */
 
     /* clear all entries first */
     for ( i = 0; i < reax->num_atom_types; ++i )
diff --git a/PG-PuReMD/src/forces.c b/PG-PuReMD/src/forces.c
index 0647c737..ae60867e 100644
--- a/PG-PuReMD/src/forces.c
+++ b/PG-PuReMD/src/forces.c
@@ -65,8 +65,8 @@
 
 
 #ifdef HAVE_CUDA
-void Cuda_Total_Forces (reax_system *, control_params *, simulation_data *, storage *);
-void Cuda_Total_Forces_PURE (reax_system *, storage *);
+void Cuda_Total_Forces( reax_system *, control_params *, simulation_data *, storage * );
+void Cuda_Total_Forces_PURE( reax_system *, storage * );
 #endif
 
 
@@ -225,16 +225,18 @@ void Cuda_Compute_Total_Force( reax_system *system, control_params *control,
         simulation_data *data, storage *workspace,
         reax_list **lists, mpi_datatypes *mpi_data )
 {
-    rvec *f = (rvec *) host_scratch;
+    rvec *f;
+
+    f = (rvec *) host_scratch;
     memset( f, 0, sizeof(rvec) * system->N );
 
     Cuda_Total_Forces( system, control, data, workspace );
 
 #if defined(PURE_REAX)
     /* now all forces are computed to their partially-final values
-       based on the neighbors information each processor has had.
-       final values of force on each atom needs to be computed by adding up
-       all partially-final pieces */
+     * based on the neighbors information each processor has had.
+     * final values of force on each atom needs to be computed by adding up
+     * all partially-final pieces */
 
     //MVAPICH2
     copy_host_device( f, dev_workspace->f, sizeof(rvec) * system->N ,
@@ -876,7 +878,7 @@ int Init_Forces( reax_system *system, control_params *control,
             cutoff = control->bond_cut;
         }
 
-        ihb = -1;
+        ihb = NON_H_BONDING_ATOM;
         ihb_top = -1;
         if ( local )
         {
@@ -888,7 +890,7 @@ int Init_Forces( reax_system *system, control_params *control,
             if ( control->hbond_cut > 0.0 )
             {
                 ihb = sbp_i->p_hbond;
-                if ( ihb == 1 )
+                if ( ihb == H_ATOM )
                 {
                     ihb_top = End_Index( atom_i->Hindex, hbonds );
                 }
@@ -972,13 +974,13 @@ int Init_Forces( reax_system *system, control_params *control,
                     }
 
                     /* hydrogen bond lists */
-                    if ( control->hbond_cut > 0 && (ihb == 1 || ihb == 2) &&
+                    if ( control->hbond_cut > 0 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) &&
                             nbr_pj->d <= control->hbond_cut )
                     {
                         // fprintf( stderr, "%d %d\n", atom1, atom2 );
 
                         jhb = sbp_j->p_hbond;
-                        if ( ihb == 1 && jhb == 2 )
+                        if ( ihb == H_ATOM && jhb == H_BONDING_ATOM )
                         {
                             hbonds->select.hbond_list[ihb_top].nbr = j;
                             hbonds->select.hbond_list[ihb_top].scl = 1;
@@ -986,7 +988,7 @@ int Init_Forces( reax_system *system, control_params *control,
                             ++ihb_top;
                             ++num_hbonds;
                         }
-                        else if ( j < system->n && ihb == 2 && jhb == 1 )
+                        else if ( j < system->n && ihb == H_BONDING_ATOM && jhb == H_ATOM )
                         {
                             jhb_top = End_Index( atom_j->Hindex, hbonds );
                             hbonds->select.hbond_list[jhb_top].nbr = i;
@@ -1032,7 +1034,7 @@ int Init_Forces( reax_system *system, control_params *control,
             //printf("Htop: %d \n", Htop);
 
             H->end[i] = Htop;
-            if ( ihb == 1 )
+            if ( ihb == H_ATOM )
             {
                 Set_End_Index( atom_i->Hindex, ihb_top, hbonds );
             }
@@ -1161,12 +1163,12 @@ int Init_Forces_No_Charges( reax_system *system, control_params *control,
             cutoff = control->bond_cut;
         }
 
-        ihb = -1;
+        ihb = NON_H_BONDING_ATOM;
         ihb_top = -1;
         if ( local && control->hbond_cut > 0 )
         {
             ihb = sbp_i->p_hbond;
-            if ( ihb == 1 )
+            if ( ihb == H_ATOM )
             {
                 ihb_top = End_Index( atom_i->Hindex, hbonds );
             }
@@ -1223,12 +1225,12 @@ int Init_Forces_No_Charges( reax_system *system, control_params *control,
                 if ( local )
                 {
                     /* hydrogen bond lists */
-                    if ( control->hbond_cut > 0 && (ihb == 1 || ihb == 2) &&
+                    if ( control->hbond_cut > 0 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) &&
                             nbr_pj->d <= control->hbond_cut )
                     {
                         // fprintf( stderr, "%d %d\n", atom1, atom2 );
                         jhb = sbp_j->p_hbond;
-                        if ( ihb == 1 && jhb == 2 )
+                        if ( ihb == H_ATOM && jhb == H_BONDING_ATOM )
                         {
                             hbonds->select.hbond_list[ihb_top].nbr = j;
                             hbonds->select.hbond_list[ihb_top].scl = 1;
@@ -1236,7 +1238,7 @@ int Init_Forces_No_Charges( reax_system *system, control_params *control,
                             ++ihb_top;
                             ++num_hbonds;
                         }
-                        else if ( j < system->n && ihb == 2 && jhb == 1 )
+                        else if ( j < system->n && ihb == H_BONDING_ATOM && jhb == H_ATOM )
                         {
                             jhb_top = End_Index( atom_j->Hindex, hbonds );
                             hbonds->select.hbond_list[jhb_top].nbr = i;
@@ -1273,7 +1275,7 @@ int Init_Forces_No_Charges( reax_system *system, control_params *control,
         }
 
         Set_End_Index( i, btop_i, bonds );
-        if ( local && ihb == 1 )
+        if ( local && ihb == H_ATOM )
         {
             Set_End_Index( atom_i->Hindex, ihb_top, hbonds );
         }
@@ -1455,16 +1457,16 @@ void Estimate_Storages( reax_system *system, control_params *control,
 
         if ( i < system->n )
         {
-            local = 1;
+            local = TRUE;
             cutoff = control->nonb_cut;
             ++(*Htop);
             ihb = sbp_i->p_hbond;
         }
         else
         {
-            local = 0;
+            local = FALSE;
             cutoff = control->bond_cut;
-            ihb = -1;
+            ihb = NON_H_BONDING_ATOM;
         }
 
         for ( pj = start_i; pj < end_i; ++pj )
@@ -1482,19 +1484,21 @@ void Estimate_Storages( reax_system *system, control_params *control,
                 //twbp = &(system->reax_param.tbp[type_i][type_j]);
                 twbp = &(system->reax_param.tbp[index_tbp (type_i, type_j, system->reax_param.num_atom_types)]);
 
-                if ( local )
+                if ( local == TRUE )
                 {
                     if ( j < system->n || atom_i->orig_id < atom_j->orig_id ) //tryQEq ||1
                         ++(*Htop);
 
 
-                    if ( control->hbond_cut > 0.1 && (ihb == 1 || ihb == 2) &&
+                    if ( control->hbond_cut > 0.1 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) &&
                             nbr_pj->d <= control->hbond_cut )
                     {
                         jhb = sbp_j->p_hbond;
-                        if ( ihb == 1 && jhb == 2 )
+                        if ( ihb == H_ATOM && jhb == H_BONDING_ATOM )
+                        {
                             ++hb_top[i];
-                        else if ( j < system->n && ihb == 2 && jhb == 1 )
+                        }
+                        else if ( j < system->n && ihb == H_BONDING_ATOM && jhb == H_ATOM )
                         {
                             ++hb_top[j];
 
@@ -1573,8 +1577,8 @@ void Estimate_Storages( reax_system *system, control_params *control,
 
 #else
 void Estimate_Storages( reax_system *system, control_params *control,
-        reax_list **lists, int *Htop, int *hb_top,
-        int *bond_top, int *num_3body)
+        reax_list **lists, int *Htop, int *hb_top, int *bond_top,
+        int *num_3body )
 {
 
     int i, j, pj;
@@ -1608,16 +1612,16 @@ void Estimate_Storages( reax_system *system, control_params *control,
 
         if ( i < system->n )
         {
-            local = 1;
+            local = TRUE;
             cutoff = control->nonb_cut;
             ++(*Htop);
             ihb = sbp_i->p_hbond;
         }
         else
         {
-            local = 0;
+            local = FALSE;
             cutoff = control->bond_cut;
-            ihb = -1;
+            ihb = NON_H_BONDING_ATOM;
         }
 
         for ( pj = start_i; pj < end_i; ++pj )
@@ -1633,22 +1637,26 @@ void Estimate_Storages( reax_system *system, control_params *control,
                 r_ij = nbr_pj->d;
                 sbp_j = &(system->reax_param.sbp[type_j]);
                 //twbp = &(system->reax_param.tbp[type_i][type_j]);
-                twbp = &(system->reax_param.tbp[index_tbp (type_i, type_j, system->reax_param.num_atom_types)]);
+                twbp = &(system->reax_param.tbp[index_tbp(type_i, type_j, system->reax_param.num_atom_types)]);
 
-                if ( local )
+                if ( local == TRUE )
                 {
                     if ( j < system->n || atom_i->orig_id < atom_j->orig_id ) //tryQEq ||1
                         ++(*Htop);
 
                     /* hydrogen bond lists */
-                    if ( control->hbond_cut > 0.1 && (ihb == 1 || ihb == 2) &&
+                    if ( control->hbond_cut > 0.1 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) &&
                             nbr_pj->d <= control->hbond_cut )
                     {
                         jhb = sbp_j->p_hbond;
-                        if ( ihb == 1 && jhb == 2 )
+                        if ( ihb == H_ATOM && jhb == H_BONDING_ATOM )
+                        {
                             ++hb_top[i];
-                        else if ( j < system->n && ihb == 2 && jhb == 1 )
+                        }
+                        else if ( j < system->n && ihb == H_BONDING_ATOM && jhb == H_ATOM )
+                        {
                             ++hb_top[j];
+                        }
                     }
                 }
 
@@ -1875,6 +1883,39 @@ int Cuda_Compute_Forces( reax_system *system, control_params *control,
     {
         retVal = Cuda_Init_Forces( system, control, data, workspace, lists, out_control );
         fprintf( stderr, "    [CUDA_INIT_FORCES: %d] STEP %d\n", retVal, data->step );
+
+//        int i;
+//        static reax_list **temp_lists;
+//       
+//        if ( data->step == 0 )
+//        {
+//            temp_lists = (reax_list **) smalloc( LIST_N * sizeof (reax_list *), "temp_lists" );
+//            for ( i = 0; i < LIST_N; ++i )
+//            {
+//                temp_lists[i] = (reax_list *) smalloc( sizeof(reax_list), "lists[i]" );
+//                temp_lists[i]->allocated = FALSE;
+//            }
+//            Make_List( (*dev_lists + BONDS)->n, (*dev_lists + BONDS)->num_intrs,
+//                    TYP_BOND, *temp_lists + BONDS );
+//            Make_List( (*dev_lists + HBONDS)->n, (*dev_lists + HBONDS)->num_intrs,
+//                    TYP_HBOND, *temp_lists + HBONDS );
+//        }
+//        else
+//        {
+//            Delete_List( *temp_lists + BONDS );
+//            Make_List( (*dev_lists + BONDS)->n, (*dev_lists + BONDS)->num_intrs,
+//                    TYP_BOND, *temp_lists + BONDS );
+//            Delete_List( *temp_lists + HBONDS );
+//            Make_List( (*dev_lists + HBONDS)->n, (*dev_lists + HBONDS)->num_intrs,
+//                    TYP_HBOND, *temp_lists + HBONDS );
+//
+//        }
+//        Output_Sync_Lists( *temp_lists + BONDS, *dev_lists + BONDS, TYP_BOND );
+//        Print_Bonds( system, temp_lists, control );
+//        Output_Sync_Lists( *temp_lists + HBONDS, *dev_lists + HBONDS, TYP_HBOND );
+//        Print_HBonds( system, temp_lists, control, data->step );
+//        Print_HBond_Indices( system, temp_lists, control, data->step );
+//        exit( 0 );
     }
     else
     {
diff --git a/PG-PuReMD/src/hydrogen_bonds.c b/PG-PuReMD/src/hydrogen_bonds.c
index 493379ce..6021f392 100644
--- a/PG-PuReMD/src/hydrogen_bonds.c
+++ b/PG-PuReMD/src/hydrogen_bonds.c
@@ -20,41 +20,41 @@
   ----------------------------------------------------------------------*/
 
 #include "reax_types.h"
+
 #include "index_utils.h"
+
 #if defined(PURE_REAX)
-#include "hydrogen_bonds.h"
-#include "bond_orders.h"
-#include "list.h"
-#include "valence_angles.h"
-#include "vector.h"
+  #include "hydrogen_bonds.h"
+  #include "bond_orders.h"
+  #include "list.h"
+  #include "valence_angles.h"
+  #include "vector.h"
 #elif defined(LAMMPS_REAX)
-#include "reax_hydrogen_bonds.h"
-#include "reax_bond_orders.h"
-#include "reax_list.h"
-#include "reax_valence_angles.h"
-#include "reax_vector.h"
+  #include "reax_hydrogen_bonds.h"
+  #include "reax_bond_orders.h"
+  #include "reax_list.h"
+  #include "reax_valence_angles.h"
+  #include "reax_vector.h"
 #endif
 
+
 // DANIEL
 // This function is taken straight from PuReMD, with minimal changes to accomodate the new datastructures
 // Attempting to fix ehb being way off in MPI_Not_GPU
-
 void Hydrogen_Bonds( reax_system *system, control_params *control,
-                     simulation_data *data, storage *workspace,
-                     reax_list **lists, output_controls *out_control )
+        simulation_data *data, storage *workspace,
+        reax_list **lists, output_controls *out_control )
 {
-    int  i, j, k, pi, pk;
-    int  type_i, type_j, type_k;
-    int  start_j, end_j, hb_start_j, hb_end_j;
-    int  hblist[MAX_BONDS];
-    int  itr, top;
-    int  num_hb_intrs = 0;
+    int i, j, k, pi, pk;
+    int type_i, type_j, type_k;
+    int start_j, end_j, hb_start_j, hb_end_j;
+    int hblist[MAX_BONDS];
+    int itr, top;
     ivec rel_jk;
     real r_ij, r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2;
     real e_hb, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3;
     rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk;
     rvec dvec_jk, force, ext_press;
-    // rtensor temp_rtensor, total_rtensor;
     hbond_parameters *hbp;
     bond_order_data *bo_ij;
     bond_data *pbond_ij;
@@ -62,6 +62,9 @@ void Hydrogen_Bonds( reax_system *system, control_params *control,
     reax_list *bonds, *hbonds;
     bond_data *bond_list;
     hbond_data *hbond_list;
+#if defined(DEBUG)
+    int num_hb_intrs = 0;
+#endif
     
     bonds = (*lists) + BONDS;
     bond_list = bonds->select.bond_list;
@@ -69,22 +72,24 @@ void Hydrogen_Bonds( reax_system *system, control_params *control,
     hbond_list = hbonds->select.hbond_list;
 
     /* loops below discover the Hydrogen bonds between i-j-k triplets.
-       here j is H atom and there has to be some bond between i and j.
-       Hydrogen bond is between j and k.
-       so in this function i->X, j->H, k->Z when we map
-       variables onto the ones in the handout.*/
+     * here j is H atom and there has to be some bond between i and j.
+     * Hydrogen bond is between j and k.
+     * so in this function i->X, j->H, k->Z when we map
+     * variables onto the ones in the handout.*/
     for ( j = 0; j < system->n; ++j )
+    {
         /* j has to be of type H */
-        if ( system->reax_param.sbp[system->my_atoms[j].type].p_hbond == 1 )
+        if ( system->reax_param.sbp[system->my_atoms[j].type].p_hbond == H_ATOM )
         {
-            /*set j's variables */
-            type_j     = system->my_atoms[j].type;
-            start_j    = Start_Index(j, bonds);
-            end_j      = End_Index(j, bonds);
+            /* set j's variables */
+            type_j = system->my_atoms[j].type;
+            start_j = Start_Index( j, bonds );
+            end_j = End_Index( j, bonds );
             hb_start_j = Start_Index( system->my_atoms[j].Hindex, hbonds );
-            hb_end_j   = End_Index( system->my_atoms[j].Hindex, hbonds );
+            hb_end_j = End_Index( system->my_atoms[j].Hindex, hbonds );
 
             top = 0;
+            /* search bonded atoms to atom j (i.e., hydrogen atom) for potential hydrogen bonding */
             for ( pi = start_j; pi < end_j; ++pi )
             {
                 pbond_ij = &( bond_list[pi] );
@@ -92,14 +97,17 @@ void Hydrogen_Bonds( reax_system *system, control_params *control,
                 bo_ij = &(pbond_ij->bo_data);
                 type_i = system->my_atoms[i].type;
 
-                if ( system->reax_param.sbp[type_i].p_hbond == 2 &&
+                if ( system->reax_param.sbp[type_i].p_hbond == H_BONDING_ATOM &&
                         bo_ij->BO >= HB_THRESHOLD )
+                {
                     hblist[top++] = pi;
+                }
             }
 
             // fprintf( stderr, "j: %d, top: %d, hb_start_j: %d, hb_end_j:%d\n",
             //          j, top, hb_start_j, hb_end_j );
 
+            /* for each hbond of atom j */
             for ( pk = hb_start_j; pk < hb_end_j; ++pk )
             {
                 /* set k's varibles */
@@ -109,11 +117,12 @@ void Hydrogen_Bonds( reax_system *system, control_params *control,
                 r_jk = nbr_jk->d;
                 rvec_Scale( dvec_jk, hbond_list[pk].scl, nbr_jk->dvec );
 
+                /* find matching hbond to atom k */
                 for ( itr = 0; itr < top; ++itr )
                 {
                     pi = hblist[itr];
-		// DANIEL
-                //    pbond_ij = &( bonds->bond_list[pi] );
+                    //DANIEL
+                    //pbond_ij = &( bonds->bond_list[pi] );
                     pbond_ij = &( bonds->select.bond_list[pi] );
                     i = pbond_ij->nbr;
 
@@ -122,35 +131,34 @@ void Hydrogen_Bonds( reax_system *system, control_params *control,
                         bo_ij = &(pbond_ij->bo_data);
                         type_i = system->my_atoms[i].type;
                         r_ij = pbond_ij->d;
-                        //hbp = &(system->reax_param.hbp[ type_i ][ type_j ][ type_k ]);
-			// SUDHIR
-			hbp = &(system->reax_param.hbp[ index_hbp (type_i, type_j, type_k, system->reax_param.num_atom_types) ]);
+			hbp = &(system->reax_param.hbp[ index_hbp(type_i, type_j, type_k, system->reax_param.num_atom_types) ]);
 
+#if defined(DEBUG)
                         ++num_hb_intrs;
+#endif
 
                         Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk,
-                                         &theta, &cos_theta );
+                                &theta, &cos_theta );
                         /* the derivative of cos(theta) */
                         Calculate_dCos_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk,
-                                              &dcos_theta_di, &dcos_theta_dj,
-                                              &dcos_theta_dk );
+                                &dcos_theta_di, &dcos_theta_dj, &dcos_theta_dk );
 
-                        /* hyrogen bond energy*/
+                        /* hyrogen bond energy */
                         sin_theta2 = sin( theta / 2.0 );
                         sin_xhz4 = SQR(sin_theta2);
                         sin_xhz4 *= sin_xhz4;
                         cos_xhz1 = ( 1.0 - cos_theta );
                         exp_hb2 = exp( -hbp->p_hb2 * bo_ij->BO );
                         exp_hb3 = exp( -hbp->p_hb3 * ( hbp->r0_hb / r_jk +
-                                                       r_jk / hbp->r0_hb - 2.0 ) );
+                                    r_jk / hbp->r0_hb - 2.0 ) );
 
-                        data->my_en.e_hb += e_hb =
-                                                hbp->p_hb1 * (1.0 - exp_hb2) * exp_hb3 * sin_xhz4;
+                        e_hb = hbp->p_hb1 * (1.0 - exp_hb2) * exp_hb3 * sin_xhz4;
+                        data->my_en.e_hb += e_hb;
 
                         CEhb1 = hbp->p_hb1 * hbp->p_hb2 * exp_hb2 * exp_hb3 * sin_xhz4;
                         CEhb2 = -hbp->p_hb1 / 2.0 * (1.0 - exp_hb2) * exp_hb3 * cos_xhz1;
                         CEhb3 = -hbp->p_hb3 *
-                                (-hbp->r0_hb / SQR(r_jk) + 1.0 / hbp->r0_hb) * e_hb;
+                            (-hbp->r0_hb / SQR(r_jk) + 1.0 / hbp->r0_hb) * e_hb;
 
                         /*fprintf( stdout,
                           "%6d%6d%6d%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f\n",
@@ -175,7 +183,7 @@ void Hydrogen_Bonds( reax_system *system, control_params *control,
                         else
                         {
                             /* for pressure coupling, terms that are not related to bond order
-                            derivatives are added directly into pressure vector/tensor */
+                             * derivatives are added directly into pressure vector/tensor */
                             rvec_Scale( force, +CEhb2, dcos_theta_di ); // dcos terms
                             rvec_Add( workspace->f[i], force );
                             rvec_iMultiply( ext_press, pbond_ij->rel_box, force );
@@ -212,6 +220,7 @@ void Hydrogen_Bonds( reax_system *system, control_params *control,
                                  system->my_atoms[k].orig_id,
                                  r_jk, theta, bo_ij->BO, e_hb, data->my_en.e_hb );
 #endif
+
 #ifdef TEST_FORCES
                         Add_dBO( system, lists, j, pi, +CEhb1, workspace->f_hb ); //dbo term
                         // dcos terms
@@ -226,31 +235,31 @@ void Hydrogen_Bonds( reax_system *system, control_params *control,
                 }
             }
         }
+    }
 
 #if defined(DEBUG)
     fprintf( stderr, "Number of hydrogen bonds: %d\n", num_hb_intrs );
     fprintf( stderr, "Hydrogen Bond Energy: %g\n", data->my_en.e_hb );
     fprintf( stderr, "hydbonds: ext_press (%24.15e %24.15e %24.15e)\n",
-             data->ext_press[0], data->ext_press[1], data->ext_press[2] );
+            data->ext_press[0], data->ext_press[1], data->ext_press[2] );
 #endif
 }
                                                                                                                               
+
 void Old_Hydrogen_Bonds( reax_system *system, control_params *control,
-                     simulation_data *data, storage *workspace,
-                     reax_list **lists, output_controls *out_control )
+        simulation_data *data, storage *workspace, reax_list **lists,
+        output_controls *out_control )
 {
     int  i, j, k, pi, pk;
     int  type_i, type_j, type_k;
     int  start_j, end_j, hb_start_j, hb_end_j;
     int  hblist[MAX_BONDS];
     int  itr, top;
-    int  num_hb_intrs = 0;
     ivec rel_jk;
     real r_ij, r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2;
     real e_hb, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3;
     rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk;
     rvec dvec_jk, force, ext_press;
-    // rtensor temp_rtensor, total_rtensor;
     hbond_parameters *hbp;
     bond_order_data *bo_ij;
     bond_data *pbond_ij;
@@ -258,6 +267,9 @@ void Old_Hydrogen_Bonds( reax_system *system, control_params *control,
     reax_list *bonds, *hbonds;
     bond_data *bond_list;
     hbond_data *hbond_list;
+#if defined(DEBUG)
+    int num_hb_intrs = 0;
+#endif
 
     bonds = (*lists) + BONDS;
     bond_list = bonds->select.bond_list;
@@ -265,20 +277,21 @@ void Old_Hydrogen_Bonds( reax_system *system, control_params *control,
     hbond_list = hbonds->select.hbond_list;
 
     /* loops below discover the Hydrogen bonds between i-j-k triplets.
-       here j is H atom and there has to be some bond between i and j.
-       Hydrogen bond is between j and k.
-       so in this function i->X, j->H, k->Z when we map
-       variables onto the ones in the handout.*/
+     * here j is H atom and there has to be some bond between i and j.
+     * Hydrogen bond is between j and k.
+     * so in this function i->X, j->H, k->Z when we map
+     * variables onto the ones in the handout.*/
     for ( j = 0; j < system->n; ++j )
+    {
         /* j has to be of type H */
-        if ( system->reax_param.sbp[system->my_atoms[j].type].p_hbond == 1 )
+        if ( system->reax_param.sbp[system->my_atoms[j].type].p_hbond == H_ATOM )
         {
             /*set j's variables */
-            type_j     = system->my_atoms[j].type;
-            start_j    = Start_Index(j, bonds);
-            end_j      = End_Index(j, bonds);
+            type_j = system->my_atoms[j].type;
+            start_j = Start_Index(j, bonds);
+            end_j = End_Index(j, bonds);
             hb_start_j = Start_Index( system->my_atoms[j].Hindex, hbonds );
-            hb_end_j   = End_Index( system->my_atoms[j].Hindex, hbonds );
+            hb_end_j = End_Index( system->my_atoms[j].Hindex, hbonds );
 
             top = 0;
             for ( pi = start_j; pi < end_j; ++pi )
@@ -288,9 +301,11 @@ void Old_Hydrogen_Bonds( reax_system *system, control_params *control,
                 bo_ij = &(pbond_ij->bo_data);
                 type_i = system->my_atoms[i].type;
 
-                if ( system->reax_param.sbp[type_i].p_hbond == 2 &&
+                if ( system->reax_param.sbp[type_i].p_hbond == H_BONDING_ATOM &&
                         bo_ij->BO >= HB_THRESHOLD )
+                {
                     hblist[top++] = pi;
+                }
             }
 
             // fprintf( stderr, "j: %d, top: %d, hb_start_j: %d, hb_end_j:%d\n",
@@ -318,33 +333,34 @@ void Old_Hydrogen_Bonds( reax_system *system, control_params *control,
                         r_ij = pbond_ij->d;
                         //SUDHIR
                         //hbp = &(system->reax_param.hbp[ type_i ][ type_j ][ type_k ]);
-                        hbp = &(system->reax_param.hbp[ index_hbp (type_i, type_j, type_k, system->reax_param.num_atom_types) ]);
+                        hbp = &(system->reax_param.hbp[ index_hbp(type_i, type_j, type_k, system->reax_param.num_atom_types) ]);
+
+#if defined(DEBUG)
                         ++num_hb_intrs;
+#endif
 
                         Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk,
-                                         &theta, &cos_theta );
+                                &theta, &cos_theta );
                         /* the derivative of cos(theta) */
                         Calculate_dCos_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk,
-                                              &dcos_theta_di, &dcos_theta_dj,
-                                              &dcos_theta_dk );
+                                &dcos_theta_di, &dcos_theta_dj, &dcos_theta_dk );
 
-                        /* hyrogen bond energy*/
+                        /* hyrogen bond energy */
                         sin_theta2 = SIN( theta / 2.0 );
                         sin_xhz4 = SQR(sin_theta2);
                         sin_xhz4 *= sin_xhz4;
                         cos_xhz1 = ( 1.0 - cos_theta );
                         exp_hb2 = EXP( -hbp->p_hb2 * bo_ij->BO );
                         exp_hb3 = EXP( -hbp->p_hb3 * ( hbp->r0_hb / r_jk +
-                                                       r_jk / hbp->r0_hb - 2.0 ) );
+                                    r_jk / hbp->r0_hb - 2.0 ) );
 
-                        //data->my_en.e_hb +=
                         e_hb = hbp->p_hb1 * (1.0 - exp_hb2) * exp_hb3 * sin_xhz4;
-                        data->my_en.e_hb += 1;
+                        data->my_en.e_hb += e_hb;
 
                         CEhb1 = hbp->p_hb1 * hbp->p_hb2 * exp_hb2 * exp_hb3 * sin_xhz4;
                         CEhb2 = -hbp->p_hb1 / 2.0 * (1.0 - exp_hb2) * exp_hb3 * cos_xhz1;
                         CEhb3 = -hbp->p_hb3 *
-                                (-hbp->r0_hb / SQR(r_jk) + 1.0 / hbp->r0_hb) * e_hb;
+                            (-hbp->r0_hb / SQR(r_jk) + 1.0 / hbp->r0_hb) * e_hb;
 
                         /*fprintf( stdout,
                           "%6d%6d%6d%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f\n",
@@ -406,6 +422,7 @@ void Old_Hydrogen_Bonds( reax_system *system, control_params *control,
                                  system->my_atoms[k].orig_id,
                                  r_jk, theta, bo_ij->BO, e_hb, data->my_en.e_hb );
 #endif
+
 #ifdef TEST_FORCES
                         Add_dBO( system, lists, j, pi, +CEhb1, workspace->f_hb ); //dbo term
                         // dcos terms
@@ -420,11 +437,12 @@ void Old_Hydrogen_Bonds( reax_system *system, control_params *control,
                 }
             }
         }
+    }
 
 #if defined(DEBUG)
     fprintf( stderr, "Number of hydrogen bonds: %d\n", num_hb_intrs );
     fprintf( stderr, "Hydrogen Bond Energy: %g\n", data->my_en.e_hb );
     fprintf( stderr, "hydbonds: ext_press (%24.15e %24.15e %24.15e)\n",
-             data->ext_press[0], data->ext_press[1], data->ext_press[2] );
+            data->ext_press[0], data->ext_press[1], data->ext_press[2] );
 #endif
 }
diff --git a/PG-PuReMD/src/index_utils.h b/PG-PuReMD/src/index_utils.h
index 02677132..337e1844 100644
--- a/PG-PuReMD/src/index_utils.h
+++ b/PG-PuReMD/src/index_utils.h
@@ -45,20 +45,20 @@ static inline CUDA_HOST_DEVICE int index_tbp( int i, int j, int num_atom_types )
 /* Indexing routine for three body parameters */
 static inline CUDA_HOST_DEVICE int index_thbp( int i, int j, int k, int num_atom_types )
 {
-    return (i * num_atom_types * num_atom_types ) + (j * num_atom_types ) + k;
+    return (i * num_atom_types * num_atom_types) + (j * num_atom_types) + k;
 }
 
 /* Indexing routine for hydrogen bonding parameters */
 static inline CUDA_HOST_DEVICE int index_hbp( int i, int j, int k, int num_atom_types )
 {
-    return (i * num_atom_types * num_atom_types ) + (j * num_atom_types ) + k;
+    return (i * num_atom_types * num_atom_types) + (j * num_atom_types) + k;
 }
 
 /* Indexing routine for four body parameters */
 static inline CUDA_HOST_DEVICE int index_fbp( int i, int j, int k, int l, int num_atom_types )
 {
-    return (i * num_atom_types * num_atom_types * num_atom_types ) +
-        (j * num_atom_types * num_atom_types ) + (k * num_atom_types ) + l;
+    return (i * num_atom_types * num_atom_types * num_atom_types) +
+        (j * num_atom_types * num_atom_types) + (k * num_atom_types) + l;
 }
 
 /* Indexing routine for LR table (force tabulation) */
diff --git a/PG-PuReMD/src/init_md.c b/PG-PuReMD/src/init_md.c
index 4eb65bbd..2d09ec4c 100644
--- a/PG-PuReMD/src/init_md.c
+++ b/PG-PuReMD/src/init_md.c
@@ -179,7 +179,7 @@ int Init_System( reax_system *system, control_params *control,
         {
             atom = &(system->my_atoms[i]);
 
-            if ( system->reax_param.sbp[ atom->type ].p_hbond == 1 )
+            if ( system->reax_param.sbp[ atom->type ].p_hbond == H_ATOM )
             {
                 atom->Hindex = system->numH++;
             }
@@ -200,7 +200,7 @@ int Init_System( reax_system *system, control_params *control,
         for ( i = 0; i < system->n; ++i )
         {
             atom = &(system->my_atoms[i]);
-            if ( system->reax_param.sbp[ atom->type ].p_hbond == 1 )
+            if ( system->reax_param.sbp[ atom->type ].p_hbond == H_ATOM )
                 atom->Hindex = system->numH++;
             else atom->Hindex = -1;
         }
@@ -275,32 +275,15 @@ int Cuda_Init_System( reax_system *system, control_params *control,
     Bin_Boundary_Atoms( system );
     fprintf( stderr, "    [BIN_BOUNDARY_ATOMS]\n" );
 
-    /* estimate numH and Hcap */
-    system->numH = 0;
-    if ( control->hbond_cut > 0.0 )
-    {
-        //TODO
-        //for( i = 0; i < system->n; ++i ) {
-        for ( i = 0; i < system->N; ++i )
-        {
-            atom = &(system->my_atoms[i]);
-            atom->Hindex = i;
-            //FIX - 4 - Added fix for HBond Issue
-            if ( system->reax_param.sbp[ atom->type ].p_hbond == 1 )
-            {
-                system->numH++;
-            }
-            //else atom->Hindex = -1;
-        }
-    }
-    system->Hcap = MAX( system->numH * SAFER_ZONE, MIN_CAP );
-
     /* Sync atoms here to continue the computation */
     dev_alloc_system( system );
     fprintf( stderr, "    [DEV ALLOC SYSTEM]\n" );
     Sync_System( system );
     fprintf( stderr, "    [SYNC SYSTEM]\n" );
 
+    /* estimate numH and Hcap */
+    Cuda_Reset_Atoms( system, control );
+
 #if defined(DEBUG_FOCUS)
     fprintf( stderr, "p%d: n=%d local_cap=%d\n",
              system->my_rank, system->n, system->local_cap );
@@ -312,8 +295,10 @@ int Cuda_Init_System( reax_system *system, control_params *control,
 
     Cuda_Compute_Total_Mass( system, data, mpi_data->comm_mesh3D );
     fprintf( stderr, "    [CUDA COMPUTE TOTAL MASS]\n" );
+
     Cuda_Compute_Center_of_Mass( system, data, mpi_data, mpi_data->comm_mesh3D );
     fprintf( stderr, "    [CUDA COMPUTE CENTER OF MASS]\n" );
+
 //    if( Reposition_Atoms( system, control, data, mpi_data, msg ) == FAILURE )
 //    {
 //        return FAILURE;
@@ -915,20 +900,12 @@ int Cuda_Init_Lists( reax_system *system, control_params *control,
         simulation_data *data, storage *workspace, reax_list **lists,
         mpi_datatypes *mpi_data, char *msg )
 {
-    int i, count, ret;
-    int total_hbonds, total_bonds, Htop;
-    int *hb_top;
+    int ret;
+    int Htop;
    
-    hb_top = (int*) scalloc( system->total_cap, sizeof(int),
-           "Cuda_Init_Lists::hb_top" );
-
     /* ignore returned error, as system->d_max_far_nbrs was not valid */
     ret = Cuda_Estimate_Neighbors( system, data->step );
 
-#if defined(DEBUG_FOCUS)
-    fprintf( stderr, "DEVICE total neighbors entries: %d \n", system->total_far_nbrs );
-#endif
-
     Dev_Make_List( system->total_cap, system->total_far_nbrs,
             TYP_FAR_NEIGHBOR, *dev_lists + FAR_NBRS );
 
@@ -944,8 +921,7 @@ int Cuda_Init_Lists( reax_system *system, control_params *control,
     Cuda_Generate_Neighbor_Lists( system, data, workspace, dev_lists );
 
     /* estimate storage for bonds and hbonds */
-    Cuda_Estimate_Storages( system, control, dev_lists, &Htop, hb_top,
-           data->step );
+    Cuda_Estimate_Storages( system, control, dev_lists, &Htop, data->step );
 
     /* estimate storage for charge sparse matrix */
     Cuda_Estimate_Storage_Sparse_Matrix( system, control, data, dev_lists );
@@ -967,52 +943,23 @@ int Cuda_Init_Lists( reax_system *system, control_params *control,
 #endif
 
     // FIX - 4 - Added addition check here for hydrogen Bonds
-    if ( control->hbond_cut > 0.0  &&  system->numH > 0 )
+    if ( control->hbond_cut > 0.0 &&  system->numH > 0 )
     {
-        /* init H indexes */
-        total_hbonds = 0;
-        count = 0;
-
-        for ( i = 0; i < system->N; ++i )
-        {
-            //system->my_atoms[i].num_hbonds = hb_top[i];
-            //TODO
-            hb_top[i] = MAX( hb_top[i] * 4, MIN_HBONDS * 4);
-            total_hbonds += hb_top[i];
-            if ( hb_top[i] > 0 )
-            {
-                ++count;
-            }
-        }
-        total_hbonds = MAX( total_hbonds, MIN_CAP * MIN_HBONDS );
-
-        Dev_Make_List( system->total_cap, system->total_cap *
-                system->max_hbonds, TYP_HBOND, *dev_lists + HBONDS );
+        Dev_Make_List( system->total_cap, system->total_hbonds, TYP_HBOND, *dev_lists + HBONDS );
+//        Make_List( system->total_cap, system->total_hbonds, TYP_HBOND, *lists + HBONDS );
 
-#if defined(DEBUG_FOCUS)
-        fprintf( stderr, "**** Total HBonds allocated --> %d total_cap: %d per atom: %d, max_hbonds: %d \n",
-                total_hbonds, system->total_cap, (total_hbonds /
-                    system->total_cap), system->max_hbonds );
-#endif
-
-        //TODO
-        //Cuda_Init_HBond_Indices (hb_top, system->n);
-        /****/
-        //THIS IS COMMENTED OUT - CHANGE ORIGINAL
-        //Cuda_Init_HBond_Indices (hb_top, system->N);
-        //THIS IS COMMENTED OUT - CHANGE ORIGINAL
-        /****/
+        Cuda_Init_HBond_Indices( system );
 
 #if defined(DEBUG_FOCUS)
         fprintf( stderr, "p%d: allocated hbonds: total_hbonds=%d, space=%dMB\n",
-                system->my_rank, total_hbonds,
-                (int)(total_hbonds * sizeof(hbond_data) / (1024 * 1024)) );
+                system->my_rank, system->total_hbonds,
+                (int)(system->total_hbonds * sizeof(hbond_data) / (1024 * 1024)) );
 #endif
     }
 
     /* bonds list */
     Dev_Make_List( system->total_cap, system->total_bonds, TYP_BOND, *dev_lists + BONDS );
-    Make_List( system->total_cap, system->total_bonds, TYP_BOND, *lists + BONDS );
+//    Make_List( system->total_cap, system->total_bonds, TYP_BOND, *lists + BONDS );
 
     Cuda_Init_Bond_Indices( system );
 
@@ -1026,8 +973,6 @@ int Cuda_Init_Lists( reax_system *system, control_params *control,
      * of three body interactions requires that bond orders have
      * been computed, delay estimation until for computation */
 
-    sfree( hb_top, "Cuda_Init_Lists::hb_top" );
-
     return SUCCESS;
 }
 #endif
diff --git a/PG-PuReMD/src/integrate.c b/PG-PuReMD/src/integrate.c
index 8537f50f..bfbbe1f7 100644
--- a/PG-PuReMD/src/integrate.c
+++ b/PG-PuReMD/src/integrate.c
@@ -345,7 +345,7 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con
         output_controls *out_control, mpi_datatypes *mpi_data )
 {
     int i, steps, renbr, ret;
-    static int verlet_part1_done = FALSE;
+    static int verlet_part1_done = FALSE, estimate_nbrs_done = 0;
     real inv_m, dt, lambda;
     rvec dx;
     reax_atom *atom;
@@ -414,13 +414,18 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con
         t_over_start  = Get_Time ();
 #endif
 
-        //TODO: move far_nbrs reallocation checks outside of renbr frequency check
-        ret = Cuda_Estimate_Neighbors( system, data->step );
-        fprintf( stderr, "  [CUDA_ESTIMATE_NEIGHBORS: %d] STEP %d\n", ret, data->step );
+        if ( estimate_nbrs_done == 0 )
+        {
+            //TODO: move far_nbrs reallocation checks outside of renbr frequency check
+            ret = Cuda_Estimate_Neighbors( system, data->step );
+            estimate_nbrs_done = 1;
+            fprintf( stderr, "  [CUDA_ESTIMATE_NEIGHBORS: %d] STEP %d\n", ret, data->step );
+        }
 
-        if ( ret == SUCCESS )
+        if ( ret == SUCCESS && estimate_nbrs_done == 1 )
         {
             Cuda_Generate_Neighbor_Lists( system, data, workspace, lists );
+            estimate_nbrs_done = 2;
     
 #if defined(DEBUG)
             t_over_elapsed  = Get_Timing_Info( t_over_start );
@@ -449,7 +454,6 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con
 #endif
 
         /* temperature scaler */
-        //Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D );
         Cuda_Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D );
         fprintf( stderr, "  [CUDA_COMPUTE_KINETIC_ENERGY] STEP %d\n", data->step );
 
@@ -476,6 +480,7 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con
 #endif
 
         verlet_part1_done = FALSE;
+        estimate_nbrs_done = 0;
     }
 
     return ret;
@@ -484,8 +489,8 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con
 
 
 /* uses Berendsen-type coupling for both T and P.
-   All box dimensions are scaled by the same amount,
-   there is no change in the angles between axes. */
+ * All box dimensions are scaled by the same amount,
+ * there is no change in the angles between axes. */
 int Velocity_Verlet_Berendsen_NPT( reax_system* system, control_params* control,
         simulation_data *data, storage *workspace, reax_list **lists,
         output_controls *out_control, mpi_datatypes *mpi_data )
diff --git a/PG-PuReMD/src/io_tools.c b/PG-PuReMD/src/io_tools.c
index 9393b642..f3a341db 100644
--- a/PG-PuReMD/src/io_tools.c
+++ b/PG-PuReMD/src/io_tools.c
@@ -846,7 +846,7 @@ void Print_My_Ext_Atoms( reax_system *system )
 
 
 void Print_Far_Neighbors( reax_system *system, reax_list **lists,
-                          control_params *control )
+        control_params *control )
 {
     char  fname[100];
     int   i, j, id_i, id_j, nbr, natoms;
@@ -1037,28 +1037,85 @@ void Print_Charges( reax_system *system )
 }
 
 
-void Print_Bonds( reax_system *system, reax_list *bonds, char *fname )
+void Print_HBonds( reax_system *system, reax_list **lists,
+        control_params *control, int step )
 {
-    int i, j, pj;
+    int i, pj; 
+    char fname[MAX_STR]; 
+    hbond_data *phbond;
+    FILE *fout;
+    reax_list *hbonds = (*lists + HBONDS);
+
+    sprintf( fname, "%s.hbonds.%d.%d", control->sim_name, step, system->my_rank );
+    fout = fopen( fname, "w" );
+
+    for ( i = 0; i < system->N; ++i )
+    {
+        for ( pj = Start_Index(i, hbonds); pj < End_Index(i, hbonds); ++pj )
+        {
+            phbond = &(hbonds->select.hbond_list[pj]);
+
+//            fprintf( fout, "%8d%8d %24.15e %24.15e %24.15e\n", i, phbond->nbr,
+//                    phbond->ptr->dvec[0], phbond->ptr->dvec[1], phbond->ptr->dvec[2] );
+            fprintf( fout, "%8d%8d %8d %8d\n", i, phbond->nbr,
+                  phbond->scl, phbond->sym_index );
+        }
+    }
+
+    fclose( fout );
+}
+
+ 
+void Print_HBond_Indices( reax_system *system, reax_list **lists,
+        control_params *control, int step )
+{
+    int i; 
+    char fname[MAX_STR]; 
+    FILE *fout;
+    reax_list *hbonds = (*lists + HBONDS);
+
+    sprintf( fname, "%s.hbonds_indices.%d.%d", control->sim_name, step, system->my_rank );
+    fout = fopen( fname, "w" );
+
+    for ( i = 0; i < system->N; ++i )
+    {
+        fprintf( fout, "%8d: start: %8d, end: %8d\n",
+                i, Start_Index(i, hbonds), End_Index(i, hbonds) );
+    }
+
+    fclose( fout );
+}
+
+
+void Print_Bonds( reax_system *system, reax_list **lists,
+        control_params *control )
+{
+    int i, pj; 
+    char fname[MAX_STR]; 
     bond_data *pbond;
     bond_order_data *bo_ij;
-    FILE *f = fopen( fname, "w" );
+    FILE *fout;
+    reax_list *bonds = (*lists + BONDS);
+
+    sprintf( fname, "%s.bonds.%d", control->sim_name, system->my_rank );
+    fout = fopen( fname, "w" );
 
     for ( i = 0; i < system->N; ++i )
+    {
         for ( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj )
         {
             pbond = &(bonds->select.bond_list[pj]);
             bo_ij = &(pbond->bo_data);
-            j = pbond->nbr;
-            //fprintf( f, "%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n",
-            //       system->my_atoms[i].orig_id, system->my_atoms[j].orig_id,
-            //       pbond->d, bo_ij->BO, bo_ij->BO_s, bo_ij->BO_pi, bo_ij->BO_pi2 );
-            fprintf( f, "%8d%8d %24.15f %24.15f\n",
-                     i, j,//system->my_atoms[i].orig_id, system->my_atoms[j].orig_id,
-                     pbond->d, bo_ij->BO );
+//            fprintf( fout, "%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n",
+//                    system->my_atoms[i].orig_id, system->my_atoms[j].orig_id,
+//                    pbond->d, bo_ij->BO, bo_ij->BO_s, bo_ij->BO_pi, bo_ij->BO_pi2 );
+            fprintf( fout, "%8d%8d %24.15f %24.15f\n",
+                    i, pbond->nbr, //system->my_atoms[i].orig_id, system->my_atoms[j].orig_id,
+                    pbond->d, bo_ij->BO );
         }
+    }
 
-    fclose(f);
+    fclose( fout );
 }
 
 
diff --git a/PG-PuReMD/src/io_tools.h b/PG-PuReMD/src/io_tools.h
index a0998ec5..789991c3 100644
--- a/PG-PuReMD/src/io_tools.h
+++ b/PG-PuReMD/src/io_tools.h
@@ -46,7 +46,9 @@ void  Print_Sparse_Matrix2( reax_system*, sparse_matrix*, char* );
 void  Print_Linear_System( reax_system*, control_params*, storage*, int );
 void  Print_LinSys_Soln( reax_system*, real*, real*, real* );
 void  Print_Charges( reax_system* );
-void  Print_Bonds( reax_system*, reax_list*, char* );
+void  Print_HBonds( reax_system*, reax_list**, control_params *, int );
+void  Print_HBond_Indices( reax_system*, reax_list**, control_params *, int );
+void  Print_Bonds( reax_system*, reax_list**, control_params *);
 void  Print_Bond_List2( reax_system*, reax_list*, char* );
 void  Print_Total_Force( reax_system*, simulation_data*, storage* );
 void  Output_Results( reax_system*, control_params*, simulation_data*,
diff --git a/PG-PuReMD/src/parallelreax.c b/PG-PuReMD/src/parallelreax.c
index 8a6dc67a..a3071712 100644
--- a/PG-PuReMD/src/parallelreax.c
+++ b/PG-PuReMD/src/parallelreax.c
@@ -337,6 +337,17 @@ int main( int argc, char* argv[] )
     Generate_Neighbor_Lists( system, data, workspace, lists );
 #endif
 
+//    reax_list **temp_lists = (reax_list **) smalloc( LIST_N * sizeof (reax_list *), "temp_lists" );
+//    for ( i = 0; i < LIST_N; ++i )
+//    {
+//        temp_lists[i] = (reax_list *) smalloc( sizeof(reax_list), "lists[i]" );
+//        temp_lists[i]->allocated = FALSE;
+//    }
+//    Make_List( (*dev_lists + FAR_NBRS)->n, (*dev_lists + FAR_NBRS)->num_intrs,
+//            TYP_FAR_NEIGHBOR, (*temp_lists + FAR_NBRS) );
+//    Output_Sync_Lists( (*temp_lists + FAR_NBRS), (*dev_lists + FAR_NBRS), TYP_FAR_NEIGHBOR );
+//    Print_Far_Neighbors( system, temp_lists, control );
+
 #if defined(DEBUG)
     fprintf( stderr, "p%d: Cuda_Generate_Neighbor_Lists done...\n", system->my_rank );
 #endif
@@ -395,6 +406,7 @@ int main( int argc, char* argv[] )
         if ( control->T_mode && retries == 0 )
         {
             Temperature_Control( control, data );
+            fprintf( stderr, "  [TEMPERATURE CONTROL] STEP %d\n", data->step );
         }
 
 #if defined(DEBUG)
@@ -512,7 +524,7 @@ int main( int argc, char* argv[] )
     //TODO: remove?
     /* allocate the cuda auxiliary data structures */
     dev_workspace = (storage *) smalloc( sizeof(storage), "dev_workspace" );
-    dev_lists = (reax_list **) smalloc ( LIST_N * sizeof (reax_list *), "dev_lists" );
+    dev_lists = (reax_list **) smalloc( LIST_N * sizeof(reax_list *), "dev_lists" );
     for ( i = 0; i < LIST_N; ++i )
     {
         dev_lists[i] = (reax_list *) smalloc( sizeof(reax_list), "lists[i]" );
@@ -642,29 +654,22 @@ int main( int argc, char* argv[] )
         fprintf( out_control->out, "Total Simulation Time: %.2f secs\n", t_elapsed );
     }
 
-    // Write_PDB( &system, &(lists[BOND]), &out_control );
+//    Write_PDB( &system, &(lists[BOND]), &out_control );
     Close_Output_Files( system, control, out_control, mpi_data );
 
     MPI_Finalize( );
 
-    /* de-allocate data structures */
-    //for( i = 0; i < LIST_N; ++i ) {
-    //if (lists[i]->index) free (lists[i]->index);
-    //if (lists[i]->end_index) free (lists[i]->end_index);
-    //if (lists[i]->select.v) free (lists[i]->select.v);
-    //free (lists[i] );
-    //}
-
-    free( system );
-    free( control );
-    free( data );
-    free( workspace );
-    free( lists );
-    free( out_control );
-    free( mpi_data );
+    /* deallocate data structures */
+    sfree( system, "main::system" );
+    sfree( control, "main::control" );
+    sfree( data, "main::data" );
+    sfree( workspace, "main::workspace" );
+    sfree( lists, "main::lists" );
+    sfree( out_control, "main::out_control" );
+    sfree( mpi_data, "main::mpi_data" );
 
 #if defined(TEST_ENERGY) || defined(TEST_FORCES)
-//  Integrate_Results(control);
+//    Integrate_Results(control);
 #endif
 
 #if defined(DEBUG)
diff --git a/PG-PuReMD/src/reax_types.h b/PG-PuReMD/src/reax_types.h
index 4570d43f..2ca3be7c 100644
--- a/PG-PuReMD/src/reax_types.h
+++ b/PG-PuReMD/src/reax_types.h
@@ -292,7 +292,8 @@ enum restart_formats
 };
 
 /* ensemble type */
-enum ensembles {
+enum ensembles
+{
     NVE = 0,
     bNVT = 1,
     nhNVT = 2,
@@ -385,20 +386,15 @@ enum gcell_types
     NATIVE = 8,
 };
 
-/* ??? */
-enum atoms
+/* atom types as pertains to hydrogen bonding */
+enum hydrogen_bonding_atom_types
 {
-    C_ATOM = 0,
+    NON_H_BONDING_ATOM = -1,
     H_ATOM = 1,
-    O_ATOM = 2,
-    N_ATOM = 3,
-    S_ATOM = 4,
-    SI_ATOM = 5,
-    GE_ATOM = 6,
-    X_ATOM = 7,
+    H_BONDING_ATOM = 2,
 };
 
-/* ??? */
+/* trajectory file formats */
 enum traj_methods
 {
     REG_TRAJ = 0,
@@ -634,13 +630,13 @@ typedef struct
 {
     /* num. of global parameters, from the force field file */
     int n_global;
-    /* global parameters */
+    /* global parameters, see above mapping */
     real* l;
     /* van der Waals interaction type, values:
-     * 0: ???
-     * 1: ???
-     * 2: ???
-     * 3: ???
+     * 0: none (???)
+     * 1: shielded Morse, no inner-wall
+     * 2: inner wall, no shielding
+     * 3: inner wall + shielding
      * */
     int vdw_type;
 } global_parameters;
@@ -684,8 +680,12 @@ typedef struct
     real chi;
     /**/
     real eta;
-    /* 1 for H, 2 for hbonding atoms (O,S,P,N), 0 for others */
-    int  p_hbond;
+    /* info related to hydrogen bonding
+     * (values correspond to hydrogen_bonding_atom_types enum above):
+     *  0: non-hydrogen bonding atom
+     *  1: H atom
+     *  2: hydrogen bonding atom (e.g., O, S, P, N) */
+    int p_hbond;
 
     /* Line three in field file */
     /**/
@@ -1123,6 +1123,7 @@ typedef struct
     int bigN;
     /* num. hydrogen atoms */
     int numH;
+    int *d_numH;
     /**/
     int local_cap;
     /**/
diff --git a/PG-PuReMD/src/reset_tools.c b/PG-PuReMD/src/reset_tools.c
index 59dffae5..a605cc79 100644
--- a/PG-PuReMD/src/reset_tools.c
+++ b/PG-PuReMD/src/reset_tools.c
@@ -42,13 +42,13 @@ void Reset_Atoms( reax_system* system, control_params *control )
     reax_atom *atom;
 
     system->numH = 0;
-    if ( control->hbond_cut > 0 )
+    if ( control->hbond_cut > 0.0 )
     {
         //TODO
         for ( i = 0; i < system->N; ++i )
         {
             atom = &(system->my_atoms[i]);
-            //if( system->reax_param.sbp[ atom->type ].p_hbond == 1 )
+            //if( system->reax_param.sbp[ atom->type ].p_hbond == H_ATOM )
             atom->Hindex = system->numH++;
             //else atom->Hindex = -1;
         }
diff --git a/PG-PuReMD/src/traj.c b/PG-PuReMD/src/traj.c
index 9761494a..d561a45f 100644
--- a/PG-PuReMD/src/traj.c
+++ b/PG-PuReMD/src/traj.c
@@ -79,13 +79,16 @@ int Set_My_Trajectory_View( MPI_File trj, int offset, MPI_Datatype etype,
 
 
 int Reallocate_Output_Buffer( output_controls *out_control, int req_space,
-                              MPI_Comm comm )
+        MPI_Comm comm )
 {
     if ( out_control->buffer_len > 0 )
-        free( out_control->buffer );
+    {
+        sfree( out_control->buffer, "Reallocate_Output_Buffer::out_control->buffer" );
+    }
 
     out_control->buffer_len = req_space * SAFE_ZONE;
-    out_control->buffer = (char*) malloc(out_control->buffer_len * sizeof(char));
+    out_control->buffer = (char*) smalloc( out_control->buffer_len * sizeof(char),
+            "Reallocate_Output_Buffer::out_control->buffer" );
     if ( out_control->buffer == NULL )
     {
         fprintf( stderr,
@@ -99,7 +102,7 @@ int Reallocate_Output_Buffer( output_controls *out_control, int req_space,
 
 
 void Write_Skip_Line( output_controls *out_control, mpi_datatypes *mpi_data,
-                      int my_rank, int skip, int num_section )
+        int my_rank, int skip, int num_section )
 {
     MPI_Status status;
 
@@ -131,33 +134,41 @@ int Write_Header( reax_system *system, control_params *control,
 {
     int  num_hdr_lines, my_hdr_lines, buffer_req;
     MPI_Status status;
-    char ensembles[ens_N][25] =  { "NVE", "NVT", "fully flexible NPT",
-                                   "semi isotropic NPT", "isotropic NPT"
-                                 };
-    char reposition[3][25] = { "fit to periodic box", "CoM to center of box",
-                               "CoM to origin"
-                             };
-    char t_regime[3][25] = { "T-coupling only", "step-wise", "constant slope" };
-
-    char traj_methods[TF_N][10] = { "custom", "xyz" };
-    char atom_formats[8][40] =  { "none", "invalid", "invalid", "invalid",
-                                  "xyz_q",
-                                  "xyz_q_fxfyfz",
-                                  "xyz_q_vxvyvz",
-                                  "detailed_atom_info"
-                                };
-    char bond_formats[3][30] = { "none",
-                                 "basic_bond_info",
-                                 "detailed_bond_info"
-                               };
-    char angle_formats[2][30] = { "none", "basic_angle_info" };
+    char ensembles[ens_N][25] = {
+        "NVE", "NVT", "fully flexible NPT",
+        "semi isotropic NPT", "isotropic NPT",
+    };
+    char reposition[3][25] = {
+        "fit to periodic box", "CoM to center of box",
+        "CoM to origin",
+    };
+    char t_regime[3][25] = {
+        "T-coupling only", "step-wise", "constant slope",
+    };
+
+    char traj_methods[TF_N][10] = {
+        "custom", "xyz",
+    };
+    char atom_formats[8][40] =  {
+        "none", "invalid", "invalid", "invalid",
+        "xyz_q", "xyz_q_fxfyfz", "xyz_q_vxvyvz",
+        "detailed_atom_info",
+    };
+    char bond_formats[3][30] = {
+        "none", "basic_bond_info", "detailed_bond_info",
+    };
+    char angle_formats[2][30] = {
+        "none", "basic_angle_info",
+    };
 
     /* set header lengths */
     num_hdr_lines = NUM_HEADER_LINES;
     my_hdr_lines = num_hdr_lines * ( system->my_rank == MASTER_NODE );
     buffer_req = my_hdr_lines * HEADER_LINE_LEN;
     if ( buffer_req > out_control->buffer_len * DANGER_ZONE )
+    {
         Reallocate_Output_Buffer( out_control, buffer_req, mpi_data->world );
+    }
 
     /* only the master node writes into trajectory header */
     if ( system->my_rank == MASTER_NODE )
@@ -357,7 +368,7 @@ int Write_Header( reax_system *system, control_params *control,
 
 
 int Write_Init_Desc( reax_system *system, control_params *control,
-                     output_controls *out_control, mpi_datatypes *mpi_data )
+        output_controls *out_control, mpi_datatypes *mpi_data )
 {
     int i, me, np, cnt, buffer_len, buffer_req;
     reax_atom *p_atom;
@@ -372,11 +383,18 @@ int Write_Init_Desc( reax_system *system, control_params *control,
                      system->bigN * INIT_DESC_LEN, system->bigN );
 
     if ( out_control->traj_method == REG_TRAJ && me == MASTER_NODE )
+    {
         buffer_req = system->bigN * INIT_DESC_LEN + 1;
-    else buffer_req = system->n * INIT_DESC_LEN + 1;
+    }
+    else
+    {
+        buffer_req = system->n * INIT_DESC_LEN + 1;
+    }
 
     if ( buffer_req > out_control->buffer_len * DANGER_ZONE )
+    {
         Reallocate_Output_Buffer( out_control, buffer_req, mpi_data->world );
+    }
 
     out_control->line[0] = 0;
     out_control->buffer[0] = 0;
@@ -425,8 +443,7 @@ int Write_Init_Desc( reax_system *system, control_params *control,
 
 
 int Init_Traj( reax_system *system, control_params *control,
-               output_controls *out_control, mpi_datatypes *mpi_data,
-               char *msg )
+        output_controls *out_control, mpi_datatypes *mpi_data, char *msg )
 {
     char fname[MAX_STR];
     int  atom_line_len[ NR_OPT_ATOM ] = { 0, 0, 0, 0,
@@ -450,7 +467,8 @@ int Init_Traj( reax_system *system, control_params *control,
     out_control->write_angles = ( out_control->angle_line_len ? 1 : 0 );
 
     /* allocate line & buffer space */
-    out_control->line = (char*) calloc( MAX_TRJ_LINE_LEN + 1, sizeof(char) );
+    out_control->line = (char*) scalloc( MAX_TRJ_LINE_LEN + 1, sizeof(char),
+           "Init_Traj::out_control->line" );
     out_control->buffer_len = 0;
     out_control->buffer = NULL;
 
@@ -527,8 +545,8 @@ int Init_Traj( reax_system *system, control_params *control,
 
 
 int Write_Frame_Header( reax_system *system, control_params *control,
-                        simulation_data *data, output_controls *out_control,
-                        mpi_datatypes *mpi_data )
+        simulation_data *data, output_controls *out_control,
+        mpi_datatypes *mpi_data )
 {
     int me, num_frm_hdr_lines, my_frm_hdr_lines, buffer_req;
     MPI_Status status;
@@ -539,7 +557,9 @@ int Write_Frame_Header( reax_system *system, control_params *control,
     my_frm_hdr_lines = num_frm_hdr_lines * ( me == MASTER_NODE );
     buffer_req = my_frm_hdr_lines * HEADER_LINE_LEN;
     if ( buffer_req > out_control->buffer_len * DANGER_ZONE )
+    {
         Reallocate_Output_Buffer( out_control, buffer_req, mpi_data->world );
+    }
 
     /* only the master node writes into trajectory header */
     if ( me == MASTER_NODE )
@@ -666,27 +686,33 @@ int Write_Frame_Header( reax_system *system, control_params *control,
 }
 
 
-
 int Write_Atoms( reax_system *system, control_params *control,
-                 output_controls *out_control, mpi_datatypes *mpi_data )
+        output_controls *out_control, mpi_datatypes *mpi_data )
 {
     int i, me, np, line_len, buffer_len, buffer_req, cnt;
-    MPI_Status  status;
-    reax_atom  *p_atom;
+    MPI_Status status;
+    reax_atom *p_atom;
 
     me = system->my_rank;
     np = control->nprocs;
     line_len = out_control->atom_line_len;
 
     Write_Skip_Line( out_control, mpi_data, me,
-                     system->bigN * line_len, system->bigN );
+            system->bigN * line_len, system->bigN );
 
     if ( out_control->traj_method == REG_TRAJ && me == MASTER_NODE )
+    {
         buffer_req = system->bigN * line_len + 1;
-    else buffer_req = system->n * line_len + 1;
+    }
+    else
+    {
+        buffer_req = system->n * line_len + 1;
+    }
 
     if ( buffer_req > out_control->buffer_len * DANGER_ZONE )
+    {
         Reallocate_Output_Buffer( out_control, buffer_req, mpi_data->world );
+    }
 
     /* fill in buffer */
     out_control->line[0] = 0;
@@ -761,8 +787,8 @@ int Write_Atoms( reax_system *system, control_params *control,
 }
 
 
-int Write_Bonds(reax_system *system, control_params *control, reax_list *bonds,
-                output_controls *out_control, mpi_datatypes *mpi_data)
+int Write_Bonds( reax_system *system, control_params *control, reax_list *bonds,
+        output_controls *out_control, mpi_datatypes *mpi_data )
 {
     int i, j, pj, me, np;
     int my_bonds, num_bonds;
@@ -790,11 +816,18 @@ int Write_Bonds(reax_system *system, control_params *control, reax_list *bonds,
     Write_Skip_Line( out_control, mpi_data, me, num_bonds * line_len, num_bonds );
 
     if ( out_control->traj_method == REG_TRAJ && me == MASTER_NODE )
+    {
         buffer_req = num_bonds * line_len + 1;
-    else buffer_req = my_bonds * line_len + 1;
+    }
+    else
+    {
+        buffer_req = my_bonds * line_len + 1;
+    }
 
     if ( buffer_req > out_control->buffer_len * DANGER_ZONE )
+    {
         Reallocate_Output_Buffer( out_control, buffer_req, mpi_data->world );
+    }
 
     /* fill in the buffer */
     my_bonds = 0;
@@ -868,8 +901,8 @@ int Write_Bonds(reax_system *system, control_params *control, reax_list *bonds,
 
 
 int Write_Angles( reax_system *system, control_params *control,
-                  reax_list *bonds, reax_list *thb_intrs,
-                  output_controls *out_control, mpi_datatypes *mpi_data )
+        reax_list *bonds, reax_list *thb_intrs,
+        output_controls *out_control, mpi_datatypes *mpi_data )
 {
     int i, j, k, pi, pk, me, np;
     int my_angles, num_angles;
@@ -909,11 +942,18 @@ int Write_Angles( reax_system *system, control_params *control,
     Write_Skip_Line( out_control, mpi_data, me, num_angles * line_len, num_angles );
 
     if ( out_control->traj_method == REG_TRAJ && me == MASTER_NODE )
+    {
         buffer_req = num_angles * line_len + 1;
-    else buffer_req = my_angles * line_len + 1;
+    }
+    else
+    {
+        buffer_req = my_angles * line_len + 1;
+    }
 
     if ( buffer_req > out_control->buffer_len * DANGER_ZONE )
+    {
         Reallocate_Output_Buffer( out_control, buffer_req, mpi_data->world );
+    }
 
     /* fill in the buffer */
     my_angles = 0;
@@ -982,8 +1022,8 @@ int Write_Angles( reax_system *system, control_params *control,
 
 
 int Append_Frame( reax_system *system, control_params *control,
-                  simulation_data *data, reax_list **lists,
-                  output_controls *out_control, mpi_datatypes *mpi_data )
+        simulation_data *data, reax_list **lists,
+        output_controls *out_control, mpi_datatypes *mpi_data )
 {
 #if defined(DEBUG_FOCUS)
     fprintf( stderr, "p%d: appending frame %d\n", system->my_rank, data->step );
@@ -1028,12 +1068,16 @@ int Append_Frame( reax_system *system, control_params *control,
 int End_Traj( int my_rank, output_controls *out_control )
 {
     if ( out_control->traj_method == MPI_TRAJ )
+    {
         MPI_File_close( &(out_control->trj) );
+    }
     else if ( my_rank == MASTER_NODE )
+    {
         fclose( out_control->strj );
+    }
 
-    free( out_control->buffer );
-    free( out_control->line );
+    sfree( out_control->buffer, "End_Traj::out_control->buffer" );
+    sfree( out_control->line, "End_Traj::out_control->line" );
 
     return SUCCESS;
 }
diff --git a/PG-PuReMD/src/traj.h b/PG-PuReMD/src/traj.h
index 7ce82e0f..8f09c4a7 100644
--- a/PG-PuReMD/src/traj.h
+++ b/PG-PuReMD/src/traj.h
@@ -22,8 +22,10 @@
 #ifndef __TRAJ_H__
 #define __TRAJ_H__
 
+
 #include "reax_types.h"
 
+
 #define MAX_TRJ_LINE_LEN     120
 #define MAX_TRJ_BUFFER_SIZE  (MAX_TRJ_LINE_LEN * 100)
 
@@ -62,16 +64,28 @@
 #define ANGLE_BASIC "%9d%9d%9d%10.3f\n" // Atom1 Atom2 Atom3 Theta
 #define ANGLE_BASIC_LEN 38
 
-enum ATOM_LINE_OPTS  { OPT_NOATOM = 0, OPT_ATOM_BASIC = 4, OPT_ATOM_wF = 5, OPT_ATOM_wV = 6, OPT_ATOM_FULL = 7, NR_OPT_ATOM = 8 };
-enum BOND_LINE_OPTS  { OPT_NOBOND, OPT_BOND_BASIC, OPT_BOND_FULL, NR_OPT_BOND };
-enum ANGLE_LINE_OPTS { OPT_NOANGLE, OPT_ANGLE_BASIC, NR_OPT_ANGLE };
 
+enum ATOM_LINE_OPTS
+{
+    OPT_NOATOM = 0, OPT_ATOM_BASIC = 4, OPT_ATOM_wF = 5, OPT_ATOM_wV = 6,
+    OPT_ATOM_FULL = 7, NR_OPT_ATOM = 8,
+};
+enum BOND_LINE_OPTS
+{
+    OPT_NOBOND = 0, OPT_BOND_BASIC = 1, OPT_BOND_FULL = 2, NR_OPT_BOND = 3,
+};
+enum ANGLE_LINE_OPTS
+{
+    OPT_NOANGLE = 0, OPT_ANGLE_BASIC = 1, NR_OPT_ANGLE = 2,
+};
+
+
+int Init_Traj( reax_system*, control_params*, output_controls*, mpi_datatypes*, char* );
+
+int End_Traj( int, output_controls* );
 
-int  Init_Traj( reax_system*, control_params*, output_controls*,
-                mpi_datatypes*, char* );
-int  End_Traj( int, output_controls* );
+int Append_Frame( reax_system*, control_params*, simulation_data*, reax_list**,
+        output_controls*, mpi_datatypes* );
 
-int  Append_Frame( reax_system*, control_params*, simulation_data*,
-                   reax_list**, output_controls*, mpi_datatypes* );
 
 #endif
-- 
GitLab