diff --git a/PG-PuReMD/src/cuda/cuda_forces.cu b/PG-PuReMD/src/cuda/cuda_forces.cu
index e8895e60e8743310bf7713c8851c75a1b629f1a9..703dcbcff714b54e8332a925a6236147e65ed86b 100644
--- a/PG-PuReMD/src/cuda/cuda_forces.cu
+++ b/PG-PuReMD/src/cuda/cuda_forces.cu
@@ -262,7 +262,7 @@ CUDA_GLOBAL void k_init_distance( reax_atom *my_atoms, reax_list far_nbr_list, i
 CUDA_GLOBAL void k_init_distance_opt( reax_atom *my_atoms, reax_list far_nbr_list, int N )
 {
     int j, pj, start_i, end_i, thread_id, warp_id, lane_id, itr;
-    __shared__ rvec x_i;
+    rvec x_i;
 
     thread_id = blockIdx.x * blockDim.x + threadIdx.x;
     warp_id = thread_id >> 5;
@@ -275,11 +275,7 @@ CUDA_GLOBAL void k_init_distance_opt( reax_atom *my_atoms, reax_list far_nbr_lis
     lane_id = thread_id & 0x0000001F; 
     start_i = Start_Index( warp_id, &far_nbr_list );
     end_i = End_Index( warp_id, &far_nbr_list );
-    if ( lane_id == 0 )
-    {
-        rvec_Copy( x_i, my_atoms[warp_id].x );
-    }
-    __syncthreads( );
+    rvec_Copy( x_i, my_atoms[warp_id].x );
 
     /* update distance and displacement vector between atoms i and j (i-j) */
     for ( itr = 0, pj = start_i + lane_id; itr < (end_i - start_i + 0x0000001F) >> 5; ++itr )
@@ -1762,14 +1758,15 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control,
         simulation_data *data, storage *workspace, 
         reax_list **lists, output_controls *out_control )
 {
-    int update_energy, ret;
+    int ret;
 //    int hbs, hnbrs_blocks;
     int *thbody;
     static int compute_bonded_part1 = FALSE;
+#if !defined(CUDA_ACCUM_ATOMIC)
+    int update_energy;
     real *spad;
     rvec *rvec_spad;
 
-#if !defined(CUDA_ACCUM_ATOMIC)
     cuda_check_malloc( &workspace->scratch, &workspace->scratch_size,
             MAX( sizeof(real) * system->n,
                 MAX( sizeof(real) * 3 * system->n,
@@ -1778,9 +1775,10 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control,
                             (sizeof(real) + sizeof(rvec)) * system->n + sizeof(rvec) * control->blocks )))),
             "Cuda_Compute_Bonded_Forces::workspace->scratch" );
     spad = (real *) workspace->scratch;
-#endif
     update_energy = (out_control->energy_update_freq > 0
             && data->step % out_control->energy_update_freq == 0) ? TRUE : FALSE;
+#endif
+
     ret = SUCCESS;
 
     if ( compute_bonded_part1 == FALSE )
@@ -1888,8 +1886,10 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control,
             "Cuda_Compute_Bonded_Forces::workspace->scratch" );
 
     thbody = (int *) workspace->scratch;
+#if !defined(CUDA_ACCUM_ATOMIC)
     /* in case scratch gets reallocated above, reassign scratch pointer */
     spad = (real *) workspace->scratch;
+#endif
 
     ret = Cuda_Estimate_Storage_Three_Body( system, control, data, workspace,
             lists, thbody );