Skip to content
Snippets Groups Projects
Commit 74f5c1c6 authored by Kurt A. O'Hearn's avatar Kurt A. O'Hearn
Browse files

PG-PuReMD: fix atomic force reductions and reduce the number of atomic operations.

parent 7c451993
No related branches found
No related tags found
No related merge requests found
......@@ -2120,16 +2120,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control,
}
void Cuda_Compute_NonBonded_Forces( reax_system *system, control_params *control,
simulation_data *data, storage *workspace,
reax_list **lists, output_controls *out_control,
mpi_datatypes *mpi_data )
{
Cuda_NonBonded_Energy( system, control, workspace, data, lists, out_control );
}
void Cuda_Compute_Total_Force( reax_system *system, control_params *control,
static void Cuda_Compute_Total_Force( reax_system *system, control_params *control,
simulation_data *data, storage *workspace,
reax_list **lists, mpi_datatypes *mpi_data )
{
......@@ -2237,7 +2228,7 @@ extern "C" int Cuda_Compute_Forces( reax_system *system, control_params *control
#endif
Cuda_Compute_NonBonded_Forces( system, control, data, workspace,
lists, out_control, mpi_data );
lists, out_control );
#if defined(LOG_PERFORMANCE)
cudaEventRecord( time_event[4] );
......
......@@ -51,7 +51,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1( reax_atom *my_atoms, single_body_par
rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk;
rvec dvec_jk, temp, ext_press_l;
#if defined(CUDA_ACCUM_ATOMIC)
rvec f_i_l, f_j_l, f_k_l;
rvec f_j_l;
#endif
hbond_parameters *hbp;
bond_order_data *bo_ij;
......@@ -69,9 +69,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1( reax_atom *my_atoms, single_body_par
e_hb_l = 0.0;
rvec_MakeZero( ext_press_l );
#if defined(CUDA_ACCUM_ATOMIC)
rvec_MakeZero( f_i_l );
rvec_MakeZero( f_j_l );
rvec_MakeZero( f_k_l );
#endif
/* discover the Hydrogen bonds between i-j-k triplets.
......@@ -185,13 +183,13 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1( reax_atom *my_atoms, single_body_par
rvec_ScaledAdd( phbond_jk->hb_f, CEhb3 / r_jk, dvec_jk );
#else
/* dcos terms */
rvec_ScaledAdd( f_i_l, CEhb2, dcos_theta_di );
atomic_rvecScaledAdd( workspace.f[i], CEhb2, dcos_theta_di );
rvec_ScaledAdd( f_j_l, CEhb2, dcos_theta_dj );
rvec_ScaledAdd( f_k_l, CEhb2, dcos_theta_dk );
atomic_rvecScaledAdd( workspace.f[k], CEhb2, dcos_theta_dk );
/* dr terms */
rvec_ScaledAdd( f_j_l, -1.0 * CEhb3 / r_jk, dvec_jk );
rvec_ScaledAdd( f_k_l, CEhb3 / r_jk, dvec_jk );
atomic_rvecScaledAdd( workspace.f[k], CEhb3 / r_jk, dvec_jk );
#endif
}
else
......@@ -226,7 +224,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1( reax_atom *my_atoms, single_body_par
* derivatives are added directly into pressure vector/tensor */
/* dcos terms */
rvec_Scale( temp, CEhb2, dcos_theta_di );
rvec_Add( f_i_l, temp );
atomic_rvecAdd( workspace.f[i], temp );
rvec_iMultiply( temp, pbond_ij->rel_box, temp );
rvec_Add( ext_press_l, temp );
......@@ -235,7 +233,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1( reax_atom *my_atoms, single_body_par
ivec_Scale( rel_jk, hbond_list.hbond_list[pk].scl,
far_nbr_list.far_nbr_list.rel_box[nbr_jk] );
rvec_Scale( temp, CEhb2, dcos_theta_dk );
rvec_Add( f_k_l, temp );
atomic_rvecAdd( workspace.f[k], temp );
rvec_iMultiply( temp, rel_jk, temp );
rvec_Add( ext_press_l, temp );
......@@ -243,7 +241,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1( reax_atom *my_atoms, single_body_par
rvec_ScaledAdd( f_j_l, -1.0 * CEhb3 / r_jk, dvec_jk );
rvec_Scale( temp, CEhb3 / r_jk, dvec_jk );
rvec_Add( f_k_l, temp );
atomic_rvecAdd( workspace.f[k], temp );
rvec_iMultiply( temp, rel_jk, temp );
rvec_Add( ext_press_l, temp );
#endif
......@@ -260,12 +258,11 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1( reax_atom *my_atoms, single_body_par
#if !defined(CUDA_ACCUM_ATOMIC)
/* write conflicts for accumulating partial forces resolved by subsequent kernels */
rvecCopy( workspace.f[j], f_j_l );
e_hb_g[j] = e_hb_l;
rvecCopy( ext_press_g[j], ext_press_l );
#else
atomic_rvecAdd( workspace.f[i], f_i_l );
atomic_rvecAdd( workspace.f[j], f_j_l );
atomic_rvecAdd( workspace.f[k], f_k_l );
atomicAdd( (double *) e_hb_g, (double) e_hb_l );
atomic_rvecAdd( *ext_press_g, ext_press_l );
#endif
......@@ -289,11 +286,9 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1_opt( reax_atom *my_atoms, single_body
real r_ij, r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2;
real e_hb, e_hb_l, exp_hb2, exp_hb3, CEhb1, CEhb1_l, CEhb2, CEhb3;
rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk;
rvec dvec_jk, temp, ext_press_l;
rvec dvec_jk, temp, f_j_l, ext_press_l;
#if !defined(CUDA_ACCUM_ATOMIC)
rvec f_l, hb_f_l;
#else
rvec f_i_l, f_j_l, f_k_l;
rvec hb_f_l;
#endif
hbond_parameters *hbp;
bond_order_data *bo_ij;
......@@ -322,14 +317,8 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1_opt( reax_atom *my_atoms, single_body
* so in this function i->X, j->H, k->Z when we map
* variables onto the ones in the handout.*/
e_hb_l = 0.0;
rvec_MakeZero( ext_press_l );
#if !defined(CUDA_ACCUM_ATOMIC)
rvec_MakeZero( f_l );
#else
rvec_MakeZero( f_i_l );
rvec_MakeZero( f_j_l );
rvec_MakeZero( f_k_l );
#endif
rvec_MakeZero( ext_press_l );
/* j has to be of type H */
if ( sbp[ my_atoms[j].type ].p_hbond == H_ATOM )
......@@ -435,21 +424,21 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1_opt( reax_atom *my_atoms, single_body
#if !defined(CUDA_ACCUM_ATOMIC)
/* dcos terms */
rvec_ScaledAdd( hb_f_l, CEhb2, dcos_theta_di );
rvec_ScaledAdd( f_l, CEhb2, dcos_theta_dj );
rvec_ScaledAdd( f_j_l, CEhb2, dcos_theta_dj );
rvec_ScaledAdd( phbond_jk->hb_f, CEhb2, dcos_theta_dk );
/* dr terms */
rvec_ScaledAdd( f_l, -1.0 * CEhb3 / r_jk, dvec_jk );
rvec_ScaledAdd( f_j_l, -1.0 * CEhb3 / r_jk, dvec_jk );
rvec_ScaledAdd( phbond_jk->hb_f, CEhb3 / r_jk, dvec_jk );
#else
/* dcos terms */
rvec_ScaledAdd( f_i_l, CEhb2, dcos_theta_di );
atomic_rvecScaledAdd( workspace.f[i], CEhb2, dcos_theta_di );
rvec_ScaledAdd( f_j_l, CEhb2, dcos_theta_dj );
rvec_ScaledAdd( f_k_l, CEhb2, dcos_theta_dk );
atomic_rvecScaledAdd( workspace.f[k], CEhb2, dcos_theta_dk );
/* dr terms */
rvec_ScaledAdd( f_j_l, -1.0 * CEhb3 / r_jk, dvec_jk );
rvec_ScaledAdd( f_k_l, CEhb3 / r_jk, dvec_jk );
atomic_rvecScaledAdd( workspace.f[k], CEhb3 / r_jk, dvec_jk );
#endif
}
else
......@@ -484,7 +473,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1_opt( reax_atom *my_atoms, single_body
* derivatives are added directly into pressure vector/tensor */
/* dcos terms */
rvec_Scale( temp, CEhb2, dcos_theta_di );
rvec_Add( f_i_l, temp );
atomic_rvecAdd( workspace.f[i], temp );
rvec_iMultiply( temp, pbond_ij->rel_box, temp );
rvec_Add( ext_press_l, temp );
......@@ -493,7 +482,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1_opt( reax_atom *my_atoms, single_body
ivec_Scale( rel_jk, hbond_list.hbond_list[pk].scl,
far_nbr_list.far_nbr_list.rel_box[nbr_jk] );
rvec_Scale( temp, CEhb2, dcos_theta_dk );
rvec_Add( f_k_l, temp );
atomic_rvecAdd( workspace.f[k], temp );
rvec_iMultiply( temp, rel_jk, temp );
rvec_Add( ext_press_l, temp );
......@@ -501,7 +490,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1_opt( reax_atom *my_atoms, single_body
rvec_ScaledAdd( f_j_l, -1.0 * CEhb3 / r_jk, dvec_jk );
rvec_Scale( temp, CEhb3 / r_jk, dvec_jk );
rvec_Add( f_k_l, temp );
atomic_rvecAdd( workspace.f[k], temp );
rvec_iMultiply( temp, rel_jk, temp );
rvec_Add( ext_press_l, temp );
#endif
......@@ -539,21 +528,9 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1_opt( reax_atom *my_atoms, single_body
/* warp-level sums using registers within a warp */
for ( offset = warpSize >> 1; offset > 0; offset /= 2 )
{
#if !defined(CUDA_ACCUM_ATOMIC)
f_l[0] += __shfl_down_sync( mask, f_l[0], offset );
f_l[1] += __shfl_down_sync( mask, f_l[1], offset );
f_l[2] += __shfl_down_sync( mask, f_l[2], offset );
#else
f_i_l[0] += __shfl_down_sync( mask, f_i_l[0], offset );
f_i_l[1] += __shfl_down_sync( mask, f_i_l[1], offset );
f_i_l[2] += __shfl_down_sync( mask, f_i_l[2], offset );
f_j_l[0] += __shfl_down_sync( mask, f_j_l[0], offset );
f_j_l[1] += __shfl_down_sync( mask, f_j_l[1], offset );
f_j_l[2] += __shfl_down_sync( mask, f_j_l[2], offset );
f_k_l[0] += __shfl_down_sync( mask, f_k_l[0], offset );
f_k_l[1] += __shfl_down_sync( mask, f_k_l[1], offset );
f_k_l[2] += __shfl_down_sync( mask, f_k_l[2], offset );
#endif
e_hb_l += __shfl_down_sync( mask, e_hb_l, offset );
ext_press_l[0] += __shfl_down_sync( mask, ext_press_l[0], offset );
ext_press_l[1] += __shfl_down_sync( mask, ext_press_l[1], offset );
......@@ -564,13 +541,11 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_Part1_opt( reax_atom *my_atoms, single_body
if ( lane_id == 0 )
{
#if !defined(CUDA_ACCUM_ATOMIC)
rvec_Add( workspace.f[j], f_l );
rvec_Add( workspace.f[j], f_j_l );
e_hb_g[j] = e_hb_l;
rvecCopy( ext_press_g[j], ext_press_l );
#else
atomic_rvecAdd( workspace.f[i], f_i_l );
atomic_rvecAdd( workspace.f[j], f_j_l );
atomic_rvecAdd( workspace.f[k], f_k_l );
atomicAdd( (double *) e_hb_g, (double) e_hb_l );
atomic_rvecAdd( *ext_press_g, ext_press_l );
#endif
......
......@@ -202,11 +202,6 @@ CUDA_GLOBAL void Cuda_Atom_Energy_Part1( reax_atom *my_atoms, global_parameters
workspace.CdDelta[i] += CEover3; // OvCoor - 2nd term
workspace.CdDelta[i] += CEunder3; // UnCoor - 1st term
#if defined(TEST_FORCES)
Add_dDelta( system, lists, i, CEover3, workspace.f_ov ); // OvCoor 2nd
Add_dDelta( system, lists, i, CEunder3, workspace.f_un ); // UnCoor 1st
#endif
for ( pj = Start_Index(i, &bond_list); pj < End_Index(i, &bond_list); ++pj )
{
pbond_ij = &bond_list.bond_list[pj];
......@@ -240,55 +235,6 @@ CUDA_GLOBAL void Cuda_Atom_Energy_Part1( reax_atom *my_atoms, global_parameters
* workspace.Delta_lp_temp[j]); // UnCoor-2b
bo_ij->Cdbopi2 += CEunder4 * (workspace.Delta[j] - dfvl
* workspace.Delta_lp_temp[j]); // UnCoor-2b
#if defined(TEST_ENERGY)
/* fprintf( out_control->eov, "%6d%12.6f\n",
workspace.reverse_map[j],
// CEover1 * twbp->p_ovun1 * twbp->De_s, CEover3,
CEover4 * (1.0 - workspace.dDelta_lp[j]) *
(bo_ij->BO_pi + bo_ij->BO_pi2)
*/// /*CEover4 * (workspace.Delta[j]-workspace.Delta_lp[j])*/);
// fprintf( out_control->eov, "%6d%12.6f\n",
// fprintf( out_control->eov, "%6d%24.15e\n",
// system->my_atoms[j].orig_id,
// CEover1 * twbp->p_ovun1 * twbp->De_s, CEover3,
// CEover4 * (1.0 - workspace.dDelta_lp[j]) *
// (bo_ij->BO_pi + bo_ij->BO_pi2)
// /*CEover4 * (workspace.Delta[j]-workspace.Delta_lp[j])*/);
// CEunder4 * (1.0 - workspace.dDelta_lp[j]) *
// (bo_ij->BO_pi + bo_ij->BO_pi2),
// CEunder4 * (workspace.Delta[j] - workspace.Delta_lp[j]) );
#endif
#if defined(TEST_FORCES)
Add_dBO( system, lists, i, pj, CEover1 * twbp->p_ovun1 * twbp->De_s,
workspace.f_ov ); // OvCoor - 1st term
Add_dDelta( system, lists, j,
CEover4 * (1.0 - dfvl*workspace.dDelta_lp[j]) *
(bo_ij->BO_pi + bo_ij->BO_pi2),
workspace.f_ov ); // OvCoor - 3a
Add_dBOpinpi2( system, lists, i, pj,
CEover4 * (workspace.Delta[j] -
dfvl * workspace.Delta_lp_temp[j]),
CEover4 * (workspace.Delta[j] -
dfvl * workspace.Delta_lp_temp[j]),
workspace.f_ov, workspace.f_ov ); // OvCoor - 3b
Add_dDelta( system, lists, j,
CEunder4 * (1.0 - dfvl*workspace.dDelta_lp[j]) *
(bo_ij->BO_pi + bo_ij->BO_pi2),
workspace.f_un ); // UnCoor - 2a
Add_dBOpinpi2( system, lists, i, pj,
CEunder4 * (workspace.Delta[j] -
dfvl * workspace.Delta_lp_temp[j]),
CEunder4 * (workspace.Delta[j] -
dfvl * workspace.Delta_lp_temp[j]),
workspace.f_un, workspace.f_un ); // UnCoor - 2b
#endif
}
}
......
......@@ -35,7 +35,7 @@
CUDA_GLOBAL void k_compute_polarization_energy( reax_atom *my_atoms,
single_body_parameters *sbp, int n, real *data_e_pol )
single_body_parameters *sbp, int n, real *e_pol_g )
{
int i, type_i;
real q;
......@@ -50,7 +50,7 @@ CUDA_GLOBAL void k_compute_polarization_energy( reax_atom *my_atoms,
q = my_atoms[i].q;
type_i = my_atoms[i].type;
data_e_pol[i] = KCALpMOL_to_EV * (sbp[type_i].chi * q
e_pol_g[i] = KCALpMOL_to_EV * (sbp[type_i].chi * q
+ (sbp[type_i].eta / 2.0) * SQR(q));
}
......@@ -59,7 +59,7 @@ CUDA_GLOBAL void k_compute_polarization_energy( reax_atom *my_atoms,
CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms,
two_body_parameters *tbp, global_parameters gp, control_params *control,
storage workspace, reax_list far_nbr_list, int n, int num_atom_types,
real *data_e_vdW, real *data_e_ele, rvec *data_ext_press )
real *e_vdW_g, real *e_ele_g, rvec *ext_press_g )
{
int i, j, pj;
int start_i, end_i, orig_i, orig_j;
......@@ -69,8 +69,8 @@ CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms,
real r_ij, fn13, exp1, exp2, e_base, de_base;
real Tap, dTap, dfn13, CEvd, CEclmb;
real dr3gamij_1, dr3gamij_3;
real e_ele, e_vdW, e_core, de_core, e_clb, de_clb;
rvec temp, ext_press;
real e_ele_l, e_vdW_l, e_core, de_core, e_clb, de_clb;
rvec temp, f_i_l, ext_press_l;
two_body_parameters *twbp;
i = blockIdx.x * blockDim.x + threadIdx.x;
......@@ -82,9 +82,10 @@ CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms,
p_vdW1 = gp.l[28];
p_vdW1i = 1.0 / p_vdW1;
e_vdW = 0.0;
e_ele = 0.0;
rvec_MakeZero( ext_press );
e_vdW_l = 0.0;
e_ele_l = 0.0;
rvec_MakeZero( f_i_l );
rvec_MakeZero( ext_press_l );
start_i = Start_Index( i, &far_nbr_list );
end_i = End_Index( i, &far_nbr_list );
......@@ -137,7 +138,7 @@ CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms,
exp2 = EXP( 0.5 * twbp->alpha * (1.0 - fn13 / twbp->r_vdW) );
e_base = twbp->D * (exp1 - 2.0 * exp2);
e_vdW += self_coef * (e_base * Tap);
e_vdW_l += self_coef * (e_base * Tap);
dfn13 = POW( r_ij, p_vdW1 - 1.0 )
* POW( powr_vdW1 + powgi_vdW1, p_vdW1i - 1.0 );
......@@ -150,7 +151,7 @@ CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms,
exp2 = EXP( 0.5 * twbp->alpha * (1.0 - r_ij / twbp->r_vdW) );
e_base = twbp->D * (exp1 - 2.0 * exp2);
e_vdW += self_coef * (e_base * Tap);
e_vdW_l += self_coef * (e_base * Tap);
de_base = (twbp->D * twbp->alpha / twbp->r_vdW) * (exp2 - exp1);
}
......@@ -159,7 +160,7 @@ CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms,
if ( gp.vdw_type == 2 || gp.vdw_type == 3 )
{
e_core = twbp->ecore * EXP( twbp->acore * (1.0 - (r_ij / twbp->rcore)) );
e_vdW += self_coef * (e_core * Tap);
e_vdW_l += self_coef * (e_core * Tap);
de_core = -(twbp->acore / twbp->rcore) * e_core;
}
......@@ -172,28 +173,17 @@ CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms,
CEvd = self_coef * ( (de_base + de_core) * Tap
+ (e_base + e_core) * dTap );
#if defined(DEBUG_FOCUS)
printf( "%6d%6d%24.12f%24.12f%24.12f%24.12f\n",
i + 1, j + 1,
e_base, de_base, e_core, de_core );
#endif
/* Coulomb Calculations */
dr3gamij_1 = r_ij * r_ij * r_ij
+ POW( twbp->gamma, -3.0 );
dr3gamij_3 = POW( dr3gamij_1, 1.0 / 3.0 );
e_clb = C_ELE * (my_atoms[i].q * my_atoms[j].q) / dr3gamij_3;
e_ele += self_coef * (e_clb * Tap);
e_ele_l += self_coef * (e_clb * Tap);
de_clb = -C_ELE * (my_atoms[i].q * my_atoms[j].q)
* (r_ij * r_ij) / POW( dr3gamij_1, 4.0 / 3.0 );
CEclmb = self_coef * (de_clb * Tap + e_clb * dTap);
#if defined(DEBUG_FOCUS)
printf( "%6d%6d%24.12f%24.12f\n",
i + 1, j + 1, e_clb, de_clb );
#endif
if ( control->virial == 0 )
{
if ( i < j )
......@@ -204,7 +194,7 @@ CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms,
{
rvec_Scale( temp, (CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] );
}
atomic_rvecAdd( workspace.f[i], temp );
rvec_Add( f_i_l, temp );
rvec_Scale( temp, -1.0, temp );
atomic_rvecAdd( workspace.f[j], temp );
}
......@@ -223,48 +213,29 @@ CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms,
rvec_Scale( temp,
(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] );
}
atomic_rvecAdd( workspace.f[i], temp );
rvec_Add( f_i_l, temp );
rvec_Scale( temp, -1.0, temp );
atomic_rvecAdd( workspace.f[j], temp );
rvec_iMultiply( ext_press,
rvec_iMultiply( temp,
far_nbr_list.far_nbr_list.rel_box[pj], temp );
rvec_Add( data_ext_press[i], ext_press );
rvec_Add( ext_press_l, temp );
}
#if defined(TEST_ENERGY)
// fprintf( out_control->evdw,
// "%12.9f%12.9f%12.9f%12.9f%12.9f%12.9f%12.9f%12.9f\n",
// workspace.Tap[7], workspace.Tap[6], workspace.Tap[5],
// workspace.Tap[4], workspace.Tap[3], workspace.Tap[2],
// workspace.Tap[1], Tap );
//fprintf( out_control->evdw, "%6d%6d%24.15e%24.15e%24.15e\n",
fprintf( out_control->evdw, "%6d%6d%12.4f%12.4f%12.4f\n",
my_atoms[i].orig_id, my_atoms[j].orig_id,
r_ij, e_vdW, data->my_en.e_vdW );
//fprintf(out_control->ecou,"%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e\n",
fprintf( out_control->ecou, "%6d%6d%12.4f%12.4f%12.4f%12.4f%12.4f\n",
my_atoms[i].orig_id, my_atoms[j].orig_id,
r_ij, my_atoms[i].q, my_atoms[j].q,
e_ele, data->my_en.e_ele );
#endif
#if defined(TEST_FORCES)
rvec_ScaledAdd( workspace.f_vdw[i], -CEvd,
far_nbr_list.far_nbr_list.dvec[pj] );
rvec_ScaledAdd( workspace.f_vdw[j], +CEvd,
far_nbr_list.far_nbr_list.dvec[pj] );
rvec_ScaledAdd( workspace.f_ele[i], -CEclmb,
far_nbr_list.far_nbr_list.dvec[pj] );
rvec_ScaledAdd( workspace.f_ele[j], +CEclmb,
far_nbr_list.far_nbr_list.dvec[pj] );
#endif
}
}
__syncthreads( );
data_e_vdW[i] = e_vdW;
data_e_ele[i] = e_ele;
atomic_rvecAdd( workspace.f[i], f_i_l );
#if !defined(CUDA_ACCUM_ATOMIC)
e_vdW_g[i] = e_vdW_l;
e_ele_g[i] = e_ele_l;
if ( control->virial == 1 )
rvec_Copy( ext_press_g[j], ext_press_l );
#else
atomicAdd( (double *) e_vdW_g, (double) e_vdW_l );
atomicAdd( (double *) e_ele_g, (double) e_ele_l );
if ( control->virial == 1 )
atomic_rvecAdd( *ext_press_g, ext_press_l );
#endif
}
......@@ -272,7 +243,7 @@ CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms,
CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms,
two_body_parameters *tbp, global_parameters gp, control_params *control,
storage workspace, reax_list far_nbr_list, int n, int num_atom_types,
real *data_e_vdW, real *data_e_ele, rvec *data_ext_press )
real *e_vdW_g, real *e_ele_g, rvec *ext_press_g )
{
int i, j, pj;
int start_i, end_i, orig_i, orig_j;
......@@ -282,12 +253,10 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms,
real r_ij, fn13, exp1, exp2, e_base, de_base;
real Tap, dTap, dfn13, CEvd, CEclmb;
real dr3gamij_1, dr3gamij_3;
real e_ele, e_vdW, e_core, de_core, e_clb, de_clb;
rvec temp, ext_press;
real e_vdW_l, e_ele_l, e_core, de_core, e_clb, de_clb;
rvec temp, f_i_l, ext_press_l;
two_body_parameters *twbp;
int thread_id, warp_id, lane_id, offset;
real e_vdW_l, e_ele_l;
rvec f_l;
thread_id = blockIdx.x * blockDim.x + threadIdx.x;
warp_id = thread_id >> 5;
......@@ -301,12 +270,10 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms,
i = warp_id;
p_vdW1 = gp.l[28];
p_vdW1i = 1.0 / p_vdW1;
e_vdW = 0.0;
e_ele = 0.0;
rvec_MakeZero( ext_press );
e_vdW_l = 0.0;
e_ele_l = 0.0;
rvec_MakeZero( f_l );
rvec_MakeZero( f_i_l );
rvec_MakeZero( ext_press_l );
start_i = Start_Index( i, &far_nbr_list );
end_i = End_Index( i, &far_nbr_list );
......@@ -360,8 +327,7 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms,
exp2 = EXP( 0.5 * twbp->alpha * (1.0 - fn13 / twbp->r_vdW) );
e_base = twbp->D * (exp1 - 2.0 * exp2);
e_vdW = self_coef * (e_base * Tap);
e_vdW_l += e_vdW;
e_vdW_l += self_coef * (e_base * Tap);
dfn13 = POW( r_ij, p_vdW1 - 1.0 )
* POW( powr_vdW1 + powgi_vdW1, p_vdW1i - 1.0 );
......@@ -374,8 +340,7 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms,
exp2 = EXP( 0.5 * twbp->alpha * (1.0 - r_ij / twbp->r_vdW) );
e_base = twbp->D * (exp1 - 2.0 * exp2);
e_vdW = self_coef * (e_base * Tap);
e_vdW_l += e_vdW;
e_vdW_l += self_coef * (e_base * Tap);
de_base = (twbp->D * twbp->alpha / twbp->r_vdW) * (exp2 - exp1);
}
......@@ -384,8 +349,7 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms,
if ( gp.vdw_type == 2 || gp.vdw_type == 3 )
{
e_core = twbp->ecore * EXP( twbp->acore * (1.0 - (r_ij / twbp->rcore)) );
e_vdW += self_coef * (e_core * Tap);
e_vdW_l += (self_coef * (e_core * Tap));
e_vdW_l += self_coef * (e_core * Tap);
de_core = -(twbp->acore / twbp->rcore) * e_core;
}
......@@ -398,29 +362,17 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms,
CEvd = self_coef * ( (de_base + de_core) * Tap
+ (e_base + e_core) * dTap );
#if defined(DEBUG_FOCUS)
printf( "%6d%6d%24.12f%24.12f%24.12f%24.12f\n",
i + 1, j + 1,
e_base, de_base, e_core, de_core );
#endif
/* Coulomb Calculations */
dr3gamij_1 = r_ij * r_ij * r_ij
+ POW( twbp->gamma, -3.0 );
dr3gamij_3 = POW( dr3gamij_1, 1.0 / 3.0 );
e_clb = C_ELE * (my_atoms[i].q * my_atoms[j].q) / dr3gamij_3;
e_ele = self_coef * (e_clb * Tap);
e_ele_l += e_ele;
e_ele_l += self_coef * (e_clb * Tap);
de_clb = -C_ELE * (my_atoms[i].q * my_atoms[j].q)
* (r_ij * r_ij) / POW( dr3gamij_1, 4.0 / 3.0 );
CEclmb = self_coef * (de_clb * Tap + e_clb * dTap);
#if defined(DEBUG_FOCUS)
printf( "%6d%6d%24.12f%24.12f\n",
i + 1, j + 1, e_clb, de_clb );
#endif
if ( control->virial == 0 )
{
if ( i < j )
......@@ -431,7 +383,7 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms,
{
rvec_Scale( temp, (CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] );
}
rvec_Add( f_l, temp );
rvec_Add( f_i_l, temp );
rvec_Scale( temp, -1.0, temp );
atomic_rvecAdd( workspace.f[j], temp );
}
......@@ -450,42 +402,14 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms,
rvec_Scale( temp,
(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] );
}
rvec_Add( f_l, temp );
rvec_Add( f_i_l, temp );
rvec_Scale( temp, -1.0, temp );
atomic_rvecAdd( workspace.f[j], temp );
rvec_iMultiply( ext_press,
rvec_iMultiply( temp,
far_nbr_list.far_nbr_list.rel_box[pj], temp );
rvec_Add( data_ext_press[i], ext_press );
rvec_Add( ext_press_l, temp );
}
#if defined(TEST_ENERGY)
// fprintf( out_control->evdw,
// "%12.9f%12.9f%12.9f%12.9f%12.9f%12.9f%12.9f%12.9f\n",
// workspace.Tap[7], workspace.Tap[6], workspace.Tap[5],
// workspace.Tap[4], workspace.Tap[3], workspace.Tap[2],
// workspace.Tap[1], Tap );
//fprintf( out_control->evdw, "%6d%6d%24.15e%24.15e%24.15e\n",
fprintf( out_control->evdw, "%6d%6d%12.4f%12.4f%12.4f\n",
my_atoms[i].orig_id, my_atoms[j].orig_id,
r_ij, e_vdW, data->my_en.e_vdW );
//fprintf(out_control->ecou,"%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e\n",
fprintf( out_control->ecou, "%6d%6d%12.4f%12.4f%12.4f%12.4f%12.4f\n",
my_atoms[i].orig_id, my_atoms[j].orig_id,
r_ij, my_atoms[i].q, my_atoms[j].q,
e_ele, data->my_en.e_ele );
#endif
#if defined(TEST_FORCES)
rvec_ScaledAdd( workspace.f_vdw[i], -CEvd,
far_nbr_list.far_nbr_list.dvec[pj] );
rvec_ScaledAdd( workspace.f_vdw[j], +CEvd,
far_nbr_list.far_nbr_list.dvec[pj] );
rvec_ScaledAdd( workspace.f_ele[i], -CEclmb,
far_nbr_list.far_nbr_list.dvec[pj] );
rvec_ScaledAdd( workspace.f_ele[j], +CEclmb,
far_nbr_list.far_nbr_list.dvec[pj] );
#endif
}
pj += warpSize;
......@@ -496,17 +420,26 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms,
{
e_vdW_l += __shfl_down_sync( FULL_MASK, e_vdW_l, offset );
e_ele_l += __shfl_down_sync( FULL_MASK, e_ele_l, offset );
f_l[0] += __shfl_down_sync( FULL_MASK, f_l[0], offset );
f_l[1] += __shfl_down_sync( FULL_MASK, f_l[1], offset );
f_l[2] += __shfl_down_sync( FULL_MASK, f_l[2], offset );
f_i_l[0] += __shfl_down_sync( FULL_MASK, f_i_l[0], offset );
f_i_l[1] += __shfl_down_sync( FULL_MASK, f_i_l[1], offset );
f_i_l[2] += __shfl_down_sync( FULL_MASK, f_i_l[2], offset );
}
/* first thread within a warp writes warp-level sum to global memory */
if ( lane_id == 0 )
{
data_e_vdW[i] = e_vdW_l;
data_e_ele[i] = e_ele_l;
atomic_rvecAdd( workspace.f[i], f_l );
atomic_rvecAdd( workspace.f[i], f_i_l );
#if !defined(CUDA_ACCUM_ATOMIC)
e_vdW_g[i] = e_vdW_l;
e_ele_g[i] = e_ele_l;
if ( control->virial == 1 )
rvec_Copy( ext_press_g[j], ext_press_l );
#else
atomicAdd( (double *) e_vdW_g, (double) e_vdW_l );
atomicAdd( (double *) e_ele_g, (double) e_ele_l );
if ( control->virial == 1 )
atomic_rvecAdd( *ext_press_g, ext_press_l );
#endif
}
}
......@@ -517,15 +450,15 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_tab( reax_atom *my_atoms,
storage workspace, reax_list far_nbr_list,
LR_lookup_table *t_LR, int n, int num_atom_types,
int step, int prev_steps, int energy_update_freq,
real *data_e_vdW, real *data_e_ele, rvec *data_ext_press )
real *e_vdW_g, real *e_ele_g, rvec *ext_press_g )
{
int i, j, pj, r, steps, update_freq, update_energies;
int type_i, type_j, tmin, tmax;
int start_i, end_i, orig_i, orig_j;
real r_ij, self_coef, base, dif;
real e_vdW, e_ele;
real e_vdW_l, e_ele_l;
real CEvd, CEclmb;
rvec temp, ext_press;
rvec temp, f_i_l, ext_press_l;
LR_lookup_table *t;
i = blockIdx.x * blockDim.x + threadIdx.x;
......@@ -538,10 +471,10 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_tab( reax_atom *my_atoms,
steps = step - prev_steps;
update_freq = energy_update_freq;
update_energies = update_freq > 0 && steps % update_freq == 0;
e_ele = 0.0;
e_vdW = 0.0;
data_e_vdW[i] = 0.0;
data_e_ele[i] = 0.0;
e_ele_l = 0.0;
e_vdW_l = 0.0;
rvec_MakeZero( f_i_l );
rvec_MakeZero( ext_press_l );
type_i = my_atoms[i].type;
start_i = Start_Index( i, &far_nbr_list );
......@@ -575,16 +508,11 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_tab( reax_atom *my_atoms,
if ( update_energies )
{
e_vdW = ((t->vdW[r].d * dif + t->vdW[r].c) * dif + t->vdW[r].b)
* dif + t->vdW[r].a;
e_vdW *= self_coef;
e_ele = ((t->ele[r].d * dif + t->ele[r].c) * dif + t->ele[r].b)
* dif + t->ele[r].a;
e_ele *= self_coef * my_atoms[i].q * my_atoms[j].q;
e_vdW_l += self_coef * (((t->vdW[r].d * dif + t->vdW[r].c) * dif + t->vdW[r].b)
* dif + t->vdW[r].a);
data_e_vdW[i] += e_vdW;
data_e_ele[i] += e_ele;
e_ele_l += (((t->ele[r].d * dif + t->ele[r].c) * dif + t->ele[r].b)
* dif + t->ele[r].a) * self_coef * my_atoms[i].q * my_atoms[j].q;
}
CEvd = ((t->CEvd[r].d * dif + t->CEvd[r].c) * dif + t->CEvd[r].b)
......@@ -599,54 +527,56 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_tab( reax_atom *my_atoms,
{
if ( i < j )
{
rvec_ScaledAdd( workspace.f[i],
rvec_ScaledAdd( temp,
-(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] );
}
else
{
rvec_ScaledAdd( workspace.f[i],
rvec_ScaledAdd( temp,
(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] );
}
rvec_Add( f_i_l, temp );
rvec_Scale( temp, -1.0, temp );
atomic_rvecAdd( workspace.f[j], temp );
}
/* NPT, iNPT or sNPT */
else
{
/* for pressure coupling, terms not related to bond order derivatives
are added directly into pressure vector/tensor */
rvec_Scale( temp,
(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] );
rvec_ScaledAdd( workspace.f[i], -1.0, temp );
rvec_Add( workspace.f[j], temp );
if ( i < j )
{
rvec_Scale( temp,
-(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] );
}
else
{
rvec_Scale( temp,
(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] );
}
rvec_Add( f_i_l, temp );
rvec_ScaledAdd( temp, -1.0, temp );
atomic_rvecAdd( workspace.f[j], temp );
rvec_iMultiply( ext_press, far_nbr_list.far_nbr_list.rel_box[pj], temp );
rvec_Add( data_ext_press[i], ext_press );
rvec_iMultiply( temp, far_nbr_list.far_nbr_list.rel_box[pj], temp );
rvec_Add( ext_press_l, temp );
}
#if defined(TEST_ENERGY)
//fprintf( out_control->evdw, "%6d%6d%24.15e%24.15e%24.15e\n",
fprintf( out_control->evdw, "%6d%6d%12.4f%12.4f%12.4f\n",
my_atoms[i].orig_id, my_atoms[j].orig_id,
r_ij, e_vdW, data->my_en.e_vdW );
//fprintf(out_control->ecou,"%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e\n",
fprintf( out_control->ecou, "%6d%6d%12.4f%12.4f%12.4f%12.4f%12.4f\n",
my_atoms[i].orig_id, my_atoms[j].orig_id,
r_ij, my_atoms[i].q, my_atoms[j].q,
e_ele, data->my_en.e_ele );
#endif
#if defined(TEST_FORCES)
rvec_ScaledAdd( workspace.f_vdw[i], -CEvd,
far_nbr_list.far_nbr_list.dvec[pj] );
rvec_ScaledAdd( workspace.f_vdw[j], +CEvd,
far_nbr_list.far_nbr_list.dvec[pj] );
rvec_ScaledAdd( workspace.f_ele[i], -CEclmb,
far_nbr_list.far_nbr_list.dvec[pj] );
rvec_ScaledAdd( workspace.f_ele[j], +CEclmb,
far_nbr_list.far_nbr_list.dvec[pj] );
#endif
}
}
atomic_rvecAdd( workspace.f[i], f_i_l );
#if !defined(CUDA_ACCUM_ATOMIC)
__syncthreads( );
e_vdW_g[i] = e_vdW_l;
e_ele_g[i] = e_ele_l;
if ( control->virial == 1 )
rvec_Copy( ext_press_g[j], ext_press_l );
#else
atomicAdd( (double *) e_vdW_g, (double) e_vdW_l );
atomicAdd( (double *) e_ele_g, (double) e_ele_l );
if ( control->virial == 1 )
atomic_rvecAdd( *ext_press_g, ext_press_l );
#endif
}
......@@ -675,21 +605,34 @@ static void Cuda_Compute_Polarization_Energy( reax_system *system, storage *work
}
void Cuda_NonBonded_Energy( reax_system *system, control_params *control,
storage *workspace, simulation_data *data, reax_list **lists,
void Cuda_Compute_NonBonded_Forces( reax_system *system, control_params *control,
simulation_data *data, storage *workspace, reax_list **lists,
output_controls *out_control )
{
int update_energy, blocks;
#if !defined(CUDA_ACCUM_ATOMIC)
real *spad;
rvec *spad_rvec;
#endif
update_energy = (out_control->energy_update_freq > 0
&& data->step % out_control->energy_update_freq == 0) ? TRUE : FALSE;
#if !defined(CUDA_ACCUM_ATOMIC)
cuda_check_malloc( &workspace->scratch, &workspace->scratch_size,
(sizeof(real) * 2 + sizeof(rvec)) * system->n + sizeof(rvec) * control->blocks,
"Cuda_NonBonded_Energy::workspace->scratch" );
"Cuda_Compute_NonBonded_Forces::workspace->scratch" );
spad = (real *) workspace->scratch;
#endif
#if defined(CUDA_ACCUM_ATOMIC)
cuda_memset( &((simulation_data *)data->d_simulation_data)->my_en.e_vdW,
0, sizeof(real), "Cuda_Compute_Bonded_Forces::e_vdW" );
cuda_memset( &((simulation_data *)data->d_simulation_data)->my_en.e_ele,
0, sizeof(real), "Cuda_Compute_Bonded_Forces::e_ele" );
cuda_memset( &((simulation_data *)data->d_simulation_data)->my_ext_press,
0, sizeof(rvec), "Cuda_Compute_Bonded_Forces::my_ext_press" );
#endif
blocks = system->n * 32 / DEF_BLOCK_SIZE
+ (system->n * 32 % DEF_BLOCK_SIZE == 0 ? 0 : 1);
......@@ -701,14 +644,28 @@ void Cuda_NonBonded_Energy( reax_system *system, control_params *control,
// system->reax_param.d_gp, (control_params *) control->d_control_params,
// *(workspace->d_workspace), *(lists[FAR_NBRS]),
// system->n, system->reax_param.num_atom_types,
// spad, &spad[system->n], (rvec *) (&spad[2 * system->n]) );
//#if !defined(CUDA_ACCUM_ATOMIC)
// spad, &spad[system->n], (rvec *) (&spad[2 * system->n])
//#else
// &((simulation_data *)data->d_simulation_data)->my_en.e_vdW,
// &((simulation_data *)data->d_simulation_data)->my_en.e_ele,
// &((simulation_data *)data->d_simulation_data)->my_ext_press
//#endif
// );
k_vdW_coulomb_energy_opt <<< blocks, DEF_BLOCK_SIZE >>>
( system->d_my_atoms, system->reax_param.d_tbp,
system->reax_param.d_gp, (control_params *) control->d_control_params,
*(workspace->d_workspace), *(lists[FAR_NBRS]),
system->n, system->reax_param.num_atom_types,
spad, &spad[system->n], (rvec *) (&spad[2 * system->n]) );
#if !defined(CUDA_ACCUM_ATOMIC)
spad, &spad[system->n], (rvec *) (&spad[2 * system->n])
#else
&((simulation_data *)data->d_simulation_data)->my_en.e_vdW,
&((simulation_data *)data->d_simulation_data)->my_en.e_ele,
&((simulation_data *)data->d_simulation_data)->my_ext_press
#endif
);
cudaCheckError( );
}
else
......@@ -721,10 +678,18 @@ void Cuda_NonBonded_Energy( reax_system *system, control_params *control,
system->reax_param.num_atom_types,
data->step, data->prev_steps,
out_control->energy_update_freq,
spad, &spad[system->n], (rvec *) (&spad[2 * system->n]));
#if !defined(CUDA_ACCUM_ATOMIC)
spad, &spad[system->n], (rvec *) (&spad[2 * system->n])
#else
&((simulation_data *)data->d_simulation_data)->my_en.e_vdW,
&((simulation_data *)data->d_simulation_data)->my_en.e_ele,
&((simulation_data *)data->d_simulation_data)->my_ext_press
#endif
);
cudaCheckError( );
}
#if !defined(CUDA_ACCUM_ATOMIC)
if ( update_energy == TRUE )
{
/* reduction for vdw */
......@@ -755,6 +720,7 @@ void Cuda_NonBonded_Energy( reax_system *system, control_params *control,
control->blocks );
cudaCheckError( );
}
#endif
if ( update_energy == TRUE )
{
......
......@@ -25,8 +25,8 @@
#include "../reax_types.h"
void Cuda_NonBonded_Energy( reax_system *, control_params *,
storage *, simulation_data *, reax_list **,
void Cuda_Compute_NonBonded_Forces( reax_system *, control_params *,
simulation_data *, storage *, reax_list **,
output_controls * );
......
......@@ -170,9 +170,8 @@ CUDA_GLOBAL void Cuda_Torsion_Angles_Part1( reax_atom *my_atoms, global_paramete
real CEtors5, CEtors6, CEtors7, CEtors8, CEtors9;
real Cconj, CEconj1, CEconj2, CEconj3;
real CEconj4, CEconj5, CEconj6, e_tor_l, e_con_l;
rvec dvec_li, temp, ext_press_l;
rvec dvec_li, temp, f_j_l, ext_press_l;
ivec rel_box_jl;
// rtensor total_rtensor, temp_rtensor;
four_body_header *fbh;
four_body_parameters *fbp;
bond_data *pbond_ij, *pbond_jk, *pbond_kl;
......@@ -193,6 +192,7 @@ CUDA_GLOBAL void Cuda_Torsion_Angles_Part1( reax_atom *my_atoms, global_paramete
p_cot2 = gp.l[27];
e_tor_l = 0.0;
e_con_l = 0.0;
rvec_MakeZero( f_j_l );
rvec_MakeZero( ext_press_l );
#if defined(DEBUG_FOCUS)
num_frb_intrs = 0;
......@@ -431,13 +431,13 @@ CUDA_GLOBAL void Cuda_Torsion_Angles_Part1( reax_atom *my_atoms, global_paramete
/* dcos_theta_ijk */
atomic_rvecScaledAdd( pbond_ij->ta_f,
CEtors7 + CEconj4, p_ijk->dcos_dk );
rvec_ScaledAdd( workspace.f[j],
rvec_ScaledAdd( f_j_l,
CEtors7 + CEconj4, p_ijk->dcos_dj );
atomic_rvecScaledAdd( pbond_jk->ta_f,
CEtors7 + CEconj4, p_ijk->dcos_di );
/* dcos_theta_jkl */
rvec_ScaledAdd( workspace.f[j],
rvec_ScaledAdd( f_j_l,
CEtors8 + CEconj5, p_jkl->dcos_di );
atomic_rvecScaledAdd( pbond_jk->ta_f,
CEtors8 + CEconj5, p_jkl->dcos_dj );
......@@ -447,7 +447,7 @@ CUDA_GLOBAL void Cuda_Torsion_Angles_Part1( reax_atom *my_atoms, global_paramete
/* dcos_omega */
atomic_rvecScaledAdd( pbond_ij->ta_f,
CEtors9 + CEconj6, dcos_omega_di );
rvec_ScaledAdd( workspace.f[j],
rvec_ScaledAdd( f_j_l,
CEtors9 + CEconj6, dcos_omega_dj );
atomic_rvecScaledAdd( pbond_jk->ta_f,
CEtors9 + CEconj6, dcos_omega_dk );
......@@ -457,13 +457,13 @@ CUDA_GLOBAL void Cuda_Torsion_Angles_Part1( reax_atom *my_atoms, global_paramete
/* dcos_theta_ijk */
atomic_rvecScaledAdd( workspace.f[i],
CEtors7 + CEconj4, p_ijk->dcos_dk );
atomic_rvecScaledAdd( workspace.f[j],
rvec_ScaledAdd( f_j_l,
CEtors7 + CEconj4, p_ijk->dcos_dj );
atomic_rvecScaledAdd( workspace.f[k],
CEtors7 + CEconj4, p_ijk->dcos_di );
/* dcos_theta_jkl */
atomic_rvecScaledAdd( workspace.f[j],
rvec_ScaledAdd( f_j_l,
CEtors8 + CEconj5, p_jkl->dcos_di );
atomic_rvecScaledAdd( workspace.f[k],
CEtors8 + CEconj5, p_jkl->dcos_dj );
......@@ -473,7 +473,7 @@ CUDA_GLOBAL void Cuda_Torsion_Angles_Part1( reax_atom *my_atoms, global_paramete
/* dcos_omega */
atomic_rvecScaledAdd( workspace.f[i],
CEtors9 + CEconj6, dcos_omega_di );
atomic_rvecScaledAdd( workspace.f[j],
rvec_ScaledAdd( f_j_l,
CEtors9 + CEconj6, dcos_omega_dj );
atomic_rvecScaledAdd( workspace.f[k],
CEtors9 + CEconj6, dcos_omega_dk );
......@@ -492,7 +492,7 @@ CUDA_GLOBAL void Cuda_Torsion_Angles_Part1( reax_atom *my_atoms, global_paramete
rvec_iMultiply( temp, pbond_ij->rel_box, temp );
rvec_Add( ext_press_l, temp );
rvec_ScaledAdd( workspace.f[j],
rvec_ScaledAdd( f_j_l,
CEtors7 + CEconj4, p_ijk->dcos_dj );
rvec_Scale( temp, CEtors7 + CEconj4, p_ijk->dcos_di );
......@@ -501,7 +501,7 @@ CUDA_GLOBAL void Cuda_Torsion_Angles_Part1( reax_atom *my_atoms, global_paramete
rvec_Add( ext_press_l, temp );
/* dcos_theta_jkl */
rvec_ScaledAdd( workspace.f[j],
rvec_ScaledAdd( f_j_l,
CEtors8 + CEconj5, p_jkl->dcos_di );
rvec_Scale( temp, CEtors8 + CEconj5, p_jkl->dcos_dj );
......@@ -520,7 +520,7 @@ CUDA_GLOBAL void Cuda_Torsion_Angles_Part1( reax_atom *my_atoms, global_paramete
rvec_iMultiply( temp, pbond_ij->rel_box, temp );
rvec_Add( ext_press_l, temp );
rvec_ScaledAdd( workspace.f[j],
rvec_ScaledAdd( f_j_l,
CEtors9 + CEconj6, dcos_omega_dj );
rvec_Scale( temp, CEtors9 + CEconj6, dcos_omega_dk );
......@@ -541,7 +541,7 @@ CUDA_GLOBAL void Cuda_Torsion_Angles_Part1( reax_atom *my_atoms, global_paramete
rvec_iMultiply( temp, pbond_ij->rel_box, temp );
rvec_Add( ext_press_l, temp );
atomic_rvecScaledAdd( workspace.f[j],
rvec_ScaledAdd( f_j_l,
CEtors7 + CEconj4, p_ijk->dcos_dj );
rvec_Scale( temp, CEtors7 + CEconj4, p_ijk->dcos_di );
......@@ -550,7 +550,7 @@ CUDA_GLOBAL void Cuda_Torsion_Angles_Part1( reax_atom *my_atoms, global_paramete
rvec_Add( ext_press_l, temp );
/* dcos_theta_jkl */
atomic_rvecScaledAdd( workspace.f[j],
rvec_ScaledAdd( f_j_l,
CEtors8 + CEconj5, p_jkl->dcos_di );
rvec_Scale( temp, CEtors8 + CEconj5, p_jkl->dcos_dj );
......@@ -569,7 +569,7 @@ CUDA_GLOBAL void Cuda_Torsion_Angles_Part1( reax_atom *my_atoms, global_paramete
rvec_iMultiply( temp, pbond_ij->rel_box, temp );
rvec_Add( ext_press_l, temp );
atomic_rvecScaledAdd( workspace.f[j],
rvec_ScaledAdd( f_j_l,
CEtors9 + CEconj6, dcos_omega_dj );
rvec_Scale( temp, CEtors9 + CEconj6, dcos_omega_dk );
......@@ -593,10 +593,12 @@ CUDA_GLOBAL void Cuda_Torsion_Angles_Part1( reax_atom *my_atoms, global_paramete
// } // j loop
#if !defined(CUDA_ACCUM_ATOMIC)
rvec_Add( workspace.f[j], f_j_l );
e_tor_g[j] = e_tor_l;
e_con_g[j] = e_con_l;
rvec_Copy( e_ext_press_g[j], e_ext_press_l );
#else
atomic_rvecAdd( workspace.f[j], f_j_l );
atomicAdd( (double *) e_tor_g, (double) e_tor_l );
atomicAdd( (double *) e_con_g, (double) e_con_l );
atomic_rvecAdd( *ext_press_g, ext_press_l );
......
......@@ -59,7 +59,7 @@ CUDA_GLOBAL void Cuda_Valence_Angles_Part1( reax_atom *my_atoms,
real f7_ij, f7_jk, f8_Dj, f9_Dj;
real Ctheta_0, theta_0, theta_00, theta, cos_theta, sin_theta;
real BOA_ij, BOA_jk;
rvec rvec_temp, ext_press_l;
rvec rvec_temp, f_j_l, ext_press_l;
three_body_header *thbh;
three_body_parameters *thbp;
three_body_interaction_data *p_ijk;
......@@ -88,6 +88,7 @@ CUDA_GLOBAL void Cuda_Valence_Angles_Part1( reax_atom *my_atoms,
e_ang_l = 0.0;
e_coa_l = 0.0;
e_pen_l = 0.0;
rvec_MakeZero( f_j_l );
rvec_MakeZero( ext_press_l );
type_j = my_atoms[j].type;
......@@ -406,11 +407,11 @@ CUDA_GLOBAL void Cuda_Valence_Angles_Part1( reax_atom *my_atoms,
{
#if !defined(CUDA_ACCUM_ATOMIC)
rvec_ScaledAdd( pbond_ij->va_f, CEval8, p_ijk->dcos_di );
rvec_ScaledAdd( workspace.f[j], CEval8, p_ijk->dcos_dj );
rvec_ScaledAdd( f_j_l, CEval8, p_ijk->dcos_dj );
rvec_ScaledAdd( pbond_jk->va_f, CEval8, p_ijk->dcos_dk );
#else
atomic_rvecScaledAdd( workspace.f[i], CEval8, p_ijk->dcos_di );
atomic_rvecScaledAdd( workspace.f[j], CEval8, p_ijk->dcos_dj );
rvec_ScaledAdd( f_j_l, CEval8, p_ijk->dcos_dj );
atomic_rvecScaledAdd( workspace.f[k], CEval8, p_ijk->dcos_dk );
#endif
}
......@@ -424,7 +425,7 @@ CUDA_GLOBAL void Cuda_Valence_Angles_Part1( reax_atom *my_atoms,
rvec_iMultiply( rvec_temp, pbond_ij->rel_box, rvec_temp );
rvec_Add( ext_press_l, rvec_temp );
rvec_ScaledAdd( workspace.f[j], CEval8, p_ijk->dcos_dj );
rvec_ScaledAdd( f_j_l, CEval8, p_ijk->dcos_dj );
rvec_Scale( rvec_temp, CEval8, p_ijk->dcos_dk );
rvec_Add( pbond_jk->va_f, rvec_temp );
......@@ -438,7 +439,7 @@ CUDA_GLOBAL void Cuda_Valence_Angles_Part1( reax_atom *my_atoms,
rvec_iMultiply( rvec_temp, pbond_ij->rel_box, rvec_temp );
rvec_Add( ext_press_l, rvec_temp );
rvec_ScaledAdd( workspace.f[j], CEval8, p_ijk->dcos_dj );
rvec_ScaledAdd( f_j_l, CEval8, p_ijk->dcos_dj );
rvec_Scale( rvec_temp, CEval8, p_ijk->dcos_dk );
atomic_rvecAdd( workspace.f[k], rvec_temp );
......@@ -455,11 +456,13 @@ CUDA_GLOBAL void Cuda_Valence_Angles_Part1( reax_atom *my_atoms,
}
#if !defined(CUDA_ACCUM_ATOMIC)
rvec_Add( workspace.f[j], f_j_l );
e_ang_g[j] = e_ang_l;
e_coa_g[j] = e_coa_l;
e_pen_g[j] = e_pen_l;
rvec_Copy( ext_press_g[j], ext_press_l );
#else
atomic_rvecAdd( workspace.f[j], f_j_l );
atomicAdd( (double *) e_ang_g, (double) e_ang_l );
atomicAdd( (double *) e_coa_g, (double) e_coa_l );
atomicAdd( (double *) e_pen_g, (double) e_pen_l );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment