From add5b60ada27d874c89f34cad6a901dcbed8e8bb Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnku@msu.edu> Date: Fri, 9 Apr 2021 13:21:39 -0400 Subject: [PATCH] PG-PuReMD: split bond and hbond initialization kernels for future optimizations. Cleanup CUDA code related to far neighbor list being in full format (remove unnecessary conditionals and arithmetic). --- PG-PuReMD/src/allocate.c | 45 +- PG-PuReMD/src/cuda/cuda_allocate.cu | 285 +++--- PG-PuReMD/src/cuda/cuda_allocate.h | 4 +- PG-PuReMD/src/cuda/cuda_bond_orders.cu | 6 +- PG-PuReMD/src/cuda/cuda_bond_orders.h | 225 ++--- PG-PuReMD/src/cuda/cuda_forces.cu | 892 +++++++++++------- PG-PuReMD/src/cuda/cuda_init_md.cu | 4 +- PG-PuReMD/src/cuda/cuda_integrate.cu | 16 +- PG-PuReMD/src/cuda/cuda_neighbors.cu | 138 +-- PG-PuReMD/src/cuda/cuda_nonbonded.cu | 111 +-- PG-PuReMD/src/cuda/cuda_reset_tools.cu | 26 +- PG-PuReMD/src/cuda/cuda_valence_angles.cu | 92 +- PG-PuReMD/src/forces.c | 1027 ++++++++++++--------- PG-PuReMD/src/forces.h | 2 +- PG-PuReMD/src/init_md.c | 5 +- PG-PuReMD/src/reset_tools.c | 52 +- PG-PuReMD/src/reset_tools.h | 3 - 17 files changed, 1500 insertions(+), 1433 deletions(-) diff --git a/PG-PuReMD/src/allocate.c b/PG-PuReMD/src/allocate.c index 62704ed3..9a02f538 100644 --- a/PG-PuReMD/src/allocate.c +++ b/PG-PuReMD/src/allocate.c @@ -605,14 +605,11 @@ static void Reallocate_Matrix( sparse_matrix * const H, int n, int n_max, int m } -void Reallocate_HBonds_List( reax_system * const system, reax_list * const hbond_list ) +static void Reallocate_List( reax_list * const list, int n, int max_intrs, + int type, int format ) { - int format; - - format = hbond_list->format; - - Delete_List( hbond_list ); - Make_List( system->total_cap, system->total_hbonds, TYP_HBOND, format, hbond_list ); + Delete_List( list ); + Make_List( n, max_intrs, type, format, list ); } @@ -864,7 +861,7 @@ void Reallocate_Part2( reax_system * const system, control_params * const contro mpi_datatypes * const mpi_data ) { int nflag, Nflag; - int renbr, format; + int renbr; reallocate_data * const realloc = &workspace->realloc; sparse_matrix * const H = &workspace->H; @@ -930,36 +927,36 @@ void Reallocate_Part2( reax_system * const system, control_params * const contro #else Reallocate_Matrix( H, system->n, system->local_cap, system->total_cm_entries ); #endif - Init_Matrix_Row_Indices( H, system->max_cm_entries ); - realloc->cm = FALSE; - } - /* hydrogen bonds list */ - if ( control->hbond_cut > 0.0 - && (Nflag == TRUE || realloc->hbonds == TRUE) ) - { - Reallocate_HBonds_List( system, lists[HBONDS] ); - Init_List_Indices( lists[HBONDS], system->max_hbonds ); - realloc->hbonds = FALSE; + realloc->cm = FALSE; } /* bonds list */ if ( Nflag == TRUE || realloc->bonds == TRUE ) { - Reallocate_Bonds_List( system, lists[BONDS] ); - Init_List_Indices( lists[BONDS], system->max_bonds ); + Reallocate_List( lists[BONDS], system->total_cap, system->total_bonds, + TYP_BOND, lists[BONDS]->format ); + realloc->bonds = FALSE; realloc->thbody = TRUE; } + /* hydrogen bonds list */ + if ( control->hbond_cut > 0.0 + && (Nflag == TRUE || realloc->hbonds == TRUE) ) + { + Reallocate_List( lists[HBONDS], system->total_cap, system->total_hbonds, + TYP_HBOND, lists[HBONDS]->format ); + + realloc->hbonds = FALSE; + } + /* 3-body list */ if ( realloc->thbody == TRUE ) { - format = lists[THREE_BODIES]->format; + Reallocate_List( lists[THREE_BODIES], system->total_bonds, system->total_thbodies, + TYP_THREE_BODY, lists[THREE_BODIES]->format ); - Delete_List( lists[THREE_BODIES] ); - Make_List( system->total_bonds, system->total_thbodies, - TYP_THREE_BODY, format, lists[THREE_BODIES] ); realloc->thbody = FALSE; } } diff --git a/PG-PuReMD/src/cuda/cuda_allocate.cu b/PG-PuReMD/src/cuda/cuda_allocate.cu index a0f9e714..50f256e7 100644 --- a/PG-PuReMD/src/cuda/cuda_allocate.cu +++ b/PG-PuReMD/src/cuda/cuda_allocate.cu @@ -30,6 +30,120 @@ CUDA_GLOBAL void k_init_nbrs( ivec *nbrs, int N ) } +static void Cuda_Reallocate_List( reax_list *list, size_t n, size_t max_intrs, int type ) +{ + Cuda_Delete_List( list ); + Cuda_Make_List( n, max_intrs, type, list ); +} + + +static void Cuda_Reallocate_System_Part1( reax_system *system, storage *workspace, + int local_cap_old ) +{ + int *temp; + + cuda_check_malloc( &workspace->scratch, &workspace->scratch_size, + sizeof(int) * local_cap_old, + "Cuda_Reallocate_System_Part1::workspace->scratch" ); + temp = (int *) workspace->scratch; + + copy_device( temp, system->d_cm_entries, sizeof(int) * local_cap_old, + "Cuda_Reallocate_System_Part1::temp" ); + cuda_free( system->d_cm_entries, "Cuda_Reallocate_System_Part1::d_cm_entries" ); + cuda_malloc( (void **) &system->d_cm_entries, + sizeof(int) * system->local_cap, TRUE, "Cuda_Reallocate_System_Part1::d_cm_entries" ); + copy_device( system->d_cm_entries, temp, sizeof(int) * local_cap_old, + "Cuda_Reallocate_System_Part1::temp" ); + + copy_device( temp, system->d_max_cm_entries, sizeof(int) * local_cap_old, + "Cuda_Reallocate_System_Part1::temp" ); + cuda_free( system->d_max_cm_entries, "Cuda_Reallocate_System_Part1::d_max_cm_entries" ); + cuda_malloc( (void **) &system->d_max_cm_entries, + sizeof(int) * system->local_cap, TRUE, "Cuda_Reallocate_System_Part1::d_max_cm_entries" ); + copy_device( system->d_max_cm_entries, temp, sizeof(int) * local_cap_old, + "Cuda_Reallocate_System_Part1::temp" ); +} + + +static void Cuda_Reallocate_System_Part2( reax_system *system, storage *workspace, + int total_cap_old ) +{ + int *temp; + reax_atom *temp_atom; + + cuda_check_malloc( &workspace->scratch, &workspace->scratch_size, + MAX( sizeof(reax_atom), sizeof(int) ) * total_cap_old, + "Cuda_Reallocate_System_Part2::workspace->scratch" ); + temp = (int *) workspace->scratch; + temp_atom = (reax_atom *) workspace->scratch; + + /* free the existing storage for atoms, leave other info allocated */ + copy_device( temp_atom, system->d_my_atoms, sizeof(reax_atom) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp_atom" ); + cuda_free( system->d_my_atoms, "system::d_my_atoms" ); + cuda_malloc( (void **) &system->d_my_atoms, + sizeof(reax_atom) * system->total_cap, TRUE, + "Cuda_Reallocate_System_Part2::d_my_atoms" ); + copy_device( system->d_my_atoms, temp_atom, sizeof(reax_atom) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp_atom" ); + + /* list management */ + copy_device( temp, system->d_far_nbrs, sizeof(int) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp" ); + cuda_free( system->d_far_nbrs, "Cuda_Reallocate_System_Part2::d_far_nbrs" ); + cuda_malloc( (void **) &system->d_far_nbrs, + sizeof(int) * system->total_cap, TRUE, + "Cuda_Reallocate_System_Part2::d_far_nbrs" ); + copy_device( system->d_far_nbrs, temp, sizeof(int) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp" ); + + copy_device( temp, system->d_max_far_nbrs, sizeof(int) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp" ); + cuda_free( system->d_max_far_nbrs, "Cuda_Reallocate_System_Part2::d_max_far_nbrs" ); + cuda_malloc( (void **) &system->d_max_far_nbrs, + sizeof(int) * system->total_cap, TRUE, + "Cuda_Reallocate_System_Part2::d_max_far_nbrs" ); + copy_device( system->d_max_far_nbrs, temp, sizeof(int) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp" ); + + copy_device( temp, system->d_bonds, sizeof(int) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp" ); + cuda_free( system->d_bonds, "Cuda_Reallocate_System_Part2::d_bonds" ); + cuda_malloc( (void **) &system->d_bonds, + sizeof(int) * system->total_cap, TRUE, + "Cuda_Reallocate_System_Part2::d_bonds" ); + copy_device( system->d_bonds, temp, sizeof(int) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp" ); + + copy_device( temp, system->d_max_bonds, sizeof(int) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp" ); + cuda_free( system->d_max_bonds, "Cuda_Reallocate_System_Part2::d_max_bonds" ); + cuda_malloc( (void **) &system->d_max_bonds, + sizeof(int) * system->total_cap, TRUE, + "Cuda_Reallocate_System_Part2::d_max_bonds" ); + copy_device( system->d_max_bonds, temp, sizeof(int) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp" ); + + copy_device( temp, system->d_hbonds, sizeof(int) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp" ); + cuda_free( system->d_hbonds, "system::d_hbonds" ); + cuda_malloc( (void **) &system->d_hbonds, + sizeof(int) * system->total_cap, TRUE, + "Cuda_Reallocate_System_Part2::d_hbonds" ); + copy_device( system->d_hbonds, temp, sizeof(int) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp" ); + + copy_device( temp, system->d_max_hbonds, sizeof(int) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp" ); + cuda_free( system->d_max_hbonds, "system::d_max_hbonds" ); + cuda_malloc( (void **) &system->d_max_hbonds, + sizeof(int) * system->total_cap, TRUE, + "Cuda_Reallocate_System_Part2::d_max_hbonds" ); + copy_device( system->d_max_hbonds, temp, sizeof(int) * total_cap_old, + "Cuda_Reallocate_System_Part2::temp" ); +} + + void Cuda_Allocate_Control( control_params *control ) { cuda_malloc( (void **)&control->d_control_params, @@ -251,113 +365,6 @@ void Cuda_Allocate_System( reax_system *system ) } -static void Cuda_Reallocate_System_Part1( reax_system *system, storage *workspace, - int local_cap_old ) -{ - int *temp; - - cuda_check_malloc( &workspace->scratch, &workspace->scratch_size, - sizeof(int) * local_cap_old, - "Cuda_Reallocate_System_Part1::workspace->scratch" ); - temp = (int *) workspace->scratch; - - copy_device( temp, system->d_cm_entries, sizeof(int) * local_cap_old, - "Cuda_Reallocate_System_Part1::temp" ); - cuda_free( system->d_cm_entries, "Cuda_Reallocate_System_Part1::d_cm_entries" ); - cuda_malloc( (void **) &system->d_cm_entries, - sizeof(int) * system->local_cap, TRUE, "Cuda_Reallocate_System_Part1::d_cm_entries" ); - copy_device( system->d_cm_entries, temp, sizeof(int) * local_cap_old, - "Cuda_Reallocate_System_Part1::temp" ); - - copy_device( temp, system->d_max_cm_entries, sizeof(int) * local_cap_old, - "Cuda_Reallocate_System_Part1::temp" ); - cuda_free( system->d_max_cm_entries, "Cuda_Reallocate_System_Part1::d_max_cm_entries" ); - cuda_malloc( (void **) &system->d_max_cm_entries, - sizeof(int) * system->local_cap, TRUE, "Cuda_Reallocate_System_Part1::d_max_cm_entries" ); - copy_device( system->d_max_cm_entries, temp, sizeof(int) * local_cap_old, - "Cuda_Reallocate_System_Part1::temp" ); -} - - -static void Cuda_Reallocate_System_Part2( reax_system *system, storage *workspace, - int total_cap_old ) -{ - int *temp; - reax_atom *temp_atom; - - cuda_check_malloc( &workspace->scratch, &workspace->scratch_size, - MAX( sizeof(reax_atom), sizeof(int) ) * total_cap_old, - "Cuda_Reallocate_System_Part2::workspace->scratch" ); - temp = (int *) workspace->scratch; - temp_atom = (reax_atom *) workspace->scratch; - - /* free the existing storage for atoms, leave other info allocated */ - copy_device( temp_atom, system->d_my_atoms, sizeof(reax_atom) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp_atom" ); - cuda_free( system->d_my_atoms, "system::d_my_atoms" ); - cuda_malloc( (void **) &system->d_my_atoms, - sizeof(reax_atom) * system->total_cap, TRUE, - "Cuda_Reallocate_System_Part2::d_my_atoms" ); - copy_device( system->d_my_atoms, temp_atom, sizeof(reax_atom) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp_atom" ); - - /* list management */ - copy_device( temp, system->d_far_nbrs, sizeof(int) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp" ); - cuda_free( system->d_far_nbrs, "Cuda_Reallocate_System_Part2::d_far_nbrs" ); - cuda_malloc( (void **) &system->d_far_nbrs, - sizeof(int) * system->total_cap, TRUE, - "Cuda_Reallocate_System_Part2::d_far_nbrs" ); - copy_device( system->d_far_nbrs, temp, sizeof(int) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp" ); - - copy_device( temp, system->d_max_far_nbrs, sizeof(int) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp" ); - cuda_free( system->d_max_far_nbrs, "Cuda_Reallocate_System_Part2::d_max_far_nbrs" ); - cuda_malloc( (void **) &system->d_max_far_nbrs, - sizeof(int) * system->total_cap, TRUE, - "Cuda_Reallocate_System_Part2::d_max_far_nbrs" ); - copy_device( system->d_max_far_nbrs, temp, sizeof(int) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp" ); - - copy_device( temp, system->d_bonds, sizeof(int) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp" ); - cuda_free( system->d_bonds, "Cuda_Reallocate_System_Part2::d_bonds" ); - cuda_malloc( (void **) &system->d_bonds, - sizeof(int) * system->total_cap, TRUE, - "Cuda_Reallocate_System_Part2::d_bonds" ); - copy_device( system->d_bonds, temp, sizeof(int) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp" ); - - copy_device( temp, system->d_max_bonds, sizeof(int) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp" ); - cuda_free( system->d_max_bonds, "Cuda_Reallocate_System_Part2::d_max_bonds" ); - cuda_malloc( (void **) &system->d_max_bonds, - sizeof(int) * system->total_cap, TRUE, - "Cuda_Reallocate_System_Part2::d_max_bonds" ); - copy_device( system->d_max_bonds, temp, sizeof(int) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp" ); - - copy_device( temp, system->d_hbonds, sizeof(int) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp" ); - cuda_free( system->d_hbonds, "system::d_hbonds" ); - cuda_malloc( (void **) &system->d_hbonds, - sizeof(int) * system->total_cap, TRUE, - "Cuda_Reallocate_System_Part2::d_hbonds" ); - copy_device( system->d_hbonds, temp, sizeof(int) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp" ); - - copy_device( temp, system->d_max_hbonds, sizeof(int) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp" ); - cuda_free( system->d_max_hbonds, "system::d_max_hbonds" ); - cuda_malloc( (void **) &system->d_max_hbonds, - sizeof(int) * system->total_cap, TRUE, - "Cuda_Reallocate_System_Part2::d_max_hbonds" ); - copy_device( system->d_max_hbonds, temp, sizeof(int) * total_cap_old, - "Cuda_Reallocate_System_Part2::temp" ); -} - - void Cuda_Allocate_Simulation_Data( simulation_data *data ) { cuda_malloc( (void **) &data->d_simulation_data, @@ -729,35 +736,6 @@ void Cuda_Deallocate_Matrix( sparse_matrix *H ) } -void Cuda_Reallocate_Neighbor_List( reax_list *far_nbr_list, size_t n, size_t max_intrs ) -{ - Cuda_Delete_List( far_nbr_list ); - Cuda_Make_List( n, max_intrs, TYP_FAR_NEIGHBOR, far_nbr_list ); -} - - -void Cuda_Reallocate_HBonds_List( reax_list *hbond_list, size_t n, size_t max_intrs ) -{ - Cuda_Delete_List( hbond_list ); - Cuda_Make_List( n, max_intrs, TYP_HBOND, hbond_list ); -} - - -void Cuda_Reallocate_Bonds_List( reax_list *bond_list, size_t n, size_t max_intrs ) -{ - Cuda_Delete_List( bond_list ); - Cuda_Make_List( n, max_intrs, TYP_BOND, bond_list ); -} - - -void Cuda_Reallocate_Thbodies_List( reax_list *thbodies, size_t n, size_t max_intrs ) -{ - Cuda_Delete_List( thbodies ); - Cuda_Make_List( n, max_intrs, TYP_THREE_BODY, thbodies ); - -} - - void Cuda_Reallocate_Part1( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, mpi_datatypes *mpi_data ) @@ -851,8 +829,8 @@ void Cuda_Reallocate_Part2( reax_system *system, control_params *control, /* far neighbors */ if ( renbr == TRUE && (Nflag == TRUE || realloc->far_nbrs == TRUE) ) { - Cuda_Reallocate_Neighbor_List( lists[FAR_NBRS], - system->total_cap, system->total_far_nbrs ); + Cuda_Reallocate_List( lists[FAR_NBRS], system->total_cap, + system->total_far_nbrs, TYP_FAR_NEIGHBOR ); Cuda_Init_Neighbor_Indices( system, lists[FAR_NBRS] ); realloc->far_nbrs = FALSE; } @@ -865,34 +843,35 @@ void Cuda_Reallocate_Part2( reax_system *system, control_params *control, Cuda_Deallocate_Matrix( H ); Cuda_Allocate_Matrix( H, system->n, system->local_cap, system->total_cm_entries, format ); - Cuda_Init_Sparse_Matrix_Indices( system, H ); + realloc->cm = FALSE; } + /* bonds list */ + if ( Nflag == TRUE || realloc->bonds == TRUE ) + { + Cuda_Reallocate_List( lists[BONDS], system->total_cap, + system->total_bonds, TYP_BOND ); + + realloc->bonds = FALSE; + } + /* hydrogen bonds list */ if ( control->hbond_cut > 0.0 && system->numH > 0 && (Nflag == TRUE || realloc->hbonds == TRUE) ) { - Cuda_Reallocate_HBonds_List( lists[HBONDS], - system->total_cap, system->total_hbonds ); - Cuda_Init_HBond_Indices( system, workspace, lists[HBONDS] ); - realloc->hbonds = FALSE; - } + Cuda_Reallocate_List( lists[HBONDS], system->total_cap, + system->total_hbonds, TYP_HBOND ); - /* bonds list */ - if ( Nflag == TRUE || realloc->bonds == TRUE ) - { - Cuda_Reallocate_Bonds_List( lists[BONDS], - system->total_cap, system->total_bonds ); - Cuda_Init_Bond_Indices( system, lists[BONDS] ); - realloc->bonds = FALSE; + realloc->hbonds = FALSE; } /* 3-body list */ if ( Nflag == TRUE || realloc->thbody == TRUE ) { - Cuda_Reallocate_Thbodies_List( lists[THREE_BODIES], - system->total_thbodies_indices, system->total_thbodies ); + Cuda_Reallocate_List( lists[THREE_BODIES], system->total_thbodies_indices, + system->total_thbodies, TYP_THREE_BODY ); + realloc->thbody = FALSE; } } diff --git a/PG-PuReMD/src/cuda/cuda_allocate.h b/PG-PuReMD/src/cuda/cuda_allocate.h index 82329aca..83e6a9a6 100644 --- a/PG-PuReMD/src/cuda/cuda_allocate.h +++ b/PG-PuReMD/src/cuda/cuda_allocate.h @@ -10,14 +10,14 @@ void Cuda_Allocate_Grid( reax_system * ); void Cuda_Allocate_Simulation_Data( simulation_data * ); +void Cuda_Allocate_Control( control_params * ); + void Cuda_Allocate_Workspace_Part1( reax_system *, control_params *, storage *, int ); void Cuda_Allocate_Workspace_Part2( reax_system *, control_params *, storage *, int ); void Cuda_Allocate_Matrix( sparse_matrix * const, int, int, int, int ); -void Cuda_Allocate_Control( control_params * ); - void Cuda_Deallocate_Grid_Cell_Atoms( reax_system * ); void Cuda_Allocate_Grid_Cell_Atoms( reax_system *, int ); diff --git a/PG-PuReMD/src/cuda/cuda_bond_orders.cu b/PG-PuReMD/src/cuda/cuda_bond_orders.cu index 540c7b40..c6d8b57b 100644 --- a/PG-PuReMD/src/cuda/cuda_bond_orders.cu +++ b/PG-PuReMD/src/cuda/cuda_bond_orders.cu @@ -410,7 +410,6 @@ CUDA_GLOBAL void k_bond_order_part2( reax_atom *my_atoms, global_parameters gp, type_j = my_atoms[j].type; bo_ij = &bond_list.bond_list[pj].bo_data; - //TODO //if ( i < j || workspace.bond_mark[j] > 3 ) if ( i < j ) { @@ -592,11 +591,10 @@ CUDA_GLOBAL void k_bond_order_part3( storage workspace, reax_list bond_list, int for ( pj = start_i; pj < end_i; ++pj ) { - j = bond_list.bond_list[pj].nbr; bo_ij = &bond_list.bond_list[pj].bo_data; - //if ( i >= j || workspace.bond_mark [i] <= 3 ) + //if ( i >= j || workspace.bond_mark[i] <= 3 ) if ( i >= j ) { /* We only need to update bond orders from bo_ji @@ -684,7 +682,6 @@ CUDA_GLOBAL void k_total_forces_part1( storage workspace, reax_list bond_list, return; } - //if ( i < bond_list.bond_list[pj].nbr ) { if ( control->virial == 0 ) { for ( pj = Start_Index( i, &bond_list ); pj < End_Index( i, &bond_list ); ++pj ) @@ -722,7 +719,6 @@ CUDA_GLOBAL void k_total_forces_part1_2( reax_atom *my_atoms, reax_list bond_lis nbr_k = &bond_list.bond_list[pk]; nbr_k_sym = &bond_list.bond_list[nbr_k->sym_index]; - //rvec_Add( atoms[i].f, nbr_k_sym->tf_f ); rvec_Add( workspace.f[i], nbr_k_sym->tf_f ); } } diff --git a/PG-PuReMD/src/cuda/cuda_bond_orders.h b/PG-PuReMD/src/cuda/cuda_bond_orders.h index d419b3b3..a896eee6 100644 --- a/PG-PuReMD/src/cuda/cuda_bond_orders.h +++ b/PG-PuReMD/src/cuda/cuda_bond_orders.h @@ -4,6 +4,8 @@ #include "../reax_types.h" +#include "cuda_helpers.h" + #include "../vector.h" @@ -20,179 +22,72 @@ void Cuda_Total_Forces_Part2( reax_system *, storage * ); * and if this term exceeds the cutoff bo_cut, then adds * BOTH atoms the bonds list (i.e., compute term once * and copy to avoid redundant computation) */ -CUDA_DEVICE static inline int Cuda_BOp( reax_list bond_list, real bo_cut, - int i, int btop_i, int j, ivec *rel_box, - real d, rvec *dvec, int format, single_body_parameters *sbp_i, - single_body_parameters *sbp_j, two_body_parameters *twbp, - rvec *dDeltap_self, real *total_bond_order ) +CUDA_DEVICE static inline void Cuda_Compute_BOp( reax_list bond_list, real bo_cut, + int i, int btop_i, int j, real C12, real C34, real C56, real BO_s, + real BO_pi, real BO_pi2, real BO, ivec *rel_box, real d, rvec *dvec, + int format, two_body_parameters *twbp, rvec *dDeltap_self, + real *total_bond_order ) { - int btop_j, ret; - real r2, C12, C34, C56; + real r2; real Cln_BOp_s, Cln_BOp_pi, Cln_BOp_pi2; - real BO, BO_s, BO_pi, BO_pi2; - bond_data *ibond, *jbond; - bond_order_data *bo_ij, *bo_ji; + bond_data *ibond; + bond_order_data *bo_ij; - ret = FALSE; r2 = SQR( d ); - if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 ) - { - C12 = twbp->p_bo1 * POW( d / twbp->r_s, twbp->p_bo2 ); - BO_s = (1.0 + bo_cut) * exp( C12 ); - } - else - { - C12 = 0.0; - BO_s = 0.0; - } - - if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 ) - { - C34 = twbp->p_bo3 * POW( d / twbp->r_p, twbp->p_bo4 ); - BO_pi = exp( C34 ); - } - else - { - C34 = 0.0; - BO_pi = 0.0; - } - - if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 ) - { - C56 = twbp->p_bo5 * POW( d / twbp->r_pp, twbp->p_bo6 ); - BO_pi2 = exp( C56 ); - } - else - { - C56 = 0.0; - BO_pi2 = 0.0; - } - - /* Initially BO values are the uncorrected ones, page 1 */ - BO = BO_s + BO_pi + BO_pi2; - - if ( BO >= bo_cut ) - { - /* Bond Order page2-3, derivative of total bond order prime */ - Cln_BOp_s = twbp->p_bo2 * C12 / r2; - Cln_BOp_pi = twbp->p_bo4 * C34 / r2; - Cln_BOp_pi2 = twbp->p_bo6 * C56 / r2; - - /****** bonds i-j and j-i ******/ - if ( i < j ) - { - /****** bond i-j ONLY ******/ - ibond = &bond_list.bond_list[btop_i]; - ibond->nbr = j; - ibond->d = d; - - rvec_Copy( ibond->dvec, *dvec ); - ivec_Copy( ibond->rel_box, *rel_box ); - - //ibond->dbond_index = btop_i; - //ibond->sym_index = btop_j; - - bo_ij = &ibond->bo_data; - bo_ij->BO = BO; - bo_ij->BO_s = BO_s; - bo_ij->BO_pi = BO_pi; - bo_ij->BO_pi2 = BO_pi2; - - /* Only dln_BOp_xx wrt. dr_i is stored here, note that - * dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */ - rvec_Scale( bo_ij->dln_BOp_s, -1.0 * bo_ij->BO_s * Cln_BOp_s, ibond->dvec ); - rvec_Scale( bo_ij->dln_BOp_pi, -1.0 * bo_ij->BO_pi * Cln_BOp_pi, ibond->dvec ); - rvec_Scale( bo_ij->dln_BOp_pi2, -1.0 * bo_ij->BO_pi2 * Cln_BOp_pi2, ibond->dvec ); - - /* Only dBOp wrt. dr_i is stored here, note that - * dBOp/dr_i = -dBOp/dr_j and all others are 0 */ - rvec_Scale( bo_ij->dBOp, -(bo_ij->BO_s * Cln_BOp_s - + bo_ij->BO_pi * Cln_BOp_pi - + bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec ); - - rvec_Add( dDeltap_self[i], bo_ij->dBOp ); - - bo_ij->BO_s -= bo_cut; - bo_ij->BO -= bo_cut; - /* currently total_BOp */ - total_bond_order[i] += bo_ij->BO; - bo_ij->Cdbo = 0.0; - bo_ij->Cdbopi = 0.0; - bo_ij->Cdbopi2 = 0.0; - -#if !defined(CUDA_ACCUM_ATOMIC) - ibond->ae_CdDelta = 0.0; - ibond->va_CdDelta = 0.0; - rvec_MakeZero( ibond->va_f ); - ibond->ta_CdDelta = 0.0; - ibond->ta_Cdbo = 0.0; - rvec_MakeZero( ibond->ta_f ); - rvec_MakeZero( ibond->hb_f ); - rvec_MakeZero( ibond->tf_f ); -#endif - } - else - { - /****** bond j-i ONLY ******/ - //btop_j = End_Index( j, &bond_list ); - btop_j = btop_i; - jbond = &bond_list.bond_list[btop_j]; - - jbond->nbr = j; - jbond->d = d; - rvec_Scale( jbond->dvec, -1.0, *dvec ); - ivec_Scale( jbond->rel_box, -1.0, *rel_box ); - //jbond->dbond_index = btop_i; - //jbond->sym_index = btop_i; - - //Set_End_Index( j, btop_j + 1, &bond_list ); - - bo_ji = &jbond->bo_data; - bo_ji->BO = BO; - bo_ji->BO_s = BO_s; - bo_ji->BO_pi = BO_pi; - bo_ji->BO_pi2 = BO_pi2; - - /* Only dln_BOp_xx wrt. dr_i is stored here, note that - * dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */ - rvec_Scale( bo_ji->dln_BOp_s, BO_s * Cln_BOp_s, *dvec ); - rvec_Scale( bo_ji->dln_BOp_pi, BO_pi * Cln_BOp_pi, *dvec ); - rvec_Scale( bo_ji->dln_BOp_pi2, BO_pi2 * Cln_BOp_pi2, *dvec ); - - /* Only dBOp wrt. dr_i is stored here, note that - * dBOp/dr_i = -dBOp/dr_j and all others are 0 */ - rvec_Scale( bo_ji->dBOp, BO_s * Cln_BOp_s - + BO_pi * Cln_BOp_pi - + BO_pi2 * Cln_BOp_pi2, *dvec ); - - rvec_Add( dDeltap_self[i], bo_ji->dBOp ); - - bo_ji->BO_s -= bo_cut; - bo_ji->BO -= bo_cut; - - /* currently total_BOp */ - total_bond_order[i] += bo_ji->BO; - bo_ji->Cdbo = 0.0; - bo_ji->Cdbopi = 0.0; - bo_ji->Cdbopi2 = 0.0; + /****** bond i-j ONLY ******/ + ibond = &bond_list.bond_list[btop_i]; + ibond->nbr = j; + ibond->d = d; + + rvec_Copy( ibond->dvec, *dvec ); + ivec_Copy( ibond->rel_box, *rel_box ); + + bo_ij = &ibond->bo_data; + bo_ij->BO = BO; + bo_ij->BO_s = BO_s; + bo_ij->BO_pi = BO_pi; + bo_ij->BO_pi2 = BO_pi2; + + /* Bond Order page2-3, derivative of total bond order prime */ + Cln_BOp_s = twbp->p_bo2 * C12 / r2; + Cln_BOp_pi = twbp->p_bo4 * C34 / r2; + Cln_BOp_pi2 = twbp->p_bo6 * C56 / r2; + + /* Only dln_BOp_xx wrt. dr_i is stored here, note that + * dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */ + rvec_Scale( bo_ij->dln_BOp_s, -1.0 * bo_ij->BO_s * Cln_BOp_s, ibond->dvec ); + rvec_Scale( bo_ij->dln_BOp_pi, -1.0 * bo_ij->BO_pi * Cln_BOp_pi, ibond->dvec ); + rvec_Scale( bo_ij->dln_BOp_pi2, -1.0 * bo_ij->BO_pi2 * Cln_BOp_pi2, ibond->dvec ); + + /* Only dBOp wrt. dr_i is stored here, note that + * dBOp/dr_i = -dBOp/dr_j and all others are 0 */ + rvec_Scale( bo_ij->dBOp, -1.0 * (bo_ij->BO_s * Cln_BOp_s + + bo_ij->BO_pi * Cln_BOp_pi + + bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec ); + + rvec_Add( dDeltap_self[i], bo_ij->dBOp ); +// atomic_rvecAdd( dDeltap_self[i], bo_ij->dBOp ); + + bo_ij->BO_s -= bo_cut; + bo_ij->BO -= bo_cut; + /* currently total_BOp */ + total_bond_order[i] += bo_ij->BO; +// atomicAdd( (double *) &total_bond_order[i], bo_ij->BO ); + bo_ij->Cdbo = 0.0; + bo_ij->Cdbopi = 0.0; + bo_ij->Cdbopi2 = 0.0; #if !defined(CUDA_ACCUM_ATOMIC) - jbond->ae_CdDelta = 0.0; - jbond->va_CdDelta = 0.0; - rvec_MakeZero( jbond->va_f ); - jbond->ta_CdDelta = 0.0; - jbond->ta_Cdbo = 0.0; - rvec_MakeZero( jbond->ta_f ); - rvec_MakeZero( jbond->hb_f ); - rvec_MakeZero( jbond->tf_f ); + ibond->ae_CdDelta = 0.0; + ibond->va_CdDelta = 0.0; + rvec_MakeZero( ibond->va_f ); + ibond->ta_CdDelta = 0.0; + ibond->ta_Cdbo = 0.0; + rvec_MakeZero( ibond->ta_f ); + rvec_MakeZero( ibond->hb_f ); + rvec_MakeZero( ibond->tf_f ); #endif - } - - ret = TRUE; - } - - return ret; } diff --git a/PG-PuReMD/src/cuda/cuda_forces.cu b/PG-PuReMD/src/cuda/cuda_forces.cu index 8e4f5eb7..3826b36f 100644 --- a/PG-PuReMD/src/cuda/cuda_forces.cu +++ b/PG-PuReMD/src/cuda/cuda_forces.cu @@ -139,25 +139,10 @@ CUDA_GLOBAL void k_init_end_index( int * intr_cnt, int *indices, int *end_indice } -CUDA_GLOBAL void k_init_hindex( reax_atom *my_atoms, int N ) -{ - int i; - - i = blockIdx.x * blockDim.x + threadIdx.x; - - if ( i >= N ) - { - return; - } - - my_atoms[i].Hindex = i; -} - - CUDA_GLOBAL void k_init_hbond_indices( reax_atom * atoms, single_body_parameters *sbp, int *hbonds, int *max_hbonds, int *indices, int *end_indices, int N ) { - int i, hindex, my_hbonds; + int i, hindex, flag; i = blockIdx.x * blockDim.x + threadIdx.x; @@ -168,20 +153,12 @@ CUDA_GLOBAL void k_init_hbond_indices( reax_atom * atoms, single_body_parameters hindex = atoms[i].Hindex; - if ( sbp[ atoms[i].type ].p_hbond == H_ATOM - || sbp[ atoms[i].type ].p_hbond == H_BONDING_ATOM ) - { - my_hbonds = hbonds[i]; - indices[hindex] = max_hbonds[i]; - end_indices[hindex] = indices[hindex] + hbonds[i]; - } - else - { - my_hbonds = 0; - indices[hindex] = 0; - end_indices[hindex] = 0; - } - atoms[i].num_hbonds = my_hbonds; + flag = (sbp[ atoms[i].type ].p_hbond == H_ATOM + || sbp[ atoms[i].type ].p_hbond == H_BONDING_ATOM ? TRUE : FALSE); + + indices[hindex] = (flag == TRUE ? max_hbonds[i] : 0); + end_indices[hindex] = (flag == TRUE ? indices[hindex] + hbonds[i] : 0); + atoms[i].num_hbonds = (flag == TRUE ? hbonds[i] : 0); } @@ -190,7 +167,6 @@ CUDA_GLOBAL void k_print_hbond_info( reax_atom *my_atoms, single_body_parameters { int i; int type_i; - int ihb, ihb_top; single_body_parameters *sbp_i; reax_atom *atom_i; @@ -205,21 +181,8 @@ CUDA_GLOBAL void k_print_hbond_info( reax_atom *my_atoms, single_body_parameters type_i = atom_i->type; sbp_i = &sbp[type_i]; - if ( control->hbond_cut > 0.0 ) - { - ihb = sbp_i->p_hbond; - - if ( ihb == H_ATOM || ihb == H_BONDING_ATOM ) - { - ihb_top = Start_Index( atom_i->Hindex, &hbond_list ); - } - else - { - ihb_top = -1; - } - } - - printf( "atom %6d: ihb = %2d, ihb_top = %2d\n", i, ihb, ihb_top ); + printf( "atom %6d: ihb = %2d, ihb_top = %2d\n", i, sbp_i->p_hbond, + Start_Index( atom_i->Hindex, &hbond_list ) ); } @@ -247,18 +210,9 @@ CUDA_GLOBAL void k_init_dist( reax_atom *my_atoms, reax_list far_nbr_list, int N { j = far_nbr_list.far_nbr_list.nbr[pj]; - if ( i < j ) - { - far_nbr_list.far_nbr_list.dvec[pj][0] = my_atoms[j].x[0] - x_i[0]; - far_nbr_list.far_nbr_list.dvec[pj][1] = my_atoms[j].x[1] - x_i[1]; - far_nbr_list.far_nbr_list.dvec[pj][2] = my_atoms[j].x[2] - x_i[2]; - } - else - { - far_nbr_list.far_nbr_list.dvec[pj][0] = x_i[0] - my_atoms[j].x[0]; - far_nbr_list.far_nbr_list.dvec[pj][1] = x_i[1] - my_atoms[j].x[1]; - far_nbr_list.far_nbr_list.dvec[pj][2] = x_i[2] - my_atoms[j].x[2]; - } + far_nbr_list.far_nbr_list.dvec[pj][0] = my_atoms[j].x[0] - x_i[0]; + far_nbr_list.far_nbr_list.dvec[pj][1] = my_atoms[j].x[1] - x_i[1]; + far_nbr_list.far_nbr_list.dvec[pj][2] = my_atoms[j].x[2] - x_i[2]; far_nbr_list.far_nbr_list.d[pj] = rvec_Norm( far_nbr_list.far_nbr_list.dvec[pj] ); } } @@ -269,44 +223,52 @@ CUDA_GLOBAL void k_init_dist( reax_atom *my_atoms, reax_list far_nbr_list, int N */ CUDA_GLOBAL void k_init_dist_opt( reax_atom *my_atoms, reax_list far_nbr_list, int N ) { - int j, pj, start_i, end_i, thread_id, warp_id, lane_id; + int j, pj, start_i, end_i, thread_id, i, lane_id; rvec x_i; thread_id = blockIdx.x * blockDim.x + threadIdx.x; - warp_id = thread_id >> 5; + i = thread_id / warpSize; - if ( warp_id >= N ) + if ( i >= N ) { return; } - lane_id = thread_id & 0x0000001F; - start_i = Start_Index( warp_id, &far_nbr_list ); - end_i = End_Index( warp_id, &far_nbr_list ); - rvec_Copy( x_i, my_atoms[warp_id].x ); + lane_id = thread_id % warpSize; + start_i = Start_Index( i, &far_nbr_list ); + end_i = End_Index( i, &far_nbr_list ); + rvec_Copy( x_i, my_atoms[i].x ); /* update distance and displacement vector between atoms i and j (i-j) */ for ( pj = start_i + lane_id; pj < end_i; pj += warpSize ) { j = far_nbr_list.far_nbr_list.nbr[pj]; - if ( warp_id < j ) - { - far_nbr_list.far_nbr_list.dvec[pj][0] = my_atoms[j].x[0] - x_i[0]; - far_nbr_list.far_nbr_list.dvec[pj][1] = my_atoms[j].x[1] - x_i[1]; - far_nbr_list.far_nbr_list.dvec[pj][2] = my_atoms[j].x[2] - x_i[2]; - } - else - { - far_nbr_list.far_nbr_list.dvec[pj][0] = x_i[0] - my_atoms[j].x[0]; - far_nbr_list.far_nbr_list.dvec[pj][1] = x_i[1] - my_atoms[j].x[1]; - far_nbr_list.far_nbr_list.dvec[pj][2] = x_i[2] - my_atoms[j].x[2]; - } + far_nbr_list.far_nbr_list.dvec[pj][0] = my_atoms[j].x[0] - x_i[0]; + far_nbr_list.far_nbr_list.dvec[pj][1] = my_atoms[j].x[1] - x_i[1]; + far_nbr_list.far_nbr_list.dvec[pj][2] = my_atoms[j].x[2] - x_i[2]; far_nbr_list.far_nbr_list.d[pj] = rvec_Norm( far_nbr_list.far_nbr_list.dvec[pj] ); } } + +CUDA_GLOBAL void k_reset_bond_orders( storage workspace, int N ) +{ + int i; + + i = blockIdx.x * blockDim.x + threadIdx.x; + + if ( i >= N ) + { + return; + } + + workspace.total_bond_order[i] = 0.0; + rvec_MakeZero( workspace.dDeltap_self[i] ); +} + + /* Compute the charge matrix entries and store the matrix in half format * using the far neighbors list (stored in full format) and according to * the full shell communication method */ @@ -378,7 +340,7 @@ CUDA_GLOBAL void k_init_cm_half_fs( reax_atom *my_atoms, single_body_parameters H->end[i] = cm_top; num_cm_entries = cm_top - H->start[i]; - /* reallocation checks */ + /* reallocation check */ if ( num_cm_entries > max_cm_entries[i] ) { *realloc_cm_entries = TRUE; @@ -456,7 +418,7 @@ CUDA_GLOBAL void k_init_cm_half_fs_tab( reax_atom *my_atoms, single_body_paramet H->end[i] = cm_top; num_cm_entries = cm_top - H->start[i]; - /* reallocation checks */ + /* reallocation check */ if ( num_cm_entries > max_cm_entries[i] ) { *realloc_cm_entries = TRUE; @@ -531,7 +493,7 @@ CUDA_GLOBAL void k_init_cm_full_fs( reax_atom *my_atoms, single_body_parameters H->end[i] = cm_top; num_cm_entries = cm_top - H->start[i]; - /* reallocation checks */ + /* reallocation check */ if ( num_cm_entries > max_cm_entries[i] ) { *realloc_cm_entries = TRUE; @@ -621,7 +583,7 @@ CUDA_GLOBAL void k_init_cm_full_fs_opt( reax_atom *my_atoms, single_body_paramet H->end[i] = cm_top; num_cm_entries = cm_top - H->start[i]; - /* reallocation checks */ + /* reallocation check */ if ( num_cm_entries > max_cm_entries[i] ) { *realloc_cm_entries = TRUE; @@ -696,7 +658,7 @@ CUDA_GLOBAL void k_init_cm_full_fs_tab( reax_atom *my_atoms, single_body_paramet H->end[i] = cm_top; num_cm_entries = cm_top - H->start[i]; - /* reallocation checks */ + /* reallocation check */ if ( num_cm_entries > max_cm_entries[i] ) { *realloc_cm_entries = TRUE; @@ -706,17 +668,17 @@ CUDA_GLOBAL void k_init_cm_full_fs_tab( reax_atom *my_atoms, single_body_paramet CUDA_GLOBAL void k_init_bonds( reax_atom *my_atoms, single_body_parameters *sbp, two_body_parameters *tbp, storage workspace, control_params *control, - reax_list far_nbr_list, reax_list bond_list, reax_list hbond_list, - LR_lookup_table *t_LR, int n, int N, int num_atom_types, - int *max_bonds, int *realloc_bonds, - int *max_hbonds, int *realloc_hbonds ) + reax_list far_nbr_list, reax_list bond_list, int n, int N, + int num_atom_types, int *max_bonds, int *realloc_bonds ) { int i, j, pj; int start_i, end_i; int type_i, type_j; - int btop_i, ihb, jhb, ihb_top; - int num_bonds, num_hbonds; - real cutoff; + int btop_i; + int num_bonds; + real cutoff, r_ij; + real C12, C34, C56; + real BO_s, BO_pi, BO_pi2, BO; single_body_parameters *sbp_i, *sbp_j; two_body_parameters *twbp; reax_atom *atom_i, *atom_j; @@ -734,12 +696,10 @@ CUDA_GLOBAL void k_init_bonds( reax_atom *my_atoms, single_body_parameters *sbp, end_i = End_Index( i, &far_nbr_list ); btop_i = Start_Index( i, &bond_list ); sbp_i = &sbp[type_i]; - ihb = NON_H_BONDING_ATOM; - ihb_top = -1; if ( i < n ) { - cutoff = control->nonb_cut; + cutoff = MIN( control->nonb_cut, control->bond_cut ); // workspace.bond_mark[i] = 0; } else @@ -749,154 +709,358 @@ CUDA_GLOBAL void k_init_bonds( reax_atom *my_atoms, single_body_parameters *sbp, // workspace.bond_mark[i] = 1000; } - if ( control->hbond_cut > 0.0 ) + /* check if j is within cutoff */ + for ( pj = start_i; pj < end_i; ++pj ) { - ihb = sbp_i->p_hbond; - - if ( ihb == H_ATOM || ihb == H_BONDING_ATOM ) - { - ihb_top = Start_Index( atom_i->Hindex, &hbond_list ); - } - else + /* uncorrected bond orders */ + if ( far_nbr_list.far_nbr_list.d[pj] <= cutoff ) { - ihb_top = -1; + j = far_nbr_list.far_nbr_list.nbr[pj]; + atom_j = &my_atoms[j]; + type_j = atom_j->type; + r_ij = far_nbr_list.far_nbr_list.d[pj]; + sbp_j = &sbp[type_j]; + twbp = &tbp[ index_tbp(type_i, type_j, num_atom_types) ]; + + /* uncorrected bond orders */ + if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 ) + { + C12 = twbp->p_bo1 * POW( r_ij / twbp->r_s, twbp->p_bo2 ); + BO_s = (1.0 + control->bo_cut) * EXP( C12 ); + } + else + { + C12 = 0.0; + BO_s = 0.0; + } + + if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 ) + { + C34 = twbp->p_bo3 * POW( r_ij / twbp->r_p, twbp->p_bo4 ); + BO_pi = EXP( C34 ); + } + else + { + C34 = 0.0; + BO_pi = 0.0; + } + + if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 ) + { + C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 ); + BO_pi2 = EXP( C56 ); + } + else + { + C56 = 0.0; + BO_pi2 = 0.0; + } + + /* initially BO values are the uncorrected ones, page 1 */ + BO = BO_s + BO_pi + BO_pi2; + + if ( BO >= control->bo_cut ) + { + /* compute and append bond info to list */ + Cuda_Compute_BOp( bond_list, control->bo_cut, i, btop_i, + far_nbr_list.far_nbr_list.nbr[pj], + C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, + &far_nbr_list.far_nbr_list.rel_box[pj], + far_nbr_list.far_nbr_list.d[pj], + &far_nbr_list.far_nbr_list.dvec[pj], far_nbr_list.format, + twbp, workspace.dDeltap_self, workspace.total_bond_order ); + + ++btop_i; + + /* TODO: future optimization if bond_mark implemented */ +// if ( workspace->bond_mark[j] > workspace->bond_mark[i] + 1 ) +// { +// workspace->bond_mark[j] = workspace->bond_mark[i] + 1; +// } +// else if ( workspace->bond_mark[i] > workspace->bond_mark[j] + 1 ) +// { +// workspace->bond_mark[i] = workspace->bond_mark[j] + 1; +// } + } } } - /* update i-j distance - check if j is within cutoff */ - for ( pj = start_i; pj < end_i; ++pj ) + Set_End_Index( i, btop_i, &bond_list ); + + num_bonds = btop_i - Start_Index( i, &bond_list ); + + /* copy bond info to atom structure + * (needed for atom ownership transfer via MPI) */ + my_atoms[i].num_bonds = num_bonds; + + /* reallocation check */ + if ( num_bonds > max_bonds[i] ) { - j = far_nbr_list.far_nbr_list.nbr[pj]; - atom_j = &my_atoms[j]; + *realloc_bonds = TRUE; + } +} + + +CUDA_GLOBAL void k_init_bonds_opt( reax_atom *my_atoms, single_body_parameters *sbp, + two_body_parameters *tbp, storage workspace, control_params *control, + reax_list far_nbr_list, reax_list bond_list, int n, int N, + int num_atom_types, int *max_bonds, int *realloc_bonds ) +{ + extern __shared__ cub::WarpScan<int>::TempStorage temp[]; + int i, j, pj, thread_id, lane_id, itr; + int start_i, end_i; + int type_i, type_j; + int btop_i, offset, flag; + int num_bonds; + real cutoff, r_ij; + real C12, C34, C56; + real BO_s, BO_pi, BO_pi2, BO; + single_body_parameters *sbp_i, *sbp_j; + two_body_parameters *twbp; + reax_atom *atom_i, *atom_j; + + thread_id = blockIdx.x * blockDim.x + threadIdx.x; + /* all threads within a warp are assigned the bonds + * for a unique atom */ + i = thread_id / warpSize; + + if ( i >= N ) + { + return; + } + + lane_id = thread_id % warpSize; + atom_i = &my_atoms[i]; + type_i = atom_i->type; + start_i = Start_Index( i, &far_nbr_list ); + end_i = End_Index( i, &far_nbr_list ); + btop_i = Start_Index( i, &bond_list ); + sbp_i = &sbp[type_i]; + + if ( i < n ) + { + cutoff = MIN( control->nonb_cut, control->bond_cut ); +// workspace.bond_mark[i] = 0; + } + else + { + cutoff = control->bond_cut; + /* put ghost atoms to an infinite distance (i.e., 1000) */ +// workspace.bond_mark[i] = 1000; + } - if ( far_nbr_list.far_nbr_list.d[pj] <= control->nonb_cut ) + for ( itr = 0, pj = start_i + lane_id; itr < (end_i - start_i + warpSize - 1) / warpSize; ++itr ) + { + /* uncorrected bond orders */ + if ( pj < end_i && far_nbr_list.far_nbr_list.d[pj] <= cutoff ) { + j = far_nbr_list.far_nbr_list.nbr[pj]; + atom_j = &my_atoms[j]; type_j = atom_j->type; + r_ij = far_nbr_list.far_nbr_list.d[pj]; sbp_j = &sbp[type_j]; - ihb = sbp_i->p_hbond; - jhb = sbp_j->p_hbond; - - /* atom i: H bonding, ghost - * atom j: H atom, native */ - if ( control->hbond_cut > 0.0 && i >= n && j < n - && ihb == H_BONDING_ATOM && jhb == H_ATOM - && far_nbr_list.far_nbr_list.d[pj] <= control->hbond_cut ) + twbp = &tbp[ index_tbp(type_i, type_j, num_atom_types) ]; + + /* uncorrected bond orders */ + if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 ) { - hbond_list.hbond_list[ihb_top].nbr = j; - hbond_list.hbond_list[ihb_top].scl = -1; - hbond_list.hbond_list[ihb_top].ptr = pj; + C12 = twbp->p_bo1 * POW( r_ij / twbp->r_s, twbp->p_bo2 ); + BO_s = (1.0 + control->bo_cut) * EXP( C12 ); + } + else + { + C12 = 0.0; + BO_s = 0.0; + } -#if !defined(CUDA_ACCUM_ATOMIC) - hbond_list.hbond_list[ihb_top].sym_index = -1; - rvec_MakeZero( hbond_list.hbond_list[ihb_top].hb_f ); -#endif + if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 ) + { + C34 = twbp->p_bo3 * POW( r_ij / twbp->r_p, twbp->p_bo4 ); + BO_pi = EXP( C34 ); + } + else + { + C34 = 0.0; + BO_pi = 0.0; + } - ++ihb_top; + if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 ) + { + C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 ); + BO_pi2 = EXP( C56 ); + } + else + { + C56 = 0.0; + BO_pi2 = 0.0; } + + /* initially BO values are the uncorrected ones, page 1 */ + BO = BO_s + BO_pi + BO_pi2; + } + else + { + BO = 0.0; } - if ( far_nbr_list.far_nbr_list.d[pj] <= cutoff ) + offset = (pj < end_i && far_nbr_list.far_nbr_list.d[pj] <= cutoff && BO >= control->bo_cut) ? 1 : 0; + flag = (offset == 1) ? TRUE : FALSE; + cub::WarpScan<int>(temp[i % (blockDim.x / warpSize)]).ExclusiveSum(offset, offset); + + if ( flag == TRUE ) { - type_j = atom_j->type; - sbp_j = &sbp[type_j]; - twbp = &tbp[ index_tbp(type_i, type_j, num_atom_types) ]; + /* compute and append bond info to list */ + Cuda_Compute_BOp( bond_list, control->bo_cut, i, btop_i + offset, + far_nbr_list.far_nbr_list.nbr[pj], + C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, + &far_nbr_list.far_nbr_list.rel_box[pj], + far_nbr_list.far_nbr_list.d[pj], + &far_nbr_list.far_nbr_list.dvec[pj], far_nbr_list.format, + twbp, workspace.dDeltap_self, workspace.total_bond_order ); + + /* TODO: future optimization if bond_mark implemented */ +// if ( workspace->bond_mark[j] > workspace->bond_mark[i] + 1 ) +// { +// workspace->bond_mark[j] = workspace->bond_mark[i] + 1; +// } +// else if ( workspace->bond_mark[i] > workspace->bond_mark[j] + 1 ) +// { +// workspace->bond_mark[i] = workspace->bond_mark[j] + 1; +// } + } + + /* get btop_i from thread in last lane */ + btop_i = btop_i + offset + (flag == TRUE ? 1 : 0); + btop_i = __shfl_sync( FULL_MASK, btop_i, warpSize - 1 ); + + pj += warpSize; + } + + if ( lane_id == 0 ) + { + Set_End_Index( i, btop_i, &bond_list ); + + num_bonds = btop_i - Start_Index( i, &bond_list ); + + /* copy bond info to atom structure + * (needed for atom ownership transfer via MPI) */ + my_atoms[i].num_bonds = num_bonds; + + /* reallocation check */ + if ( num_bonds > max_bonds[i] ) + { + *realloc_bonds = TRUE; + } + } +} + + +/* Construct the interaction list for hydrogen bonds */ +CUDA_GLOBAL void k_init_hbonds( reax_atom *my_atoms, single_body_parameters *sbp, + control_params *control, reax_list far_nbr_list, reax_list hbond_list, + int n, int N, int num_atom_types, int *max_hbonds, int *realloc_hbonds ) +{ + int i, j, pj; + int start_i, end_i; + int type_i, type_j; + int ihb, jhb, ihb_top; + int num_hbonds; + real cutoff; + single_body_parameters *sbp_i, *sbp_j; + reax_atom *atom_i, *atom_j; + + i = blockIdx.x * blockDim.x + threadIdx.x; + + if ( i >= N ) + { + return; + } + + atom_i = &my_atoms[i]; + type_i = atom_i->type; + start_i = Start_Index( i, &far_nbr_list ); + end_i = End_Index( i, &far_nbr_list ); + sbp_i = &sbp[type_i]; + ihb = sbp_i->p_hbond; + + cutoff = MIN( control->nonb_cut, control->hbond_cut ); - if ( i < n ) + ihb_top = Start_Index( atom_i->Hindex, &hbond_list ); + + if ( i < n && ihb == H_ATOM || ihb == H_BONDING_ATOM ) + { + /* check if j is within cutoff */ + for ( pj = start_i; pj < end_i; ++pj ) + { + if ( far_nbr_list.far_nbr_list.d[pj] <= cutoff ) { - /* hydrogen bond lists */ - if ( control->hbond_cut > 0.0 - && (ihb == H_ATOM || ihb == H_BONDING_ATOM) - && far_nbr_list.far_nbr_list.d[pj] <= control->hbond_cut ) - { - jhb = sbp_j->p_hbond; + j = far_nbr_list.far_nbr_list.nbr[pj]; + atom_j = &my_atoms[j]; + type_j = atom_j->type; + sbp_j = &sbp[type_j]; + jhb = sbp_j->p_hbond; - /* atom i: H atom, native - * atom j: H bonding atom */ - if ( ihb == H_ATOM && jhb == H_BONDING_ATOM ) - { - hbond_list.hbond_list[ihb_top].nbr = j; - - if ( i < j ) - { - hbond_list.hbond_list[ihb_top].scl = 1; - } - else - { - hbond_list.hbond_list[ihb_top].scl = -1; - } - hbond_list.hbond_list[ihb_top].ptr = pj; + /* atom i: H bonding, ghost + * atom j: H atom, native */ + if ( i >= n && j < n + && ihb == H_BONDING_ATOM && jhb == H_ATOM ) + { + hbond_list.hbond_list[ihb_top].nbr = j; + hbond_list.hbond_list[ihb_top].scl = -1; + hbond_list.hbond_list[ihb_top].ptr = pj; #if !defined(CUDA_ACCUM_ATOMIC) - hbond_list.hbond_list[ihb_top].sym_index = -1; - rvec_MakeZero( hbond_list.hbond_list[ihb_top].hb_f ); + hbond_list.hbond_list[ihb_top].sym_index = -1; + rvec_MakeZero( hbond_list.hbond_list[ihb_top].hb_f ); #endif - ++ihb_top; - } - /* atom i: H bonding atom, native - * atom j: H atom, native */ - else if ( ihb == H_BONDING_ATOM && jhb == H_ATOM && j < n ) - { - //jhb_top = End_Index( atom_j->Hindex, &hbond_list ); - hbond_list.hbond_list[ihb_top].nbr = j; - hbond_list.hbond_list[ihb_top].scl = -1; - hbond_list.hbond_list[ihb_top].ptr = pj; + ++ihb_top; + } + /* atom i: H atom, native + * atom j: H bonding atom */ + else if ( i < n + && ihb == H_ATOM && jhb == H_BONDING_ATOM ) + { + hbond_list.hbond_list[ihb_top].nbr = j; + hbond_list.hbond_list[ihb_top].scl = 1; + hbond_list.hbond_list[ihb_top].ptr = pj; #if !defined(CUDA_ACCUM_ATOMIC) - hbond_list.hbond_list[ihb_top].sym_index = -1; - rvec_MakeZero( hbond_list.hbond_list[ihb_top].hb_f ); + hbond_list.hbond_list[ihb_top].sym_index = -1; + rvec_MakeZero( hbond_list.hbond_list[ihb_top].hb_f ); #endif - ++ihb_top; - } + ++ihb_top; } - } + /* atom i: H bonding atom, native + * atom j: H atom, native */ + else if ( i < n + && ihb == H_BONDING_ATOM && jhb == H_ATOM && j < n ) + { + hbond_list.hbond_list[ihb_top].nbr = j; + hbond_list.hbond_list[ihb_top].scl = -1; + hbond_list.hbond_list[ihb_top].ptr = pj; - /* uncorrected bond orders */ - if ( far_nbr_list.far_nbr_list.d[pj] <= control->bond_cut - && Cuda_BOp( bond_list, control->bo_cut, - i, btop_i, far_nbr_list.far_nbr_list.nbr[pj], - &far_nbr_list.far_nbr_list.rel_box[pj], far_nbr_list.far_nbr_list.d[pj], - &far_nbr_list.far_nbr_list.dvec[pj], far_nbr_list.format, - sbp_i, sbp_j, twbp, workspace.dDeltap_self, - workspace.total_bond_order ) == TRUE ) - { - ++btop_i; +#if !defined(CUDA_ACCUM_ATOMIC) + hbond_list.hbond_list[ihb_top].sym_index = -1; + rvec_MakeZero( hbond_list.hbond_list[ihb_top].hb_f ); +#endif - /* TODO: Need to do later... since i and j are parallel */ -// if( workspace->bond_mark[j] > workspace->bond_mark[i] + 1 ) -// { -// workspace->bond_mark[j] = workspace->bond_mark[i] + 1; -// } -// else if( workspace->bond_mark[i] > workspace->bond_mark[j] + 1 ) -// { -// workspace->bond_mark[i] = workspace->bond_mark[j] + 1; -// } + ++ihb_top; + } } } } - Set_End_Index( i, btop_i, &bond_list ); -// if ( control->hbond_cut > 0.0 && ihb_top > 0 -// && (ihb == H_ATOM || ihb == H_BONDING_ATOM) ) -// { - Set_End_Index( atom_i->Hindex, ihb_top, &hbond_list ); -// } + Set_End_Index( atom_i->Hindex, ihb_top, &hbond_list ); - num_bonds = btop_i - Start_Index( i, &bond_list ); num_hbonds = ihb_top - Start_Index( atom_i->Hindex, &hbond_list ); - /* copy (h)bond info to atom structure + /* copy hbond info to atom structure * (needed for atom ownership transfer via MPI) */ - my_atoms[i].num_bonds = num_bonds; my_atoms[i].num_hbonds = num_hbonds; - /* reallocation checks */ - if ( num_bonds > max_bonds[i] ) - { - *realloc_bonds = TRUE; - } - + /* reallocation check */ if ( num_hbonds > max_hbonds[i] ) { *realloc_hbonds = TRUE; @@ -904,6 +1068,7 @@ CUDA_GLOBAL void k_init_bonds( reax_atom *my_atoms, single_body_parameters *sbp, } +/* Construct the interaction list for bonds */ CUDA_GLOBAL void k_estimate_storages_cm_half( reax_atom *my_atoms, control_params *control, reax_list far_nbr_list, int cm_n, int cm_n_max, int *cm_entries, int *max_cm_entries ) @@ -947,7 +1112,7 @@ CUDA_GLOBAL void k_estimate_storages_cm_half( reax_atom *my_atoms, /* round up to the nearest multiple of 32 to ensure that reads along * rows can be coalesced for 1 warp per row SpMV implementation */ max_cm_entries[i] = MAX( ((int) CEIL( num_cm_entries * SAFE_ZONE ) - + 0x0000001F) & 0xFFFFFFE0, MIN_CM_ENTRIES ); + + warpSize - 1) / warpSize * warpSize, MIN_CM_ENTRIES ); } @@ -991,26 +1156,23 @@ CUDA_GLOBAL void k_estimate_storages_cm_full( control_params *control, /* round up to the nearest multiple of 32 to ensure that reads along * rows can be coalesced for 1 warp per row SpMV implementation */ max_cm_entries[i] = MAX( ((int) CEIL( num_cm_entries * SAFE_ZONE ) - + 0x0000001F) & 0xFFFFFFE0, MIN_CM_ENTRIES ); + + warpSize - 1) / warpSize * warpSize, MIN_CM_ENTRIES ); } -CUDA_GLOBAL void k_estimate_storages_bonds( reax_atom *my_atoms, +CUDA_GLOBAL void k_estimate_storage_bonds( reax_atom *my_atoms, single_body_parameters *sbp, two_body_parameters *tbp, control_params *control, reax_list far_nbr_list, int num_atom_types, int n, int N, int total_cap, - int *bonds, int *max_bonds, - int *hbonds, int *max_hbonds ) + int *bonds, int *max_bonds ) { int i, j, pj; int start_i, end_i; int type_i, type_j; - int ihb, jhb; - int num_bonds, num_hbonds; - real cutoff; - real r_ij; + int num_bonds; + real cutoff, r_ij; real C12, C34, C56; - real BO, BO_s, BO_pi, BO_pi2; + real BO_s, BO_pi, BO_pi2; single_body_parameters *sbp_i, *sbp_j; two_body_parameters *twbp; reax_atom *atom_i; @@ -1023,7 +1185,6 @@ CUDA_GLOBAL void k_estimate_storages_bonds( reax_atom *my_atoms, } num_bonds = 0; - num_hbonds = 0; if ( i < N ) { @@ -1032,111 +1193,148 @@ CUDA_GLOBAL void k_estimate_storages_bonds( reax_atom *my_atoms, start_i = Start_Index( i, &far_nbr_list ); end_i = End_Index( i, &far_nbr_list ); sbp_i = &sbp[type_i]; - ihb = NON_H_BONDING_ATOM; if ( i < n ) - { - cutoff = control->nonb_cut; - } + { + cutoff = MIN( control->nonb_cut, control->bond_cut ); + } else { cutoff = control->bond_cut; - } + } for ( pj = start_i; pj < end_i; ++pj ) { - j = far_nbr_list.far_nbr_list.nbr[pj]; - - if ( far_nbr_list.far_nbr_list.d[pj] <= control->nonb_cut ) + if ( far_nbr_list.far_nbr_list.d[pj] <= cutoff ) { + j = far_nbr_list.far_nbr_list.nbr[pj]; type_j = my_atoms[j].type; + r_ij = far_nbr_list.far_nbr_list.d[pj]; sbp_j = &sbp[type_j]; - ihb = sbp_i->p_hbond; - jhb = sbp_j->p_hbond; + twbp = &tbp[ index_tbp(type_i ,type_j, num_atom_types) ]; - /* atom i: H bonding, ghost - * atom j: H atom, native */ - if ( control->hbond_cut > 0.0 && i >= n && j < n - && ihb == H_BONDING_ATOM && jhb == H_ATOM - && far_nbr_list.far_nbr_list.d[pj] <= control->hbond_cut ) + /* uncorrected bond orders */ + if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 ) { - ++num_hbonds; + C12 = twbp->p_bo1 * POW( r_ij / twbp->r_s, twbp->p_bo2 ); + BO_s = (1.0 + control->bo_cut) * EXP( C12 ); + } + else + { + C12 = 0.0; + BO_s = 0.0; } - } - if ( far_nbr_list.far_nbr_list.d[pj] <= cutoff ) - { - type_j = my_atoms[j].type; - r_ij = far_nbr_list.far_nbr_list.d[pj]; - sbp_j = &sbp[type_j]; - twbp = &tbp[ index_tbp(type_i ,type_j, num_atom_types) ]; + if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 ) + { + C34 = twbp->p_bo3 * POW( r_ij / twbp->r_p, twbp->p_bo4 ); + BO_pi = EXP( C34 ); + } + else + { + C34 = 0.0; + BO_pi = 0.0; + } - if ( i < n ) + if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 ) { - /* atom i: H atom OR H bonding atom, native */ - if ( control->hbond_cut > 0.0 - && (ihb == H_ATOM || ihb == H_BONDING_ATOM) - && far_nbr_list.far_nbr_list.d[pj] <= control->hbond_cut ) - { - jhb = sbp_j->p_hbond; - - /* atom i: H atom, native - * atom j: H bonding atom */ - if ( ihb == H_ATOM && jhb == H_BONDING_ATOM ) - { - ++num_hbonds; - } - /* atom i: H bonding atom, native - * atom j: H atom, native */ - else if ( ihb == H_BONDING_ATOM && jhb == H_ATOM && j < n ) - { - ++num_hbonds; - } - } + C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 ); + BO_pi2= EXP( C56 ); + } + else + { + C56 = 0.0; + BO_pi2 = 0.0; } - /* uncorrected bond orders */ - if ( far_nbr_list.far_nbr_list.d[pj] <= control->bond_cut ) + /* initially BO values are the uncorrected ones, page 1 */ + if ( BO_s + BO_pi + BO_pi2 >= control->bo_cut ) { - if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 ) - { - C12 = twbp->p_bo1 * POW( r_ij / twbp->r_s, twbp->p_bo2 ); - BO_s = (1.0 + control->bo_cut) * EXP( C12 ); - } - else - { - C12 = 0.0; - BO_s = 0.0; - } + ++num_bonds; + } + } + } + } - if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 ) - { - C34 = twbp->p_bo3 * POW( r_ij / twbp->r_p, twbp->p_bo4 ); - BO_pi = EXP( C34 ); - } - else - { - C34 = 0.0; - BO_pi = 0.0; - } + __syncthreads( ); - if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 ) - { - C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 ); - BO_pi2= EXP( C56 ); - } - else - { - C56 = 0.0; - BO_pi2 = 0.0; - } + bonds[i] = num_bonds; + max_bonds[i] = MAX( (int) CEIL(2 * num_bonds * SAFE_ZONE), MIN_BONDS ); +} + + +CUDA_GLOBAL void k_estimate_storage_hbonds( reax_atom *my_atoms, + single_body_parameters *sbp, control_params *control, + reax_list far_nbr_list, int num_atom_types, int n, int N, + int total_cap, int *hbonds, int *max_hbonds ) +{ + int i, j, pj; + int start_i, end_i; + int type_i, type_j; + int ihb, jhb; + int num_hbonds; + real cutoff; + single_body_parameters *sbp_i, *sbp_j; + reax_atom *atom_i; + + i = blockIdx.x * blockDim.x + threadIdx.x; + + if ( i >= total_cap ) + { + return; + } + + num_hbonds = 0; + + if ( i < N ) + { + atom_i = &my_atoms[i]; + type_i = atom_i->type; + start_i = Start_Index( i, &far_nbr_list ); + end_i = End_Index( i, &far_nbr_list ); + sbp_i = &sbp[type_i]; + ihb = sbp_i->p_hbond; + + if ( i < n ) + { + cutoff = control->nonb_cut; + } + else + { + cutoff = control->bond_cut; + } - /* initially BO values are the uncorrected ones, page 1 */ - BO = BO_s + BO_pi + BO_pi2; + if ( i < n && ihb == H_ATOM || ihb == H_BONDING_ATOM ) + { + for ( pj = start_i; pj < end_i; ++pj ) + { + j = far_nbr_list.far_nbr_list.nbr[pj]; + type_j = my_atoms[j].type; + sbp_j = &sbp[type_j]; + jhb = sbp_j->p_hbond; - if ( BO >= control->bo_cut ) + /* atom i: H bonding, ghost + * atom j: H atom, native */ + if ( i >= n && j < n && ihb == H_BONDING_ATOM && jhb == H_ATOM + && far_nbr_list.far_nbr_list.d[pj] <= control->nonb_cut + && far_nbr_list.far_nbr_list.d[pj] <= control->hbond_cut ) + { + ++num_hbonds; + } + else if ( i < n && far_nbr_list.far_nbr_list.d[pj] <= cutoff + && far_nbr_list.far_nbr_list.d[pj] <= control->hbond_cut ) + { + /* atom i: H atom, native + * atom j: H bonding atom */ + if ( ihb == H_ATOM && jhb == H_BONDING_ATOM ) { - ++num_bonds; + ++num_hbonds; + } + /* atom i: H bonding atom, native + * atom j: H atom, native */ + else if ( ihb == H_BONDING_ATOM && jhb == H_ATOM && j < n ) + { + ++num_hbonds; } } } @@ -1145,9 +1343,6 @@ CUDA_GLOBAL void k_estimate_storages_bonds( reax_atom *my_atoms, __syncthreads( ); - bonds[i] = num_bonds; - max_bonds[i] = MAX( (int) CEIL(2 * num_bonds * SAFE_ZONE), MIN_BONDS ); - hbonds[i] = num_hbonds; max_hbonds[i] = MAX( (int) CEIL(num_hbonds * SAFE_ZONE), MIN_HBONDS ); } @@ -1199,6 +1394,7 @@ CUDA_GLOBAL void k_update_sym_dbond_indices( reax_list bond_list, int N ) ibond->sym_index = pk; jbond->sym_index = pj; + break; } } } @@ -1213,18 +1409,17 @@ CUDA_GLOBAL void k_update_sym_hbond_indices_opt( reax_atom *my_atoms, int nbr, nbrstart, nbrend; int start, end; hbond_data *ihbond, *jhbond; - int thread_id, warp_id, lane_id; + int thread_id, lane_id; thread_id = blockIdx.x * blockDim.x + threadIdx.x; - warp_id = thread_id >> 5; + i = thread_id / warpSize; - if ( warp_id > N ) + if ( i > N ) { return; } - i = warp_id; - lane_id = thread_id & 0x0000001F; + lane_id = thread_id % warpSize; start = Start_Index( my_atoms[i].Hindex, &hbond_list ); end = End_Index( my_atoms[i].Hindex, &hbond_list ); pj = start + lane_id; @@ -1408,11 +1603,6 @@ void Cuda_Init_HBond_Indices( reax_system *system, storage *workspace, "Cuda_Init_HBond_Indices::workspace->scratch" ); temp = (int *) workspace->scratch; - /* init Hindices */ - k_init_hindex <<< blocks, DEF_BLOCK_SIZE >>> - ( system->d_my_atoms, system->total_cap ); - cudaCheckError( ); - /* init indices and end_indices */ Cuda_Scan_Excl_Sum( system->d_max_hbonds, temp, system->total_cap ); @@ -1466,8 +1656,8 @@ void Cuda_Init_Sparse_Matrix_Indices( reax_system *system, sparse_matrix *H ) void Cuda_Estimate_Storages( reax_system *system, control_params *control, - storage *workspace, reax_list **lists, - int realloc_bonds, int realloc_hbonds, int realloc_cm, int step ) + storage *workspace, reax_list **lists, int realloc_cm, + int realloc_bonds, int realloc_hbonds, int step ) { int blocks; @@ -1500,23 +1690,19 @@ void Cuda_Estimate_Storages( reax_system *system, control_params *control, cudaMemcpyDeviceToHost, "Cuda_Estimate_Storages::d_total_cm_entries" ); } - if ( realloc_bonds == TRUE || realloc_hbonds == TRUE ) + if ( realloc_bonds == TRUE ) { blocks = system->total_cap / DEF_BLOCK_SIZE + (system->total_cap % DEF_BLOCK_SIZE == 0 ? 0 : 1); - k_estimate_storages_bonds <<< blocks, DEF_BLOCK_SIZE >>> + k_estimate_storage_bonds <<< blocks, DEF_BLOCK_SIZE >>> ( system->d_my_atoms, system->reax_param.d_sbp, system->reax_param.d_tbp, (control_params *) control->d_control_params, *(lists[FAR_NBRS]), system->reax_param.num_atom_types, system->n, system->N, system->total_cap, - system->d_bonds, system->d_max_bonds, - system->d_hbonds, system->d_max_hbonds ); + system->d_bonds, system->d_max_bonds ); cudaCheckError( ); - } - if ( realloc_bonds == TRUE ) - { Cuda_Reduction_Sum( system->d_max_bonds, system->d_total_bonds, system->total_cap ); copy_host_device( &system->total_bonds, system->d_total_bonds, sizeof(int), @@ -1525,12 +1711,23 @@ void Cuda_Estimate_Storages( reax_system *system, control_params *control, if ( system->numH > 0 && control->hbond_cut > 0.0 && realloc_hbonds == TRUE ) { + blocks = system->total_cap / DEF_BLOCK_SIZE + + (system->total_cap % DEF_BLOCK_SIZE == 0 ? 0 : 1); + + k_estimate_storage_hbonds <<< blocks, DEF_BLOCK_SIZE >>> + ( system->d_my_atoms, system->reax_param.d_sbp, + (control_params *) control->d_control_params, + *(lists[FAR_NBRS]), system->reax_param.num_atom_types, + system->n, system->N, system->total_cap, + system->d_hbonds, system->d_max_hbonds ); + cudaCheckError( ); + Cuda_Reduction_Sum( system->d_max_hbonds, system->d_total_hbonds, system->total_cap ); copy_host_device( &system->total_hbonds, system->d_total_hbonds, sizeof(int), cudaMemcpyDeviceToHost, "Cuda_Estimate_Storages::d_total_hbonds" ); } - else if ( step == 0 ) + else if ( step == 0 && (system->numH == 0 || control->hbond_cut <= 0.0) ) { #if defined(DEBUG_FOCUS) if ( system->numH == 0 ) @@ -1554,8 +1751,8 @@ int Cuda_Init_Forces( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, output_controls *out_control ) { - int renbr, blocks, ret, realloc_bonds, realloc_hbonds, realloc_cm; - static int dist_done = FALSE, cm_done = FALSE, bonds_done = FALSE; + int renbr, blocks, ret, realloc_cm, realloc_bonds, realloc_hbonds; + static int dist_done = FALSE, cm_done = FALSE, bonds_done = FALSE, hbonds_done = FALSE; #if defined(LOG_PERFORMANCE) float time_elapsed; cudaEvent_t time_event[4]; @@ -1578,12 +1775,12 @@ int Cuda_Init_Forces( reax_system *system, control_params *control, // cudaCheckError( ); /* reset reallocation flags on device */ + cuda_memset( system->d_realloc_cm_entries, FALSE, sizeof(int), + "Cuda_Init_Forces::d_realloc_cm_entries" ); cuda_memset( system->d_realloc_bonds, FALSE, sizeof(int), "Cuda_Init_Forces::d_realloc_bonds" ); cuda_memset( system->d_realloc_hbonds, FALSE, sizeof(int), "Cuda_Init_Forces::d_realloc_hbonds" ); - cuda_memset( system->d_realloc_cm_entries, FALSE, sizeof(int), - "Cuda_Init_Forces::d_realloc_cm_entries" ); #if defined(LOG_PERFORMANCE) cudaEventRecord( time_event[0] ); @@ -1591,11 +1788,12 @@ int Cuda_Init_Forces( reax_system *system, control_params *control, if ( renbr == FALSE && dist_done == FALSE ) { +// k_init_dist <<< control->blocks_n, control->block_size_n >>> +// ( system->d_my_atoms, *(lists[FAR_NBRS]), system->N ); + blocks = system->N * 32 / DEF_BLOCK_SIZE + (system->N * 32 % DEF_BLOCK_SIZE == 0 ? 0 : 1); -// k_init_dist <<< control->blocks_n, control->block_size_n >>> -// ( system->d_my_atoms, *(lists[FAR_NBRS]), system->N ); k_init_dist_opt <<< blocks, DEF_BLOCK_SIZE >>> ( system->d_my_atoms, *(lists[FAR_NBRS]), system->N ); cudaCheckError( ); @@ -1615,6 +1813,8 @@ int Cuda_Init_Forces( reax_system *system, control_params *control, if ( cm_done == FALSE ) { + Cuda_Init_Sparse_Matrix_Indices( system, &workspace->d_workspace->H ); + if ( workspace->d_workspace->H.format == SYM_HALF_MATRIX ) { if ( control->tabulate <= 0 ) @@ -1672,13 +1872,46 @@ int Cuda_Init_Forces( reax_system *system, control_params *control, if ( bonds_done == FALSE ) { + blocks = system->total_cap / DEF_BLOCK_SIZE + + ((system->total_cap % DEF_BLOCK_SIZE == 0 ) ? 0 : 1); + + k_reset_bond_orders <<< blocks, DEF_BLOCK_SIZE >>> + ( *(workspace->d_workspace), system->total_cap ); + cudaCheckError( ); + + Cuda_Init_Bond_Indices( system, lists[BONDS] ); + k_init_bonds <<< control->blocks_n, control->block_size_n >>> ( system->d_my_atoms, system->reax_param.d_sbp, system->reax_param.d_tbp, *(workspace->d_workspace), (control_params *) control->d_control_params, - *(lists[FAR_NBRS]), *(lists[BONDS]), *(lists[HBONDS]), - workspace->d_LR, system->n, system->N, system->reax_param.num_atom_types, - system->d_max_bonds, system->d_realloc_bonds, + *(lists[FAR_NBRS]), *(lists[BONDS]), + system->n, system->N, system->reax_param.num_atom_types, + system->d_max_bonds, system->d_realloc_bonds ); + +// blocks = control->block_size_n * 32 / DEF_BLOCK_SIZE +// + (control->block_size_n * 32 % DEF_BLOCK_SIZE == 0 ? 0 : 1); +// +// k_init_bonds_opt <<< blocks, DEF_BLOCK_SIZE, +// sizeof(cub::WarpScan<int>::TempStorage) * (DEF_BLOCK_SIZE / 32) >>> +// ( system->d_my_atoms, system->reax_param.d_sbp, +// system->reax_param.d_tbp, *(workspace->d_workspace), +// (control_params *) control->d_control_params, +// *(lists[FAR_NBRS]), *(lists[BONDS]), +// system->n, system->N, system->reax_param.num_atom_types, +// system->d_max_bonds, system->d_realloc_bonds ); + cudaCheckError( ); + } + + if ( control->hbond_cut > 0.0 && system->numH > 0 && hbonds_done == FALSE ) + { + Cuda_Init_HBond_Indices( system, workspace, lists[HBONDS] ); + + k_init_hbonds <<< control->blocks_n, control->block_size_n >>> + ( system->d_my_atoms, system->reax_param.d_sbp, + (control_params *) control->d_control_params, + *(lists[FAR_NBRS]), *(lists[HBONDS]), + system->n, system->N, system->reax_param.num_atom_types, system->d_max_hbonds, system->d_realloc_hbonds ); cudaCheckError( ); } @@ -1731,17 +1964,21 @@ int Cuda_Init_Forces( reax_system *system, control_params *control, } #endif - ret = (realloc_cm == FALSE && realloc_bonds == FALSE && realloc_hbonds == FALSE) - ? SUCCESS : FAILURE; + ret = (realloc_cm == FALSE && realloc_bonds == FALSE && realloc_hbonds == FALSE + ? SUCCESS : FAILURE); if ( realloc_cm == FALSE ) { cm_done = TRUE; } - if ( realloc_bonds == FALSE && realloc_hbonds == FALSE ) + if ( realloc_bonds == FALSE ) { bonds_done = TRUE; } + if ( realloc_hbonds == FALSE ) + { + hbonds_done = TRUE; + } if ( ret == SUCCESS ) { @@ -1769,17 +2006,18 @@ int Cuda_Init_Forces( reax_system *system, control_params *control, dist_done = FALSE; cm_done = FALSE; bonds_done = FALSE; + hbonds_done = FALSE; } else { Cuda_Estimate_Storages( system, control, workspace, lists, - realloc_bonds, realloc_hbonds, realloc_cm, + realloc_cm, realloc_bonds, realloc_hbonds, data->step - data->prev_steps ); /* schedule reallocations after updating allocation sizes */ + workspace->d_workspace->realloc.cm = realloc_cm; workspace->d_workspace->realloc.bonds = realloc_bonds; workspace->d_workspace->realloc.hbonds = realloc_hbonds; - workspace->d_workspace->realloc.cm = realloc_cm; } return ret; diff --git a/PG-PuReMD/src/cuda/cuda_init_md.cu b/PG-PuReMD/src/cuda/cuda_init_md.cu index 68cef63a..1c4ee61d 100644 --- a/PG-PuReMD/src/cuda/cuda_init_md.cu +++ b/PG-PuReMD/src/cuda/cuda_init_md.cu @@ -168,15 +168,15 @@ void Cuda_Init_Workspace( reax_system *system, control_params *control, workspace->realloc.far_nbrs = FALSE; workspace->realloc.cm = FALSE; - workspace->realloc.hbonds = FALSE; workspace->realloc.bonds = FALSE; + workspace->realloc.hbonds = FALSE; workspace->realloc.thbody = FALSE; workspace->realloc.gcell_atoms = 0; workspace->d_workspace->realloc.far_nbrs = FALSE; workspace->d_workspace->realloc.cm = FALSE; - workspace->d_workspace->realloc.hbonds = FALSE; workspace->d_workspace->realloc.bonds = FALSE; + workspace->d_workspace->realloc.hbonds = FALSE; workspace->d_workspace->realloc.thbody = FALSE; workspace->d_workspace->realloc.gcell_atoms = 0; diff --git a/PG-PuReMD/src/cuda/cuda_integrate.cu b/PG-PuReMD/src/cuda/cuda_integrate.cu index 8a32018a..9a4a50e6 100644 --- a/PG-PuReMD/src/cuda/cuda_integrate.cu +++ b/PG-PuReMD/src/cuda/cuda_integrate.cu @@ -332,11 +332,11 @@ int Cuda_Velocity_Verlet_NVE( reax_system *system, control_params *control, Cuda_Copy_Grid_Host_to_Device( &system->my_grid, &system->d_my_grid ); } + Cuda_Reset( system, control, data, workspace, lists ); + cuda_copy = TRUE; } - Cuda_Reset( system, control, data, workspace, lists ); - if ( renbr == TRUE && gen_nbr_list == FALSE ) { ret = Cuda_Generate_Neighbor_Lists( system, data, workspace, lists ); @@ -430,11 +430,11 @@ int Cuda_Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system* system, Cuda_Copy_Grid_Host_to_Device( &system->my_grid, &system->d_my_grid ); } + Cuda_Reset( system, control, data, workspace, lists ); + cuda_copy = TRUE; } - Cuda_Reset( system, control, data, workspace, lists ); - if ( renbr == TRUE && gen_nbr_list == FALSE ) { ret = Cuda_Generate_Neighbor_Lists( system, data, workspace, lists ); @@ -561,11 +561,11 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con Cuda_Copy_Grid_Host_to_Device( &system->my_grid, &system->d_my_grid ); } + Cuda_Reset( system, control, data, workspace, lists ); + cuda_copy = TRUE; } - Cuda_Reset( system, control, data, workspace, lists ); - if ( renbr == TRUE && gen_nbr_list == FALSE ) { ret = Cuda_Generate_Neighbor_Lists( system, data, workspace, lists ); @@ -675,11 +675,11 @@ int Cuda_Velocity_Verlet_Berendsen_NPT( reax_system* system, control_params* con Cuda_Copy_Grid_Host_to_Device( &system->my_grid, &system->d_my_grid ); } + Cuda_Reset( system, control, data, workspace, lists ); + cuda_copy = TRUE; } - Cuda_Reset( system, control, data, workspace, lists ); - if ( renbr == TRUE && gen_nbr_list == FALSE ) { ret = Cuda_Generate_Neighbor_Lists( system, data, workspace, lists ); diff --git a/PG-PuReMD/src/cuda/cuda_neighbors.cu b/PG-PuReMD/src/cuda/cuda_neighbors.cu index e94cd26b..1ef460f3 100644 --- a/PG-PuReMD/src/cuda/cuda_neighbors.cu +++ b/PG-PuReMD/src/cuda/cuda_neighbors.cu @@ -47,8 +47,9 @@ CUDA_DEVICE real Cuda_DistSqr_to_Special_Point( rvec cp, rvec x ) } -/* Generate far neighbor lists by scanning the atoms list and applying cutoffs */ -CUDA_GLOBAL void k_generate_neighbor_lists( reax_atom *my_atoms, +/* Generate far neighbor lists in full format + * by scanning the atoms list and applying cutoffs */ +CUDA_GLOBAL void k_generate_neighbor_lists_full( reax_atom *my_atoms, simulation_box my_ext_box, grid g, reax_list far_nbr_list, int n, int N, int *far_nbrs, int *max_far_nbrs, int *realloc_far_nbrs ) { @@ -113,15 +114,16 @@ CUDA_GLOBAL void k_generate_neighbor_lists( reax_atom *my_atoms, ivec_Copy( nbrs_x, g.nbrs_x[index_grid_nbrs(i, j, k, itr, &g)] ); /* if neighboring grid cell is further in the "positive" direction AND within cutoff */ - if ( g.str[index_grid_3d(i, j, k, &g)] <= g.str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)] - && Cuda_DistSqr_to_Special_Point(g.nbrs_cp[index_grid_nbrs(i, j, k, itr, &g)], atom1->x) <= cutoff ) + if ( //g.str[index_grid_3d(i, j, k, &g)] <= g.str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)] && + Cuda_DistSqr_to_Special_Point(g.nbrs_cp[index_grid_nbrs(i, j, k, itr, &g)], atom1->x) <= cutoff ) { /* pick up another atom from the neighbor cell */ for ( m = g.str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)]; m < g.end[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)]; ++m ) { /* prevent recounting same pairs within a gcell */ - if ( l < m ) +// if ( l < m ) + if ( l != m ) { atom2 = &my_atoms[m]; dvec[0] = atom2->x[0] - atom1->x[0]; @@ -148,53 +150,11 @@ CUDA_GLOBAL void k_generate_neighbor_lists( reax_atom *my_atoms, ++itr; } - /* scan neighboring grid cells within cutoff */ - itr = 0; - while ( g.nbrs_x[index_grid_nbrs(i, j, k, itr, &g)][0] >= 0 ) - { - ivec_Copy( nbrs_x, g.nbrs_x[index_grid_nbrs(i, j, k, itr, &g)] ); - cutoff = SQR( g.cutoff[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)] ); - - /* if neighboring grid cell is further in the "negative" direction AND within cutoff */ - if ( g.str[index_grid_3d(i, j, k, &g)] >= g.str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)] && - Cuda_DistSqr_to_Special_Point(g.nbrs_cp[index_grid_nbrs(i, j, k, itr, &g)], atom1->x) <= cutoff ) - { - /* pick up another atom from the neighbor cell */ - for ( m = g.str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)]; - m < g.end[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)]; ++m ) - { - /* prevent recounting same pairs within a gcell */ - if ( l > m ) - { - atom2 = &my_atoms[m]; - dvec[0] = atom1->x[0] - atom2->x[0]; - dvec[1] = atom1->x[1] - atom2->x[1]; - dvec[2] = atom1->x[2] - atom2->x[2]; - d = rvec_Norm_Sqr( dvec ); - - if ( d <= cutoff ) - { - /* commit far neighbor to list */ - far_nbr_list.far_nbr_list.nbr[num_far] = m; - far_nbr_list.far_nbr_list.d[num_far] = SQRT( d ); - rvec_Copy( far_nbr_list.far_nbr_list.dvec[num_far], dvec ); - ivec_ScaledSum( far_nbr_list.far_nbr_list.rel_box[num_far], - 1, g.rel_box[ index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g) ], - -1, g.rel_box[ index_grid_3d(i, j, k, &g) ] ); - - ++num_far; - } - } - } - } - - ++itr; - } - Set_End_Index( l, num_far, &far_nbr_list ); - /* reallocation check */ my_num_far = num_far - Start_Index( l, &far_nbr_list ); + + /* reallocation check */ if ( my_num_far > max_far_nbrs[l] ) { *realloc_far_nbrs = TRUE; @@ -424,8 +384,8 @@ CUDA_GLOBAL void k_mt_generate_neighbor_lists( reax_atom *my_atoms, } -/* Estimate the number of far neighbors per atom (GPU) */ -CUDA_GLOBAL void k_estimate_neighbors( reax_atom *my_atoms, +/* Estimate the number of far neighbors in full format per atom */ +CUDA_GLOBAL void k_estimate_neighbors_full( reax_atom *my_atoms, simulation_box my_ext_box, grid g, int n, int N, int total_cap, int *far_nbrs, int *max_far_nbrs ) { @@ -500,7 +460,8 @@ CUDA_GLOBAL void k_estimate_neighbors( reax_atom *my_atoms, m < g.end[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)]; ++m ) { /* prevent recounting same pairs within a gcell */ - if ( l < m ) +// if ( l < m ) + if ( l != m ) { atom2 = &my_atoms[m]; dvec[0] = atom2->x[0] - atom1->x[0]; @@ -518,38 +479,6 @@ CUDA_GLOBAL void k_estimate_neighbors( reax_atom *my_atoms, ++itr; } - - itr = 0; - while ( g.nbrs_x[index_grid_nbrs(i, j, k, itr, &g)][0] >= 0 ) - { - ivec_Copy( nbrs_x, g.nbrs_x[index_grid_nbrs(i, j, k, itr, &g)] ); - cutoff = SQR( g.cutoff[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)] ); - - if ( g.str[index_grid_3d(i, j, k, &g)] >= g.str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)] && - Cuda_DistSqr_to_Special_Point(g.nbrs_cp[index_grid_nbrs(i, j, k, itr, &g)],atom1->x) <= cutoff ) - { - /* pick up another atom from the neighbor cell */ - for ( m = g.str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)]; - m < g.end[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)]; ++m ) - { - /* prevent recounting same pairs within a gcell */ - if ( l > m ) - { - atom2 = &my_atoms[m]; - dvec[0] = atom2->x[0] - atom1->x[0]; - dvec[1] = atom2->x[1] - atom1->x[1]; - dvec[2] = atom2->x[2] - atom1->x[2]; - d = rvec_Norm_Sqr( dvec ); - - if ( d <= cutoff ) - { - num_far++; - } - } - } - } - ++itr; - } } else { @@ -574,6 +503,8 @@ extern "C" int Cuda_Generate_Neighbor_Lists( reax_system *system, { cudaEventCreate( &time_event[i] ); } + + cudaEventRecord( time_event[0] ); #endif /* reset reallocation flag on device */ @@ -586,11 +517,7 @@ extern "C" int Cuda_Generate_Neighbor_Lists( reax_system *system, blocks = (system->N / NBRS_BLOCK_SIZE) + ((system->N % NBRS_BLOCK_SIZE) == 0 ? 0 : 1); -#if defined(LOG_PERFORMANCE) - cudaEventRecord( time_event[0] ); -#endif - - k_generate_neighbor_lists <<< blocks, NBRS_BLOCK_SIZE >>> + k_generate_neighbor_lists_full <<< blocks, NBRS_BLOCK_SIZE >>> ( system->d_my_atoms, system->my_ext_box, system->d_my_grid, *(lists[FAR_NBRS]), system->n, system->N, @@ -607,10 +534,6 @@ extern "C" int Cuda_Generate_Neighbor_Lists( reax_system *system, // *(lists[FAR_NBRS]), system->n, system->N ); // cudaCheckError( ); -#if defined(LOG_PERFORMANCE) - cudaEventRecord( time_event[1] ); -#endif - /* check reallocation flag on device */ copy_host_device( &ret_far_nbr, system->d_realloc_far_nbrs, sizeof(int), cudaMemcpyDeviceToHost, "Cuda_Generate_Neighbor_Lists::d_realloc_far_nbrs" ); @@ -619,6 +542,8 @@ extern "C" int Cuda_Generate_Neighbor_Lists( reax_system *system, workspace->d_workspace->realloc.far_nbrs = ret_far_nbr; #if defined(LOG_PERFORMANCE) + cudaEventRecord( time_event[1] ); + if ( cudaEventQuery( time_event[0] ) != cudaSuccess ) { cudaEventSynchronize( time_event[0] ); @@ -644,15 +569,21 @@ void Cuda_Estimate_Num_Neighbors( reax_system *system, simulation_data *data ) { int blocks; #if defined(LOG_PERFORMANCE) - double time; + float time_elapsed; + cudaEvent_t time_event[2]; - time = Get_Time( ); + for ( int i = 0; i < 2; ++i ) + { + cudaEventCreate( &time_event[i] ); + } + + cudaEventRecord( time_event[0] ); #endif blocks = system->total_cap / DEF_BLOCK_SIZE + (system->total_cap % DEF_BLOCK_SIZE == 0 ? 0 : 1); - k_estimate_neighbors <<< blocks, DEF_BLOCK_SIZE >>> + k_estimate_neighbors_full <<< blocks, DEF_BLOCK_SIZE >>> ( system->d_my_atoms, system->my_ext_box, system->d_my_grid, system->n, system->N, system->total_cap, system->d_far_nbrs, system->d_max_far_nbrs ); @@ -664,6 +595,19 @@ void Cuda_Estimate_Num_Neighbors( reax_system *system, simulation_data *data ) cudaMemcpyDeviceToHost, "Cuda_Estimate_Neighbors::d_total_far_nbrs" ); #if defined(LOG_PERFORMANCE) - Update_Timing_Info( &time, &data->timing.nbrs ); + cudaEventRecord( time_event[1] ); + + if ( cudaEventQuery( time_event[0] ) != cudaSuccess ) + { + cudaEventSynchronize( time_event[0] ); + } + + if ( cudaEventQuery( time_event[1] ) != cudaSuccess ) + { + cudaEventSynchronize( time_event[1] ); + } + + cudaEventElapsedTime( &time_elapsed, time_event[0], time_event[1] ); + data->timing.nbrs += (real) (time_elapsed / 1000.0); #endif } diff --git a/PG-PuReMD/src/cuda/cuda_nonbonded.cu b/PG-PuReMD/src/cuda/cuda_nonbonded.cu index 15e56ea8..c619f3eb 100644 --- a/PG-PuReMD/src/cuda/cuda_nonbonded.cu +++ b/PG-PuReMD/src/cuda/cuda_nonbonded.cu @@ -59,8 +59,11 @@ CUDA_GLOBAL void k_compute_polarization_energy( reax_atom *my_atoms, } -/* one thread per atom implementation */ -CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms, +/* Compute energies and forces due to van der Waals and Coulomb interactions + * where the far neighbors list is in full format + * + * This implementation assigns one thread per atom */ +CUDA_GLOBAL void k_vdW_coulomb_energy_full( reax_atom *my_atoms, two_body_parameters *tbp, global_parameters gp, control_params *control, storage workspace, reax_list far_nbr_list, int n, int num_atom_types, real *e_vdW_g, real *e_ele_g ) @@ -99,7 +102,6 @@ CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms, j = far_nbr_list.far_nbr_list.nbr[pj]; orig_j = my_atoms[j].orig_id; - //TODO: assuming far_nbr_list in FULL_LIST, add conditions for HALF_LIST if ( far_nbr_list.far_nbr_list.d[pj] <= control->nonb_cut && orig_i < orig_j ) { @@ -187,14 +189,7 @@ CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms, * (r_ij * r_ij) / POW( dr3gamij_1, 4.0 / 3.0 ); CEclmb = self_coef * (de_clb * Tap + e_clb * dTap); - if ( i < j ) - { - rvec_Scale( temp, -(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); - } - else - { - rvec_Scale( temp, (CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); - } + rvec_Scale( temp, -(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); rvec_Add( f_i_l, temp ); rvec_Scale( temp, -1.0, temp ); atomic_rvecAdd( workspace.f[j], temp ); @@ -212,8 +207,11 @@ CUDA_GLOBAL void k_vdW_coulomb_energy( reax_atom *my_atoms, } -/* one thread per atom implementation */ -CUDA_GLOBAL void k_vdW_coulomb_virial_energy( reax_atom *my_atoms, +/* Compute virial terms, energies, and forces due to van der Waals and Coulomb interactions + * where the far neighbors list is in full format + * + * This implementation assigns one thread per atom */ +CUDA_GLOBAL void k_vdW_coulomb_energy_virial_full( reax_atom *my_atoms, two_body_parameters *tbp, global_parameters gp, control_params *control, storage workspace, reax_list far_nbr_list, int n, int num_atom_types, real *e_vdW_g, real *e_ele_g, rvec *ext_press_g ) @@ -253,7 +251,6 @@ CUDA_GLOBAL void k_vdW_coulomb_virial_energy( reax_atom *my_atoms, j = far_nbr_list.far_nbr_list.nbr[pj]; orig_j = my_atoms[j].orig_id; - //TODO: assuming far_nbr_list in FULL_LIST, add conditions for HALF_LIST if ( far_nbr_list.far_nbr_list.d[pj] <= control->nonb_cut && orig_i < orig_j ) { @@ -343,16 +340,8 @@ CUDA_GLOBAL void k_vdW_coulomb_virial_energy( reax_atom *my_atoms, /* for pressure coupling, terms not related to bond order derivatives are added directly into pressure vector/tensor */ - if ( i < j ) - { - rvec_Scale( temp, - -(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); - } - else - { - rvec_Scale( temp, - (CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); - } + rvec_Scale( temp, + -(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); rvec_Add( f_i_l, temp ); rvec_Scale( temp, -1.0, temp ); atomic_rvecAdd( workspace.f[j], temp ); @@ -376,8 +365,11 @@ CUDA_GLOBAL void k_vdW_coulomb_virial_energy( reax_atom *my_atoms, } -/* one warp of threads per atom implementation */ -CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms, +/* Compute energies and forces due to van der Waals and Coulomb interactions + * where the far neighbors list is in full format + * + * This implementation assigns one warp of threads per atom */ +CUDA_GLOBAL void k_vdW_coulomb_energy_full_opt( reax_atom *my_atoms, two_body_parameters *tbp, global_parameters gp, control_params *control, storage workspace, reax_list far_nbr_list, int n, int num_atom_types, real *e_vdW_g, real *e_ele_g ) @@ -423,7 +415,6 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms, j = far_nbr_list.far_nbr_list.nbr[pj]; orig_j = my_atoms[j].orig_id; - //TODO: assuming far_nbr_list in FULL_LIST, add conditions for HALF_LIST if ( far_nbr_list.far_nbr_list.d[pj] <= control->nonb_cut && orig_i < orig_j ) { @@ -511,14 +502,7 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms, * (r_ij * r_ij) / POW( dr3gamij_1, 4.0 / 3.0 ); CEclmb = self_coef * (de_clb * Tap + e_clb * dTap); - if ( i < j ) - { - rvec_Scale( temp, -(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); - } - else - { - rvec_Scale( temp, (CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); - } + rvec_Scale( temp, -(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); rvec_Add( f_i_l, temp ); rvec_Scale( temp, -1.0, temp ); atomic_rvecAdd( workspace.f[j], temp ); @@ -548,8 +532,11 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_opt( reax_atom *my_atoms, } -/* one warp of threads per atom implementation */ -CUDA_GLOBAL void k_vdW_coulomb_energy_virial_opt( reax_atom *my_atoms, +/* Compute virial terms, energies, and forces due to van der Waals and Coulomb interactions + * where the far neighbors list is in full format + * + * This implementation assigns one warp of threads per atom */ +CUDA_GLOBAL void k_vdW_coulomb_energy_virial_full_opt( reax_atom *my_atoms, two_body_parameters *tbp, global_parameters gp, control_params *control, storage workspace, reax_list far_nbr_list, int n, int num_atom_types, real *e_vdW_g, real *e_ele_g, rvec *ext_press_g ) @@ -596,7 +583,6 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_virial_opt( reax_atom *my_atoms, j = far_nbr_list.far_nbr_list.nbr[pj]; orig_j = my_atoms[j].orig_id; - //TODO: assuming far_nbr_list in FULL_LIST, add conditions for HALF_LIST if ( far_nbr_list.far_nbr_list.d[pj] <= control->nonb_cut && orig_i < orig_j ) { @@ -686,16 +672,8 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_virial_opt( reax_atom *my_atoms, /* for pressure coupling, terms not related to bond order derivatives are added directly into pressure vector/tensor */ - if ( i < j ) - { - rvec_Scale( temp, - -(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); - } - else - { - rvec_Scale( temp, - (CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); - } + rvec_Scale( temp, + -(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); rvec_Add( f_i_l, temp ); rvec_Scale( temp, -1.0, temp ); atomic_rvecAdd( workspace.f[j], temp ); @@ -732,7 +710,7 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_virial_opt( reax_atom *my_atoms, /* one thread per atom implementation */ -CUDA_GLOBAL void k_vdW_coulomb_energy_tab( reax_atom *my_atoms, +CUDA_GLOBAL void k_vdW_coulomb_energy_tab_full( reax_atom *my_atoms, global_parameters gp, control_params *control, storage workspace, reax_list far_nbr_list, LR_lookup_table *t_LR, int n, int num_atom_types, @@ -773,7 +751,6 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_tab( reax_atom *my_atoms, j = far_nbr_list.far_nbr_list.nbr[pj]; orig_j = my_atoms[j].orig_id; - //TODO: assuming far_nbr_list in FULL_LIST, add conditions for HALF_LIST if ( far_nbr_list.far_nbr_list.d[pj] <= control->nonb_cut && orig_i < orig_j ) { @@ -812,16 +789,8 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_tab( reax_atom *my_atoms, if ( control->virial == 0 ) { - if ( i < j ) - { - rvec_ScaledAdd( temp, - -(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); - } - else - { - rvec_ScaledAdd( temp, - (CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); - } + rvec_ScaledAdd( temp, + -(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); rvec_Add( f_i_l, temp ); rvec_Scale( temp, -1.0, temp ); atomic_rvecAdd( workspace.f[j], temp ); @@ -831,16 +800,8 @@ CUDA_GLOBAL void k_vdW_coulomb_energy_tab( reax_atom *my_atoms, { /* for pressure coupling, terms not related to bond order derivatives are added directly into pressure vector/tensor */ - if ( i < j ) - { - rvec_Scale( temp, - -(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); - } - else - { - rvec_Scale( temp, - (CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); - } + rvec_Scale( temp, + -(CEvd + CEclmb) / r_ij, far_nbr_list.far_nbr_list.dvec[pj] ); rvec_Add( f_i_l, temp ); rvec_ScaledAdd( temp, -1.0, temp ); atomic_rvecAdd( workspace.f[j], temp ); @@ -948,7 +909,7 @@ void Cuda_Compute_NonBonded_Forces( reax_system *system, control_params *control { if ( control->virial == 1 ) { -// k_vdW_coulomb_energy_virial <<< control->blocks, control->block_size >>> +// k_vdW_coulomb_energy_virial_full <<< control->blocks, control->block_size >>> // ( system->d_my_atoms, system->reax_param.d_tbp, // system->reax_param.d_gp, (control_params *) control->d_control_params, // *(workspace->d_workspace), *(lists[FAR_NBRS]), @@ -962,7 +923,7 @@ void Cuda_Compute_NonBonded_Forces( reax_system *system, control_params *control //#endif // ); - k_vdW_coulomb_energy_virial_opt <<< blocks, DEF_BLOCK_SIZE, + k_vdW_coulomb_energy_virial_full_opt <<< blocks, DEF_BLOCK_SIZE, sizeof(real) * (DEF_BLOCK_SIZE / 32) >>> ( system->d_my_atoms, system->reax_param.d_tbp, system->reax_param.d_gp, (control_params *) control->d_control_params, @@ -979,7 +940,7 @@ void Cuda_Compute_NonBonded_Forces( reax_system *system, control_params *control } else { -// k_vdW_coulomb_energy <<< control->blocks, control->block_size >>> +// k_vdW_coulomb_energy_full <<< control->blocks, control->block_size >>> // ( system->d_my_atoms, system->reax_param.d_tbp, // system->reax_param.d_gp, (control_params *) control->d_control_params, // *(workspace->d_workspace), *(lists[FAR_NBRS]), @@ -992,7 +953,7 @@ void Cuda_Compute_NonBonded_Forces( reax_system *system, control_params *control //#endif // ); - k_vdW_coulomb_energy_opt <<< blocks, DEF_BLOCK_SIZE, + k_vdW_coulomb_energy_full_opt <<< blocks, DEF_BLOCK_SIZE, sizeof(real) * (DEF_BLOCK_SIZE / 32) >>> ( system->d_my_atoms, system->reax_param.d_tbp, system->reax_param.d_gp, (control_params *) control->d_control_params, @@ -1010,7 +971,7 @@ void Cuda_Compute_NonBonded_Forces( reax_system *system, control_params *control } else { - k_vdW_coulomb_energy_tab <<< control->blocks, control->block_size >>> + k_vdW_coulomb_energy_tab_full <<< control->blocks, control->block_size >>> ( system->d_my_atoms, system->reax_param.d_gp, (control_params *) control->d_control_params, *(workspace->d_workspace), *(lists[FAR_NBRS]), diff --git a/PG-PuReMD/src/cuda/cuda_reset_tools.cu b/PG-PuReMD/src/cuda/cuda_reset_tools.cu index 99d55dd0..532d643c 100644 --- a/PG-PuReMD/src/cuda/cuda_reset_tools.cu +++ b/PG-PuReMD/src/cuda/cuda_reset_tools.cu @@ -20,9 +20,7 @@ CUDA_GLOBAL void k_reset_workspace( storage workspace, int N ) return; } - workspace.total_bond_order[i] = 0.0; workspace.CdDelta[i] = 0.0; - rvec_MakeZero( workspace.dDeltap_self[i] ); rvec_MakeZero( workspace.f[i] ); } @@ -39,18 +37,22 @@ CUDA_GLOBAL void k_reset_hindex( reax_atom *my_atoms, single_body_parameters *sb return; } + my_atoms[i].Hindex = i; + if ( sbp[ my_atoms[i].type ].p_hbond == H_ATOM || sbp[ my_atoms[i].type ].p_hbond == H_BONDING_ATOM ) { +#if !defined(CUDA_ACCUM_ATOMIC) hindex[i] = 1; } else { hindex[i] = 0; } - -// my_atoms[i].Hindex = hindex[i]; - my_atoms[i].Hindex = i; +#else + atomicAdd( hindex, 1 ); + } +#endif } void Cuda_Reset_Workspace( reax_system *system, storage *workspace ) @@ -69,18 +71,28 @@ void Cuda_Reset_Workspace( reax_system *system, storage *workspace ) void Cuda_Reset_Atoms_HBond_Indices( reax_system* system, control_params *control, storage *workspace ) { +#if !defined(CUDA_ACCUM_ATOMIC) int *hindex; cuda_check_malloc( &workspace->scratch, &workspace->scratch_size, - sizeof(int) * system->N, + sizeof(int) * system->total_cap, "Cuda_Reset_Atoms_HBond_Indices::workspace->scratch" ); hindex = (int *) workspace->scratch; +#endif k_reset_hindex <<< control->blocks_n, control->block_size_n >>> - ( system->d_my_atoms, system->reax_param.d_sbp, hindex, system->N ); + ( system->d_my_atoms, system->reax_param.d_sbp, +#if !defined(CUDA_ACCUM_ATOMIC) + hindex, +#else + system->d_numH, +#endif + system->total_cap ); cudaCheckError( ); +#if !defined(CUDA_ACCUM_ATOMIC) Cuda_Reduction_Sum( hindex, system->d_numH, system->N ); +#endif copy_host_device( &system->numH, system->d_numH, sizeof(int), cudaMemcpyDeviceToHost, "Cuda_Reset_Atoms_HBond_Indices::d_numH" ); diff --git a/PG-PuReMD/src/cuda/cuda_valence_angles.cu b/PG-PuReMD/src/cuda/cuda_valence_angles.cu index e31dc56d..fca9236d 100644 --- a/PG-PuReMD/src/cuda/cuda_valence_angles.cu +++ b/PG-PuReMD/src/cuda/cuda_valence_angles.cu @@ -43,7 +43,6 @@ CUDA_GLOBAL void k_valence_angles_part1( reax_atom *my_atoms, int i, j, pi, k, pk, t; int type_i, type_j, type_k; int start_j, end_j; -// int start_pk, end_pk; int cnt, num_thb_intrs; real temp, temp_bo_jt, pBOjt7; real p_val1, p_val2, p_val3, p_val4, p_val5; @@ -65,7 +64,6 @@ CUDA_GLOBAL void k_valence_angles_part1( reax_atom *my_atoms, three_body_header *thbh; three_body_parameters *thbp; three_body_interaction_data *p_ijk; -// three_body_interaction_data *p_kji; bond_data *pbond_ij, *pbond_jk, *pbond_jt; bond_order_data *bo_ij, *bo_jk, *bo_jt; @@ -86,7 +84,6 @@ CUDA_GLOBAL void k_valence_angles_part1( reax_atom *my_atoms, p_val8 = gp.l[33]; p_val9 = gp.l[16]; p_val10 = gp.l[17]; - //num_thb_intrs = j * THREE_BODY_OFFSET; e_ang_l = 0.0; e_coa_l = 0.0; e_pen_l = 0.0; @@ -165,41 +162,7 @@ CUDA_GLOBAL void k_valence_angles_part1( reax_atom *my_atoms, i = pbond_ij->nbr; type_i = my_atoms[i].type; - /* first copy 3-body intrs from previously computed ones where i > k. - * IMPORTANT: if it is less costly to compute theta and its - * derivative, we should definitely re-compute them, - * instead of copying! - * in the second for-loop below, we compute only new 3-body intrs - * where i < k */ - // The copy loop commented out because strange asynchronous issues started to surface - // Each kernel now manually generates everything -// for( pk = start_j; pk < pi; ++pk ) -// { -// start_pk = Start_Index( pk, &thb_list ); -// end_pk = End_Index( pk, &thb_list ); -// -// for( t = start_pk; t < end_pk; ++t ) -// { -// if( thb_list.three_body_list[t].thb == i ) -// { -// p_ijk = &thb_list.three_body_list[num_thb_intrs]; -// p_kji = &thb_list.three_body_list[t]; -// -// p_ijk->thb = bond_list.bond_list[pk].nbr; -// p_ijk->pthb = pk; -// p_ijk->theta = p_kji->theta; -// rvec_Copy( p_ijk->dcos_di, p_kji->dcos_dk ); -// rvec_Copy( p_ijk->dcos_dj, p_kji->dcos_dj ); -// rvec_Copy( p_ijk->dcos_dk, p_kji->dcos_di ); -// -// ++num_thb_intrs; -// break; -// } -// } -// } - - /* and this is the second for loop mentioned above */ - // Except that now the loop goes all the way from start_j to end_j + /* compute _ALL_ 3-body intrs */ for ( pk = start_j; pk < end_j; ++pk ) { if ( pk == pi ) @@ -332,11 +295,9 @@ CUDA_GLOBAL void k_valence_angles_part1( reax_atom *my_atoms, - (2.0 + exp_pen3) * ( -p_pen3 * exp_pen3 + p_pen4 * exp_pen4 )) / SQR( trm_pen34 ); - /* very important: since each kernel generates all interactions, - * need to prevent all energies becoming duplicates */ + e_pen = p_pen1 * f9_Dj * exp_pen2ij * exp_pen2jk; if ( pk < pi ) { - e_pen = p_pen1 * f9_Dj * exp_pen2ij * exp_pen2jk; e_pen_l += e_pen; } @@ -355,7 +316,7 @@ CUDA_GLOBAL void k_valence_angles_part1( reax_atom *my_atoms, * EXP( -p_coa3 * SQR(workspace.total_bond_order[i] - BOA_ij) ) * EXP( -p_coa3 * SQR(workspace.total_bond_order[k] - BOA_jk) ) / (1.0 + exp_coa2); - /* similar to above comment regarding if statement */ + if ( pk < pi ) { e_coa_l += e_coa; @@ -368,7 +329,6 @@ CUDA_GLOBAL void k_valence_angles_part1( reax_atom *my_atoms, CEcoa5 = -2.0 * p_coa3 * (workspace.total_bond_order[k] - BOA_jk) * e_coa; /* calculate force contributions */ - /* we must again check for pk < pi for entire forces part */ if ( pk < pi ) { #if !defined(CUDA_ACCUM_ATOMIC) @@ -446,7 +406,6 @@ CUDA_GLOBAL void k_valence_angles_virial_part1( reax_atom *my_atoms, int i, j, pi, k, pk, t; int type_i, type_j, type_k; int start_j, end_j; -// int start_pk, end_pk; int cnt, num_thb_intrs; real temp, temp_bo_jt, pBOjt7; real p_val1, p_val2, p_val3, p_val4, p_val5; @@ -468,7 +427,6 @@ CUDA_GLOBAL void k_valence_angles_virial_part1( reax_atom *my_atoms, three_body_header *thbh; three_body_parameters *thbp; three_body_interaction_data *p_ijk; -// three_body_interaction_data *p_kji; bond_data *pbond_ij, *pbond_jk, *pbond_jt; bond_order_data *bo_ij, *bo_jk, *bo_jt; @@ -489,7 +447,6 @@ CUDA_GLOBAL void k_valence_angles_virial_part1( reax_atom *my_atoms, p_val8 = gp.l[33]; p_val9 = gp.l[16]; p_val10 = gp.l[17]; - //num_thb_intrs = j * THREE_BODY_OFFSET; e_ang_l = 0.0; e_coa_l = 0.0; e_pen_l = 0.0; @@ -569,41 +526,7 @@ CUDA_GLOBAL void k_valence_angles_virial_part1( reax_atom *my_atoms, i = pbond_ij->nbr; type_i = my_atoms[i].type; - /* first copy 3-body intrs from previously computed ones where i > k. - * IMPORTANT: if it is less costly to compute theta and its - * derivative, we should definitely re-compute them, - * instead of copying! - * in the second for-loop below, we compute only new 3-body intrs - * where i < k */ - // The copy loop commented out because strange asynchronous issues started to surface - // Each kernel now manually generates everything -// for( pk = start_j; pk < pi; ++pk ) -// { -// start_pk = Start_Index( pk, &thb_list ); -// end_pk = End_Index( pk, &thb_list ); -// -// for( t = start_pk; t < end_pk; ++t ) -// { -// if( thb_list.three_body_list[t].thb == i ) -// { -// p_ijk = &thb_list.three_body_list[num_thb_intrs]; -// p_kji = &thb_list.three_body_list[t]; -// -// p_ijk->thb = bond_list.bond_list[pk].nbr; -// p_ijk->pthb = pk; -// p_ijk->theta = p_kji->theta; -// rvec_Copy( p_ijk->dcos_di, p_kji->dcos_dk ); -// rvec_Copy( p_ijk->dcos_dj, p_kji->dcos_dj ); -// rvec_Copy( p_ijk->dcos_dk, p_kji->dcos_di ); -// -// ++num_thb_intrs; -// break; -// } -// } -// } - - /* and this is the second for loop mentioned above */ - // Except that now the loop goes all the way from start_j to end_j + /* compute _ALL_ 3-body intrs */ for ( pk = start_j; pk < end_j; ++pk ) { if ( pk == pi ) @@ -736,11 +659,9 @@ CUDA_GLOBAL void k_valence_angles_virial_part1( reax_atom *my_atoms, - (2.0 + exp_pen3) * ( -p_pen3 * exp_pen3 + p_pen4 * exp_pen4 )) / SQR( trm_pen34 ); - /* very important: since each kernel generates all interactions, - * need to prevent all energies becoming duplicates */ + e_pen = p_pen1 * f9_Dj * exp_pen2ij * exp_pen2jk; if ( pk < pi ) { - e_pen = p_pen1 * f9_Dj * exp_pen2ij * exp_pen2jk; e_pen_l += e_pen; } @@ -759,7 +680,7 @@ CUDA_GLOBAL void k_valence_angles_virial_part1( reax_atom *my_atoms, * EXP( -p_coa3 * SQR(workspace.total_bond_order[i] - BOA_ij) ) * EXP( -p_coa3 * SQR(workspace.total_bond_order[k] - BOA_jk) ) / (1.0 + exp_coa2); - /* similar to above comment regarding if statement */ + if ( pk < pi ) { e_coa_l += e_coa; @@ -772,7 +693,6 @@ CUDA_GLOBAL void k_valence_angles_virial_part1( reax_atom *my_atoms, CEcoa5 = -2.0 * p_coa3 * (workspace.total_bond_order[k] - BOA_jk) * e_coa; /* calculate force contributions */ - /* we must again check for pk < pi for entire forces part */ if ( pk < pi ) { #if !defined(CUDA_ACCUM_ATOMIC) diff --git a/PG-PuReMD/src/forces.c b/PG-PuReMD/src/forces.c index 29e220c7..bca63ec9 100644 --- a/PG-PuReMD/src/forces.c +++ b/PG-PuReMD/src/forces.c @@ -27,6 +27,7 @@ #if defined(PURE_REAX) #include "forces.h" + #include "allocate.h" #include "bond_orders.h" #include "bonds.h" #include "basic_comm.h" @@ -45,6 +46,7 @@ #elif defined(LAMMPS_REAX) #include "reax_forces.h" + #include "reax_allocate.h" #include "reax_bond_orders.h" #include "reax_bonds.h" #include "reax_basic_comm.h" @@ -435,32 +437,27 @@ static void Init_Distance( reax_system *system, control_params *control, { int i, j, pj; int start_i, end_i; - int renbr; reax_list *far_nbr_list; reax_atom *atom_i, *atom_j; far_nbr_list = lists[FAR_NBRS]; - renbr = (data->step - data->prev_steps) % control->reneighbor == 0 ? TRUE : FALSE; - if ( renbr == FALSE ) + for ( i = 0; i < system->N; ++i ) { - for ( i = 0; i < system->N; ++i ) - { - atom_i = &system->my_atoms[i]; - start_i = Start_Index( i, far_nbr_list ); - end_i = End_Index( i, far_nbr_list ); + atom_i = &system->my_atoms[i]; + start_i = Start_Index( i, far_nbr_list ); + end_i = End_Index( i, far_nbr_list ); - /* update distance and displacement vector between atoms i and j (i-j) */ - for ( pj = start_i; pj < end_i; ++pj ) - { - j = far_nbr_list->far_nbr_list.nbr[pj]; - atom_j = &system->my_atoms[j]; - - far_nbr_list->far_nbr_list.dvec[pj][0] = atom_j->x[0] - atom_i->x[0]; - far_nbr_list->far_nbr_list.dvec[pj][1] = atom_j->x[1] - atom_i->x[1]; - far_nbr_list->far_nbr_list.dvec[pj][2] = atom_j->x[2] - atom_i->x[2]; - far_nbr_list->far_nbr_list.d[pj] = rvec_Norm( far_nbr_list->far_nbr_list.dvec[pj] ); - } + /* update distance and displacement vector between atoms i and j (i-j) */ + for ( pj = start_i; pj < end_i; ++pj ) + { + j = far_nbr_list->far_nbr_list.nbr[pj]; + atom_j = &system->my_atoms[j]; + + far_nbr_list->far_nbr_list.dvec[pj][0] = atom_j->x[0] - atom_i->x[0]; + far_nbr_list->far_nbr_list.dvec[pj][1] = atom_j->x[1] - atom_i->x[1]; + far_nbr_list->far_nbr_list.dvec[pj][2] = atom_j->x[2] - atom_i->x[2]; + far_nbr_list->far_nbr_list.d[pj] = rvec_Norm( far_nbr_list->far_nbr_list.dvec[pj] ); } } } @@ -672,12 +669,13 @@ static void Init_CM_Half_NT( reax_system *system, control_params *control, } } - /* reallocation checks */ + /* reallocation check */ for ( i = 0; i < system->N; ++i ) { if ( i < system->n && H->end[i] - H->start[i] > system->max_cm_entries[i] ) { workspace->realloc.cm = TRUE; + break; } } } @@ -887,12 +885,13 @@ static void Init_CM_Full_NT( reax_system *system, control_params *control, } } - /* reallocation checks */ + /* reallocation check */ for ( i = 0; i < system->N; ++i ) { if ( i < system->n && H->end[i] - H->start[i] > system->max_cm_entries[i] ) { workspace->realloc.cm = TRUE; + break; } } } @@ -967,12 +966,13 @@ static void Init_CM_Half_FS( reax_system *system, control_params *control, H->end[i] = cm_top; } - /* reallocation checks */ + /* reallocation check */ for ( i = 0; i < system->n; ++i ) { if ( H->end[i] - H->start[i] > system->max_cm_entries[i] ) { workspace->realloc.cm = TRUE; + break; } } } @@ -1038,12 +1038,13 @@ static void Init_CM_Full_FS( reax_system *system, control_params *control, H->end[i] = cm_top; } - /* reallocation checks */ + /* reallocation check */ for ( i = 0; i < system->n; ++i ) { if ( H->end[i] - H->start[i] > system->max_cm_entries[i] ) { workspace->realloc.cm = TRUE; + break; } } } @@ -1210,14 +1211,18 @@ static void Init_Bond_Half( reax_system *system, control_params *control, if ( Num_Entries( i, bond_list ) > system->max_bonds[i] ) { workspace->realloc.bonds = TRUE; + break; } + } - if ( i < system->n - && system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond == H_ATOM + for ( i = 0; i < system->n; ++i ) + { + if ( system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond == H_ATOM && Num_Entries( system->my_atoms[i].Hindex, hbond_list ) - > system->max_hbonds[system->my_atoms[i].Hindex] ) + > system->max_hbonds[system->my_atoms[i].Hindex] ) { workspace->realloc.hbonds = TRUE; + break; } } } @@ -1387,14 +1392,18 @@ static void Init_Bond_Full( reax_system *system, control_params *control, if ( Num_Entries( i, bond_list ) > system->max_bonds[i] ) { workspace->realloc.bonds = TRUE; + break; } + } - if ( i < system->n - && system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond == H_ATOM + for ( i = 0; i < system->n; ++i ) + { + if ( system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond == H_ATOM && Num_Entries( system->my_atoms[i].Hindex, hbond_list ) > system->max_hbonds[system->my_atoms[i].Hindex] ) { workspace->realloc.hbonds = TRUE; + break; } } @@ -1449,398 +1458,180 @@ static void Compute_NonBonded_Forces( reax_system * const system, } -/* this version of Compute_Total_Force computes forces from - * coefficients accumulated by all interaction functions. - * Saves enormous time & space! */ -static void Compute_Total_Force( reax_system * const system, - control_params * const control, - simulation_data * const data, storage * const workspace, reax_list ** const lists, - mpi_datatypes * const mpi_data ) + +static void Estimate_Storages_CM( reax_system * const system, control_params * const control, + reax_list ** const lists, int * const matrix_dim, int cm_format ) { - int i, pj; - reax_list * const bond_list = lists[BONDS]; + int i, j, pj; + int start_i, end_i; + int local; + real cutoff; + reax_list *far_nbr_list; + reax_atom *atom_i, *atom_j; +#if defined(NEUTRAL_TERRITORY) + int mark[6] = {1, 1, 2, 2, 2, 2}; +#endif - for ( i = 0; i < system->N; ++i ) + far_nbr_list = lists[FAR_NBRS]; + *matrix_dim = 0; + + for ( i = 0; i < system->total_cap; ++i ) { - for ( pj = Start_Index(i, bond_list); pj < End_Index(i, bond_list); ++pj ) + if ( i < system->local_cap ) { - if ( i < bond_list->bond_list[pj].nbr ) - { - if ( control->virial == 0 ) - { - Add_dBond_to_Forces( i, pj, system, data, workspace, lists ); - } - else - { - Add_dBond_to_Forces_NPT( i, pj, system, data, workspace, lists ); - } - } + system->cm_entries[i] = 0; } } -#if defined(PURE_REAX) - /* now all forces are computed to their partially-final values - * based on the neighbors information each processor has had. - * final values of force on each atom needs to be computed by adding up - * all partially-final pieces */ - Coll_FS( system, mpi_data, workspace->f, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); - for ( i = 0; i < system->n; ++i ) + for ( i = 0; i < system->N; ++i ) { - rvec_Copy( system->my_atoms[i].f, workspace->f[i] ); - } - -#if defined(TEST_FORCES) - Coll_FS( system, mpi_data, workspace->f_ele, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); - Coll_FS( system, mpi_data, workspace->f_vdw, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); - Coll_FS( system, mpi_data, workspace->f_be, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); - Coll_FS( system, mpi_data, workspace->f_lp, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); - Coll_FS( system, mpi_data, workspace->f_ov, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); - Coll_FS( system, mpi_data, workspace->f_un, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); - Coll_FS( system, mpi_data, workspace->f_ang, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); - Coll_FS( system, mpi_data, workspace->f_coa, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); - Coll_FS( system, mpi_data, workspace->f_pen, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); - Coll_FS( system, mpi_data, workspace->f_hb, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); - Coll_FS( system, mpi_data, workspace->f_tor, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); - Coll_FS( system, mpi_data, workspace->f_con, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); -#endif + atom_i = &system->my_atoms[i]; + start_i = Start_Index( i, far_nbr_list ); + end_i = End_Index( i, far_nbr_list ); + if ( i < system->n ) + { + local = 1; + cutoff = control->nonb_cut; + ++system->cm_entries[i]; + ++(*matrix_dim); + } +#if defined(NEUTRAL_TERRITORY) + else if ( atom_i->nt_dir != -1 ) + { + local = 2; + cutoff = control->nonb_cut; + ++(*matrix_dim); + } #endif -} + else + { + local = 0; + cutoff = control->bond_cut; + } + for ( pj = start_i; pj < end_i; ++pj ) + { + j = far_nbr_list->far_nbr_list.nbr[pj]; -static int Init_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, reax_list **lists, - output_controls *out_control, mpi_datatypes *mpi_data ) -{ -#if defined(LOG_PERFORMANCE) - double time; - - time = Get_Time( ); +#if !defined(NEUTRAL_TERRITORY) + if ( far_nbr_list->format == HALF_LIST ) #endif + { + atom_j = &system->my_atoms[j]; + } - Init_Distance( system, control, data, workspace, lists, out_control ); - -#if defined(LOG_PERFORMANCE) - Update_Timing_Info( &time, &data->timing.init_dist ); + if ( far_nbr_list->far_nbr_list.d[pj] <= cutoff ) + { + if ( local == 1 ) + { +#if defined(NEUTRAL_TERRITORY) + if( atom_j->nt_dir > 0 || j < system->n ) + { + ++system->cm_entries[i]; + } +#else + if ( (far_nbr_list->format == HALF_LIST + && (j < system->n || atom_i->orig_id < atom_j->orig_id)) + || far_nbr_list->format == FULL_LIST ) + { + ++system->cm_entries[i]; + } #endif + } #if defined(NEUTRAL_TERRITORY) - if ( workspace->H.format == SYM_HALF_MATRIX ) - { - Init_CM_Half_NT( system, control, data, workspace, lists, out_control ); - } - else - { - Init_CM_Full_NT( system, control, data, workspace, lists, out_control ); - } -#else - if ( workspace->H.format == SYM_HALF_MATRIX ) - { - Init_CM_Half_FS( system, control, data, workspace, lists, out_control ); + else if ( local == 2 ) + { + if( ( atom_j->nt_dir != -1 && mark[atom_i->nt_dir] != mark[atom_j->nt_dir] ) + || ( j < system->n && atom_i->nt_dir != 0 )) + { + ++system->cm_entries[i]; + } + } +#endif + } + } } - else + +#if defined(NEUTRAL_TERRITORY) + /* Since we don't know the NT atoms' position yet, Htop cannot be calculated accurately. + * Therefore, we assume it is full and divide 2 if necessary. */ + if ( cm_format == SYM_HALF_MATRIX ) { - Init_CM_Full_FS( system, control, data, workspace, lists, out_control ); + for ( i = 0; i < system->local_cap; ++i ) + { + system->cm_entries[i] = (system->cm_entries[i] + system->n + 1) / 2; + } } #endif -#if defined(LOG_PERFORMANCE) - Update_Timing_Info( &time, &data->timing.init_cm ); +#if defined(NEUTRAL_TERRITORY) + *matrix_dim = (int) MAX( *matrix_dim * SAFE_ZONE_NT, MIN_CAP ); +#else + *matrix_dim = (int) MAX( *matrix_dim * SAFE_ZONE, MIN_CAP ); #endif - if ( lists[FAR_NBRS]->format == HALF_LIST ) + for ( i = 0; i < system->N; ++i ) { - Init_Bond_Half( system, control, data, workspace, lists, out_control ); + if ( i < system->local_cap ) + { +#if defined(NEUTRAL_TERRITORY) + system->max_cm_entries[i] = MAX( (int)(system->cm_entries[i] * SAFE_ZONE_NT), MIN_CM_ENTRIES ); +#else + system->max_cm_entries[i] = MAX( (int)(system->cm_entries[i] * SAFE_ZONE), MIN_CM_ENTRIES ); +#endif + } } - else + + /* set currently unused space to min. capacity */ + for ( i = system->N; i < system->local_cap; ++i ) { - Init_Bond_Full( system, control, data, workspace, lists, out_control ); + system->max_cm_entries[i] = MIN_CM_ENTRIES; } -#if defined(LOG_PERFORMANCE) - Update_Timing_Info( &time, &data->timing.init_bond ); -#endif + /* reductions to get totals */ + system->total_cm_entries = 0; - return (workspace->realloc.bonds == FALSE - && workspace->realloc.hbonds == FALSE - && workspace->realloc.cm == FALSE) ? SUCCESS : FAILURE; + for ( i = 0; i < system->local_cap; ++i ) + { + system->total_cm_entries += system->max_cm_entries[i]; + } + + system->total_cm_entries = MAX( system->total_cm_entries, MIN_CAP * MIN_CM_ENTRIES ); } -static int Init_Forces_No_Charges( reax_system * const system, control_params * const control, - simulation_data * const data, storage * const workspace, reax_list ** const lists, - output_controls * const out_control ) +static void Estimate_Storages_Bonds( reax_system * const system, + control_params * const control, reax_list ** const lists ) { int i, j, pj; int start_i, end_i; int type_i, type_j; - int btop_i; - int ihb, jhb, ihb_top; - int local, flag, renbr; + int ihb, jhb; + int local; real cutoff; - reax_list *far_nbr_list, *bond_list, *hbond_list; + real r_ij; + real C12, C34, C56; + real BO, BO_s, BO_pi, BO_pi2; + reax_list *far_nbr_list; single_body_parameters *sbp_i, *sbp_j; two_body_parameters *twbp; reax_atom *atom_i, *atom_j; - int jhb_top; - int start_j, end_j; - int btop_j; +#if defined(NEUTRAL_TERRITORY) + int mark[6] = {1, 1, 2, 2, 2, 2}; +#endif far_nbr_list = lists[FAR_NBRS]; - bond_list = lists[BONDS]; - hbond_list = lists[HBONDS]; - for ( i = 0; i < system->n; ++i ) - { - workspace->bond_mark[i] = 0; - } - for ( i = system->n; i < system->N; ++i ) + for ( i = 0; i < system->total_cap; ++i ) { - /* put ghost atoms to an infinite distance */ - workspace->bond_mark[i] = 1000; + system->bonds[i] = 0; } - renbr = (data->step - data->prev_steps) % control->reneighbor == 0 ? TRUE : FALSE; - - for ( i = 0; i < system->N; ++i ) - { - atom_i = &system->my_atoms[i]; - type_i = atom_i->type; - start_i = Start_Index( i, far_nbr_list ); - end_i = End_Index( i, far_nbr_list ); - if ( far_nbr_list->format == HALF_LIST ) - { - /* start at end because other atoms - * can add to this atom's list (half-list) */ - btop_i = End_Index( i, bond_list ); - } - else if ( far_nbr_list->format == FULL_LIST ) - { - btop_i = Start_Index( i, bond_list ); - } - else - { - btop_i = 0; - } - sbp_i = &system->reax_param.sbp[type_i]; - ihb = NON_H_BONDING_ATOM; - ihb_top = -1; - - if ( i < system->n ) - { - local = TRUE; - cutoff = MAX( control->hbond_cut, control->bond_cut ); - } - else - { - local = FALSE; - cutoff = control->bond_cut; - } - - if ( local == TRUE && control->hbond_cut > 0.0 ) - { - ihb = sbp_i->p_hbond; - - if ( ihb == H_ATOM ) - { - if ( far_nbr_list->format == HALF_LIST ) - { - /* start at end because other atoms - * can add to this atom's list (half-list) */ - ihb_top = End_Index( atom_i->Hindex, hbond_list ); - } - else if ( far_nbr_list->format == FULL_LIST ) - { - ihb_top = Start_Index( atom_i->Hindex, hbond_list ); - } - } - else - { - ihb_top = -1; - } - } - - /* update i-j distance - check if j is within cutoff */ - for ( pj = start_i; pj < end_i; ++pj ) - { - j = far_nbr_list->far_nbr_list.nbr[pj]; - atom_j = &system->my_atoms[j]; - - if ( renbr == TRUE ) - { - if ( far_nbr_list->far_nbr_list.d[pj] <= cutoff ) - { - flag = TRUE; - } - else - { - flag = FALSE; - } - } - else - { - far_nbr_list->far_nbr_list.dvec[pj][0] = atom_j->x[0] - atom_i->x[0]; - far_nbr_list->far_nbr_list.dvec[pj][1] = atom_j->x[1] - atom_i->x[1]; - far_nbr_list->far_nbr_list.dvec[pj][2] = atom_j->x[2] - atom_i->x[2]; - far_nbr_list->far_nbr_list.d[pj] = rvec_Norm_Sqr( far_nbr_list->far_nbr_list.dvec[pj] ); - - if ( far_nbr_list->far_nbr_list.d[pj] <= SQR(cutoff) ) - { - far_nbr_list->far_nbr_list.d[pj] = SQRT( far_nbr_list->far_nbr_list.d[pj] ); - flag = TRUE; - } - else - { - flag = FALSE; - } - } - - if ( flag == TRUE ) - { - type_j = atom_j->type; - sbp_j = &system->reax_param.sbp[type_j]; - twbp = &system->reax_param.tbp[ - index_tbp(type_i, type_j, system->reax_param.num_atom_types) ]; - - if ( local == TRUE ) - { - /* hydrogen bond lists */ - if ( control->hbond_cut > 0.0 - && (ihb == H_ATOM || ihb == H_BONDING_ATOM) - && far_nbr_list->far_nbr_list.d[pj] <= control->hbond_cut ) - { - jhb = sbp_j->p_hbond; - - if ( ihb == H_ATOM && jhb == H_BONDING_ATOM ) - { - hbond_list->hbond_list[ihb_top].nbr = j; - hbond_list->hbond_list[ihb_top].scl = 1; - hbond_list->hbond_list[ihb_top].ptr = pj; - ++ihb_top; - } - /* only add to list for local j (far nbrs is half-list) */ - else if ( far_nbr_list->format == HALF_LIST - && ihb == H_BONDING_ATOM && jhb == H_ATOM ) - { - jhb_top = End_Index( atom_j->Hindex, hbond_list ); - hbond_list->hbond_list[jhb_top].nbr = i; - hbond_list->hbond_list[jhb_top].scl = -1; - hbond_list->hbond_list[jhb_top].ptr = pj; - Set_End_Index( atom_j->Hindex, jhb_top + 1, hbond_list ); - } - } - } - - /* uncorrected bond orders */ - if ( //(workspace->bond_mark[i] < 3 || workspace->bond_mark[j] < 3) && - far_nbr_list->far_nbr_list.d[pj] <= control->bond_cut - && BOp( workspace, bond_list, control->bo_cut, - i, btop_i, far_nbr_list->far_nbr_list.nbr[pj], - &far_nbr_list->far_nbr_list.rel_box[pj], far_nbr_list->far_nbr_list.d[pj], - &far_nbr_list->far_nbr_list.dvec[pj], far_nbr_list->format, - sbp_i, sbp_j, twbp ) == TRUE ) - { - ++btop_i; - - if ( workspace->bond_mark[j] > workspace->bond_mark[i] + 1 ) - { - workspace->bond_mark[j] = workspace->bond_mark[i] + 1; - } - else if ( workspace->bond_mark[i] > workspace->bond_mark[j] + 1 ) - { - workspace->bond_mark[i] = workspace->bond_mark[j] + 1; - } - } - } - } - - Set_End_Index( i, btop_i, bond_list ); - - if ( local == TRUE && ihb == H_ATOM ) - { - Set_End_Index( atom_i->Hindex, ihb_top, hbond_list ); - } - } - - if ( far_nbr_list->format == FULL_LIST ) - { - /* set sym_index for bonds list (far_nbrs full list) */ - for ( i = 0; i < system->N; ++i ) - { - start_i = Start_Index( i, bond_list ); - end_i = End_Index( i, bond_list ); - - for ( btop_i = start_i; btop_i < end_i; ++btop_i ) - { - j = bond_list->bond_list[btop_i].nbr; - start_j = Start_Index( j, bond_list ); - end_j = End_Index( j, bond_list ); - - for ( btop_j = start_j; btop_j < end_j; ++btop_j ) - { - if ( bond_list->bond_list[btop_j].nbr == i ) - { - bond_list->bond_list[btop_i].sym_index = btop_j; - break; - } - } - } - } - } - - /* reallocation checks */ - for ( i = 0; i < system->N; ++i ) - { - if ( Num_Entries( i, bond_list ) > system->max_bonds[i] ) - { - workspace->realloc.bonds = TRUE; - } - - if ( i < system->n - && system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond == H_ATOM - && Num_Entries( system->my_atoms[i].Hindex, hbond_list ) - > system->max_hbonds[system->my_atoms[i].Hindex] ) - { - workspace->realloc.hbonds = TRUE; - } - } - - return (workspace->realloc.bonds == TRUE - || workspace->realloc.hbonds == TRUE) ? FAILURE : SUCCESS; -} - - -void Estimate_Storages( reax_system * const system, control_params * const control, - reax_list ** const lists, int * const matrix_dim, int cm_format ) -{ - int i, j, pj; - int start_i, end_i; - int type_i, type_j; - int ihb, jhb; - int local; - real cutoff; - real r_ij; - real C12, C34, C56; - real BO, BO_s, BO_pi, BO_pi2; - reax_list *far_nbr_list; - single_body_parameters *sbp_i, *sbp_j; - two_body_parameters *twbp; - reax_atom *atom_i, *atom_j; -#if defined(NEUTRAL_TERRITORY) - int mark[6] = {1, 1, 2, 2, 2, 2}; -#endif - - far_nbr_list = lists[FAR_NBRS]; - *matrix_dim = 0; - for ( i = 0; i < system->total_cap; ++i ) { - system->bonds[i] = 0; system->hbonds[i] = 0; - if ( i < system->local_cap ) - { - system->cm_entries[i] = 0; - } } for ( i = 0; i < system->N; ++i ) @@ -1855,8 +1646,6 @@ void Estimate_Storages( reax_system * const system, control_params * const contr { local = 1; cutoff = control->nonb_cut; - ++system->cm_entries[i]; - ++(*matrix_dim); ihb = sbp_i->p_hbond; } #if defined(NEUTRAL_TERRITORY) @@ -1864,7 +1653,6 @@ void Estimate_Storages( reax_system * const system, control_params * const contr { local = 2; cutoff = control->nonb_cut; - ++(*matrix_dim); ihb = -1; } #endif @@ -1896,20 +1684,6 @@ void Estimate_Storages( reax_system * const system, control_params * const contr if ( local == 1 ) { -#if defined(NEUTRAL_TERRITORY) - if( atom_j->nt_dir > 0 || j < system->n ) - { - ++system->cm_entries[i]; - } -#else - if ( (far_nbr_list->format == HALF_LIST - && (j < system->n || atom_i->orig_id < atom_j->orig_id)) - || far_nbr_list->format == FULL_LIST ) - { - ++system->cm_entries[i]; - } -#endif - /* hydrogen bond lists */ if ( control->hbond_cut > 0.1 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) @@ -1930,17 +1704,6 @@ void Estimate_Storages( reax_system * const system, control_params * const contr } } -#if defined(NEUTRAL_TERRITORY) - else if ( local == 2 ) - { - if( ( atom_j->nt_dir != -1 && mark[atom_i->nt_dir] != mark[atom_j->nt_dir] ) - || ( j < system->n && atom_i->nt_dir != 0 )) - { - ++system->cm_entries[i]; - } - } -#endif - /* uncorrected bond orders */ if ( far_nbr_list->far_nbr_list.d[pj] <= control->bond_cut ) { @@ -1993,24 +1756,6 @@ void Estimate_Storages( reax_system * const system, control_params * const contr } } -#if defined(NEUTRAL_TERRITORY) - /* Since we don't know the NT atoms' position yet, Htop cannot be calculated accurately. - * Therefore, we assume it is full and divide 2 if necessary. */ - if ( cm_format == SYM_HALF_MATRIX ) - { - for ( i = 0; i < system->local_cap; ++i ) - { - system->cm_entries[i] = (system->cm_entries[i] + system->n + 1) / 2; - } - } -#endif - -#if defined(NEUTRAL_TERRITORY) - *matrix_dim = (int) MAX( *matrix_dim * SAFE_ZONE_NT, MIN_CAP ); -#else - *matrix_dim = (int) MAX( *matrix_dim * SAFE_ZONE, MIN_CAP ); -#endif - for ( i = 0; i < system->total_cap; ++i ) { system->max_hbonds[i] = 0; @@ -2031,14 +1776,6 @@ void Estimate_Storages( reax_system * const system, control_params * const contr system->max_hbonds[ system->my_atoms[i].Hindex ] = MAX( (int)(system->hbonds[ system->my_atoms[i].Hindex ] * SAFE_ZONE), MIN_HBONDS ); } - if ( i < system->local_cap ) - { -#if defined(NEUTRAL_TERRITORY) - system->max_cm_entries[i] = MAX( (int)(system->cm_entries[i] * SAFE_ZONE_NT), MIN_CM_ENTRIES ); -#else - system->max_cm_entries[i] = MAX( (int)(system->cm_entries[i] * SAFE_ZONE), MIN_CM_ENTRIES ); -#endif - } } /* set currently unused space to min. capacity */ @@ -2050,15 +1787,10 @@ void Estimate_Storages( reax_system * const system, control_params * const contr { system->max_hbonds[i] = MIN_HBONDS; } - for ( i = system->N; i < system->local_cap; ++i ) - { - system->max_cm_entries[i] = MIN_CM_ENTRIES; - } /* reductions to get totals */ system->total_bonds = 0; system->total_hbonds = 0; - system->total_cm_entries = 0; system->total_thbodies = 0; for ( i = 0; i < system->total_cap; ++i ) @@ -2069,10 +1801,6 @@ void Estimate_Storages( reax_system * const system, control_params * const contr { system->total_hbonds += system->max_hbonds[i]; } - for ( i = 0; i < system->local_cap; ++i ) - { - system->total_cm_entries += system->max_cm_entries[i]; - } if ( far_nbr_list->format == HALF_LIST ) { for ( i = 0; i < system->total_cap; ++i ) @@ -2090,7 +1818,6 @@ void Estimate_Storages( reax_system * const system, control_params * const contr system->total_bonds = MAX( system->total_bonds, MIN_CAP * MIN_BONDS ); system->total_hbonds = MAX( system->total_hbonds, MIN_CAP * MIN_HBONDS ); - system->total_cm_entries = MAX( system->total_cm_entries, MIN_CAP * MIN_CM_ENTRIES ); system->total_thbodies = MAX( system->total_thbodies * SAFE_ZONE, MIN_3BODIES ); /* duplicate info in atom structs in case of @@ -2103,11 +1830,431 @@ void Estimate_Storages( reax_system * const system, control_params * const contr { system->my_atoms[i].num_hbonds = system->hbonds[i]; } +} + + +/* this version of Compute_Total_Force computes forces from + * coefficients accumulated by all interaction functions. + * Saves enormous time & space! */ +static void Compute_Total_Force( reax_system * const system, + control_params * const control, + simulation_data * const data, storage * const workspace, reax_list ** const lists, + mpi_datatypes * const mpi_data ) +{ + int i, pj; + reax_list * const bond_list = lists[BONDS]; + + for ( i = 0; i < system->N; ++i ) + { + for ( pj = Start_Index(i, bond_list); pj < End_Index(i, bond_list); ++pj ) + { + if ( i < bond_list->bond_list[pj].nbr ) + { + if ( control->virial == 0 ) + { + Add_dBond_to_Forces( i, pj, system, data, workspace, lists ); + } + else + { + Add_dBond_to_Forces_NPT( i, pj, system, data, workspace, lists ); + } + } + } + } + +#if defined(PURE_REAX) + /* now all forces are computed to their partially-final values + * based on the neighbors information each processor has had. + * final values of force on each atom needs to be computed by adding up + * all partially-final pieces */ + Coll_FS( system, mpi_data, workspace->f, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + for ( i = 0; i < system->n; ++i ) + { + rvec_Copy( system->my_atoms[i].f, workspace->f[i] ); + } + +#if defined(TEST_FORCES) + Coll_FS( system, mpi_data, workspace->f_ele, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_vdw, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_be, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_lp, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_ov, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_un, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_ang, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_coa, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_pen, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_hb, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_tor, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_con, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); +#endif + +#endif +} + + +static int Init_Forces( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control, mpi_datatypes *mpi_data ) +{ + int i, renbr, ret; + static int dist_done = FALSE, cm_done = FALSE, bonds_done = FALSE; +#if defined(LOG_PERFORMANCE) + double time; + + time = Get_Time( ); +#endif + + renbr = (data->step - data->prev_steps) % control->reneighbor == 0 ? TRUE : FALSE; + + if ( renbr == FALSE && dist_done == FALSE ) + { + Init_Distance( system, control, data, workspace, lists, out_control ); + + dist_done = TRUE; + } + +#if defined(LOG_PERFORMANCE) + Update_Timing_Info( &time, &data->timing.init_dist ); +#endif + + if ( cm_done == FALSE ) + { + Init_Matrix_Row_Indices( &workspace->H, system->max_cm_entries ); + +#if defined(NEUTRAL_TERRITORY) + if ( workspace->H.format == SYM_HALF_MATRIX ) + { + Init_CM_Half_NT( system, control, data, workspace, lists, out_control ); + } + else + { + Init_CM_Full_NT( system, control, data, workspace, lists, out_control ); + } +#else + if ( workspace->H.format == SYM_HALF_MATRIX ) + { + Init_CM_Half_FS( system, control, data, workspace, lists, out_control ); + } + else + { + Init_CM_Full_FS( system, control, data, workspace, lists, out_control ); + } +#endif + } + +#if defined(LOG_PERFORMANCE) + Update_Timing_Info( &time, &data->timing.init_cm ); +#endif + + if ( bonds_done == FALSE ) + { + for ( i = 0; i < system->total_cap; ++i ) + { + workspace->total_bond_order[i] = 0.0; + } + for ( i = 0; i < system->total_cap; ++i ) + { + rvec_MakeZero( workspace->dDeltap_self[i] ); + } + + Init_List_Indices( lists[BONDS], system->max_bonds ); + Init_List_Indices( lists[HBONDS], system->max_hbonds ); + + if ( lists[FAR_NBRS]->format == HALF_LIST ) + { + Init_Bond_Half( system, control, data, workspace, lists, out_control ); + } + else + { + Init_Bond_Full( system, control, data, workspace, lists, out_control ); + } + } -#if defined(DEBUG_FOCUS) - fprintf( stderr, "[INFO] p%d @ estimate storages: total_cm_entries = %d, total_thbodies = %d\n", - system->my_rank, system->total_cm_entries, system->total_thbodies ); +#if defined(LOG_PERFORMANCE) + Update_Timing_Info( &time, &data->timing.init_bond ); #endif + + ret = (workspace->realloc.cm == FALSE + && workspace->realloc.bonds == FALSE + && workspace->realloc.hbonds == FALSE + ? SUCCESS : FAILURE); + + if ( workspace->realloc.cm == FALSE ) + { + cm_done = TRUE; + } + if ( workspace->realloc.bonds == FALSE && workspace->realloc.hbonds == FALSE ) + { + bonds_done = TRUE; + } + + if ( ret == SUCCESS ) + { + dist_done = FALSE; + cm_done = FALSE; + bonds_done = FALSE; + } + + return ret; +} + + +static int Init_Forces_No_Charges( reax_system * const system, control_params * const control, + simulation_data * const data, storage * const workspace, reax_list ** const lists, + output_controls * const out_control ) +{ + int i, j, pj; + int start_i, end_i; + int type_i, type_j; + int btop_i; + int ihb, jhb, ihb_top; + int local, flag, renbr; + real cutoff; + reax_list *far_nbr_list, *bond_list, *hbond_list; + single_body_parameters *sbp_i, *sbp_j; + two_body_parameters *twbp; + reax_atom *atom_i, *atom_j; + int jhb_top; + int start_j, end_j; + int btop_j; + + far_nbr_list = lists[FAR_NBRS]; + bond_list = lists[BONDS]; + hbond_list = lists[HBONDS]; + + for ( i = 0; i < system->n; ++i ) + { + workspace->bond_mark[i] = 0; + } + for ( i = system->n; i < system->N; ++i ) + { + /* put ghost atoms to an infinite distance */ + workspace->bond_mark[i] = 1000; + } + + renbr = (data->step - data->prev_steps) % control->reneighbor == 0 ? TRUE : FALSE; + + for ( i = 0; i < system->N; ++i ) + { + atom_i = &system->my_atoms[i]; + type_i = atom_i->type; + start_i = Start_Index( i, far_nbr_list ); + end_i = End_Index( i, far_nbr_list ); + if ( far_nbr_list->format == HALF_LIST ) + { + /* start at end because other atoms + * can add to this atom's list (half-list) */ + btop_i = End_Index( i, bond_list ); + } + else if ( far_nbr_list->format == FULL_LIST ) + { + btop_i = Start_Index( i, bond_list ); + } + else + { + btop_i = 0; + } + sbp_i = &system->reax_param.sbp[type_i]; + ihb = NON_H_BONDING_ATOM; + ihb_top = -1; + + if ( i < system->n ) + { + local = TRUE; + cutoff = MAX( control->hbond_cut, control->bond_cut ); + } + else + { + local = FALSE; + cutoff = control->bond_cut; + } + + if ( local == TRUE && control->hbond_cut > 0.0 ) + { + ihb = sbp_i->p_hbond; + + if ( ihb == H_ATOM ) + { + if ( far_nbr_list->format == HALF_LIST ) + { + /* start at end because other atoms + * can add to this atom's list (half-list) */ + ihb_top = End_Index( atom_i->Hindex, hbond_list ); + } + else if ( far_nbr_list->format == FULL_LIST ) + { + ihb_top = Start_Index( atom_i->Hindex, hbond_list ); + } + } + else + { + ihb_top = -1; + } + } + + /* update i-j distance - check if j is within cutoff */ + for ( pj = start_i; pj < end_i; ++pj ) + { + j = far_nbr_list->far_nbr_list.nbr[pj]; + atom_j = &system->my_atoms[j]; + + if ( renbr == TRUE ) + { + if ( far_nbr_list->far_nbr_list.d[pj] <= cutoff ) + { + flag = TRUE; + } + else + { + flag = FALSE; + } + } + else + { + far_nbr_list->far_nbr_list.dvec[pj][0] = atom_j->x[0] - atom_i->x[0]; + far_nbr_list->far_nbr_list.dvec[pj][1] = atom_j->x[1] - atom_i->x[1]; + far_nbr_list->far_nbr_list.dvec[pj][2] = atom_j->x[2] - atom_i->x[2]; + far_nbr_list->far_nbr_list.d[pj] = rvec_Norm_Sqr( far_nbr_list->far_nbr_list.dvec[pj] ); + + if ( far_nbr_list->far_nbr_list.d[pj] <= SQR(cutoff) ) + { + far_nbr_list->far_nbr_list.d[pj] = SQRT( far_nbr_list->far_nbr_list.d[pj] ); + flag = TRUE; + } + else + { + flag = FALSE; + } + } + + if ( flag == TRUE ) + { + type_j = atom_j->type; + sbp_j = &system->reax_param.sbp[type_j]; + twbp = &system->reax_param.tbp[ + index_tbp(type_i, type_j, system->reax_param.num_atom_types) ]; + + if ( local == TRUE ) + { + /* hydrogen bond lists */ + if ( control->hbond_cut > 0.0 + && (ihb == H_ATOM || ihb == H_BONDING_ATOM) + && far_nbr_list->far_nbr_list.d[pj] <= control->hbond_cut ) + { + jhb = sbp_j->p_hbond; + + if ( ihb == H_ATOM && jhb == H_BONDING_ATOM ) + { + hbond_list->hbond_list[ihb_top].nbr = j; + hbond_list->hbond_list[ihb_top].scl = 1; + hbond_list->hbond_list[ihb_top].ptr = pj; + ++ihb_top; + } + /* only add to list for local j (far nbrs is half-list) */ + else if ( far_nbr_list->format == HALF_LIST + && ihb == H_BONDING_ATOM && jhb == H_ATOM ) + { + jhb_top = End_Index( atom_j->Hindex, hbond_list ); + hbond_list->hbond_list[jhb_top].nbr = i; + hbond_list->hbond_list[jhb_top].scl = -1; + hbond_list->hbond_list[jhb_top].ptr = pj; + Set_End_Index( atom_j->Hindex, jhb_top + 1, hbond_list ); + } + } + } + + /* uncorrected bond orders */ + if ( //(workspace->bond_mark[i] < 3 || workspace->bond_mark[j] < 3) && + far_nbr_list->far_nbr_list.d[pj] <= control->bond_cut + && BOp( workspace, bond_list, control->bo_cut, + i, btop_i, far_nbr_list->far_nbr_list.nbr[pj], + &far_nbr_list->far_nbr_list.rel_box[pj], far_nbr_list->far_nbr_list.d[pj], + &far_nbr_list->far_nbr_list.dvec[pj], far_nbr_list->format, + sbp_i, sbp_j, twbp ) == TRUE ) + { + ++btop_i; + + if ( workspace->bond_mark[j] > workspace->bond_mark[i] + 1 ) + { + workspace->bond_mark[j] = workspace->bond_mark[i] + 1; + } + else if ( workspace->bond_mark[i] > workspace->bond_mark[j] + 1 ) + { + workspace->bond_mark[i] = workspace->bond_mark[j] + 1; + } + } + } + } + + Set_End_Index( i, btop_i, bond_list ); + + if ( local == TRUE && ihb == H_ATOM ) + { + Set_End_Index( atom_i->Hindex, ihb_top, hbond_list ); + } + } + + if ( far_nbr_list->format == FULL_LIST ) + { + /* set sym_index for bonds list (far_nbrs full list) */ + for ( i = 0; i < system->N; ++i ) + { + start_i = Start_Index( i, bond_list ); + end_i = End_Index( i, bond_list ); + + for ( btop_i = start_i; btop_i < end_i; ++btop_i ) + { + j = bond_list->bond_list[btop_i].nbr; + start_j = Start_Index( j, bond_list ); + end_j = End_Index( j, bond_list ); + + for ( btop_j = start_j; btop_j < end_j; ++btop_j ) + { + if ( bond_list->bond_list[btop_j].nbr == i ) + { + bond_list->bond_list[btop_i].sym_index = btop_j; + break; + } + } + } + } + } + + /* reallocation checks */ + for ( i = 0; i < system->N; ++i ) + { + if ( Num_Entries( i, bond_list ) > system->max_bonds[i] ) + { + workspace->realloc.bonds = TRUE; + } + + if ( i < system->n + && system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond == H_ATOM + && Num_Entries( system->my_atoms[i].Hindex, hbond_list ) + > system->max_hbonds[system->my_atoms[i].Hindex] ) + { + workspace->realloc.hbonds = TRUE; + } + } + + return (workspace->realloc.bonds == TRUE + || workspace->realloc.hbonds == TRUE) ? FAILURE : SUCCESS; +} + + +void Estimate_Storages( reax_system * const system, control_params * const control, + reax_list ** const lists, storage *workspace, int realloc_cm, + int realloc_bonds, int * const matrix_dim, int cm_format ) +{ + if ( realloc_cm == TRUE ) + { + Estimate_Storages_CM( system, control, lists, matrix_dim, cm_format ); + } + + if ( realloc_bonds == TRUE ) + { + Estimate_Storages_Bonds( system, control, lists ); + } } @@ -2145,7 +2292,11 @@ int Compute_Forces( reax_system * const system, control_params * const control, if ( ret != SUCCESS ) { - Estimate_Storages( system, control, lists, &matrix_dim, workspace->H.format ); + Estimate_Storages( system, control, lists, workspace, + workspace->realloc.cm, + (workspace->realloc.bonds == TRUE + || workspace->realloc.hbonds == TRUE ? TRUE : FALSE), + &matrix_dim, workspace->H.format ); } #if defined(LOG_PERFORMANCE) diff --git a/PG-PuReMD/src/forces.h b/PG-PuReMD/src/forces.h index 9e0c9a89..52da90bc 100644 --- a/PG-PuReMD/src/forces.h +++ b/PG-PuReMD/src/forces.h @@ -35,7 +35,7 @@ int Compute_Forces( reax_system * const, control_params * const, simulation_data storage * const, reax_list ** const, output_controls * const, mpi_datatypes * const ); void Estimate_Storages( reax_system * const, control_params * const, - reax_list ** const, int * const, int ); + reax_list ** const, storage *, int, int, int * const, int ); #ifdef __cplusplus } diff --git a/PG-PuReMD/src/init_md.c b/PG-PuReMD/src/init_md.c index 6ccc3bf9..1ea4b700 100644 --- a/PG-PuReMD/src/init_md.c +++ b/PG-PuReMD/src/init_md.c @@ -671,8 +671,9 @@ void Init_Lists( reax_system * const system, control_params * const control, fprintf( stderr, "[ERROR] p%d: failed to generate neighbor lists. Terminating...\n", system->my_rank ); MPI_Abort( MPI_COMM_WORLD, CANNOT_INITIALIZE ); } - - Estimate_Storages( system, control, lists, &matrix_dim, cm_format ); + + Estimate_Storages( system, control, lists, workspace, TRUE, TRUE, + &matrix_dim, cm_format ); #if defined(NEUTRAL_TERRITORY) Allocate_Matrix( &workspace->H, matrix_dim, system->local_cap, system->total_cm_entries, cm_format ); diff --git a/PG-PuReMD/src/reset_tools.c b/PG-PuReMD/src/reset_tools.c index 915a9efe..b4dac553 100644 --- a/PG-PuReMD/src/reset_tools.c +++ b/PG-PuReMD/src/reset_tools.c @@ -169,13 +169,22 @@ void Reset_Test_Forces( reax_system * const system, storage * const workspace ) void Reset_Workspace( reax_system * const system, storage * const workspace ) { - memset( workspace->total_bond_order, 0, sizeof(real) * system->total_cap ); - memset( workspace->dDeltap_self, 0, sizeof(rvec) * system->total_cap ); - memset( workspace->CdDelta, 0, sizeof(real) * system->total_cap ); - memset( workspace->f, 0, sizeof(rvec) * system->total_cap ); + int i; + + for ( i = 0; i < system->total_cap; ++i ) + { + workspace->CdDelta[i] = 0.0; + } + for ( i = 0; i < system->total_cap; ++i ) + { + rvec_MakeZero( workspace->f[i] ); + } #ifdef TEST_FORCES - memset( workspace->dDelta, 0, sizeof(rvec) * system->total_cap ); + for ( i = 0; i < system->total_cap; ++i ) + { + rvec_MakeZero( workspace->dDelta[i] ); + } Reset_Test_Forces( system, workspace ); #endif } @@ -211,37 +220,6 @@ void Reset_Out_Buffers( mpi_out_data * const out_buf, int n ) } -void Reset_Lists( reax_system * const system, control_params * const control, - storage * const workspace, reax_list ** const lists ) -{ - int i; - reax_list * const bond_list = lists[BONDS]; - reax_list * const hbond_list = lists[HBONDS]; - - if ( system->N > 0 ) - { - for ( i = 0; i < bond_list->n; ++i ) - { - Set_End_Index( i, Start_Index( i, bond_list ), bond_list ); - } - - if ( control->hbond_cut > 0.0 && system->numH > 0 ) - { - for ( i = 0; i < hbond_list->n; ++i ) - { - /* do not use Hindex, unconditionally reset end indices */ - Set_End_Index( i, Start_Index( i, hbond_list ), hbond_list ); - } - } - - for ( i = 0; i < workspace->H.n_max; ++i ) - { - workspace->H.end[i] = workspace->H.start[i]; - } - } -} - - void Reset( reax_system * const system, control_params * const control, simulation_data * const data, storage * const workspace, reax_list ** const lists ) @@ -256,6 +234,4 @@ void Reset( reax_system * const system, control_params * const control, } Reset_Workspace( system, workspace ); - - Reset_Lists( system, control, workspace, lists ); } diff --git a/PG-PuReMD/src/reset_tools.h b/PG-PuReMD/src/reset_tools.h index f47ce364..9c78ca61 100644 --- a/PG-PuReMD/src/reset_tools.h +++ b/PG-PuReMD/src/reset_tools.h @@ -37,9 +37,6 @@ void Reset_Timing( reax_timing * const ); void Reset_Workspace( reax_system * const, storage * const ); -void Reset_Lists( reax_system * const, control_params * const, - storage * const, reax_list** const ); - void Reset_Grid( grid * const ); void Reset_Out_Buffers( mpi_out_data * const, int ); -- GitLab