From 5afec69ffe89c0b2d3c3f2b5eeefe44156ede746 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnku@msu.edu> Date: Wed, 7 Jun 2017 12:46:31 -0400 Subject: [PATCH] PG-PuReMD: add CUB (v1.6.4) for optimized CUDA primitives. Change three-body lists to use CUB. --- .gitmodules | 4 + PG-PuReMD/src/allocate.c | 22 +++--- PG-PuReMD/src/cub | 1 + PG-PuReMD/src/cuda_allocate.cu | 3 + PG-PuReMD/src/cuda_forces.cu | 77 ++++++++----------- PG-PuReMD/src/cuda_neighbors.cu | 25 +++--- PG-PuReMD/src/cuda_reduction.cu | 109 ++++++++++++++++++++++----- PG-PuReMD/src/cuda_reduction.h | 1 + PG-PuReMD/src/cuda_reset_tools.cu | 1 - PG-PuReMD/src/cuda_valence_angles.cu | 26 +++---- PG-PuReMD/src/init_md.c | 3 +- PG-PuReMD/src/parallelreax.c | 2 +- PG-PuReMD/src/reax_types.h | 1 + 13 files changed, 172 insertions(+), 103 deletions(-) create mode 100644 .gitmodules create mode 160000 PG-PuReMD/src/cub diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..1129b088 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "PG-PuReMD/src/cub"] + path = PG-PuReMD/src/cub + url = https://github.com/NVlabs/cub.git + branch = 89de7ab20167909bc2c4f8acd397671c47cf3c0d diff --git a/PG-PuReMD/src/allocate.c b/PG-PuReMD/src/allocate.c index a3af86dc..25c89abc 100644 --- a/PG-PuReMD/src/allocate.c +++ b/PG-PuReMD/src/allocate.c @@ -1207,7 +1207,7 @@ void Cuda_ReAllocate( reax_system *system, control_params *control, /* delete three-body list */ Dev_Delete_List( *dev_lists + THREE_BODIES ); -// Delete_List( *lists + THREE_BODIES ); + Delete_List( *lists + THREE_BODIES ); /* recreate Three-body list */ Dev_Make_List( (*dev_lists + BONDS)->num_intrs, realloc->num_3body, @@ -1255,30 +1255,31 @@ void Cuda_ReAllocate( reax_system *system, control_params *control, // to ensure correct values at mpi_buffers for update_boundary_positions if ( !renbr ) { - mpi_flag = 0; + mpi_flag = FALSE; } // check whether in_buffer capacity is enough else if ( system->max_recved >= system->est_recv * 0.90 ) { - mpi_flag = 1; + mpi_flag = TRUE; } else { // otherwise check individual outgoing buffers - mpi_flag = 0; + mpi_flag = FALSE; for ( p = 0; p < MAX_NBRS; ++p ) { nbr_pr = &( system->my_nbrs[p] ); nbr_data = &( mpi_data->out_buffers[p] ); + if ( nbr_data->cnt >= nbr_pr->est_send * 0.90 ) { - mpi_flag = 1; + mpi_flag = TRUE; break; } } } - if ( mpi_flag ) + if ( mpi_flag == TRUE ) { #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: reallocating mpi_buf: old_recv=%d\n", @@ -1305,13 +1306,14 @@ void Cuda_ReAllocate( reax_system *system, control_params *control, #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: reallocating mpi_buf: recv=%d send=%d total=%dMB\n", - system->my_rank, system->est_recv, total_send, - (int)((system->est_recv + total_send)*sizeof(boundary_atom) / - (1024 * 1024))); + system->my_rank, system->est_recv, total_send, + (int)((system->est_recv + total_send)*sizeof(boundary_atom) / + (1024 * 1024))); + for ( p = 0; p < MAX_NBRS; ++p ) { fprintf( stderr, "p%d: nbr%d new_send=%d\n", - system->my_rank, p, system->my_nbrs[p].est_send ); + system->my_rank, p, system->my_nbrs[p].est_send ); } #endif diff --git a/PG-PuReMD/src/cub b/PG-PuReMD/src/cub new file mode 160000 index 00000000..89de7ab2 --- /dev/null +++ b/PG-PuReMD/src/cub @@ -0,0 +1 @@ +Subproject commit 89de7ab20167909bc2c4f8acd397671c47cf3c0d diff --git a/PG-PuReMD/src/cuda_allocate.cu b/PG-PuReMD/src/cuda_allocate.cu index 9d56b4c9..9a31eed3 100644 --- a/PG-PuReMD/src/cuda_allocate.cu +++ b/PG-PuReMD/src/cuda_allocate.cu @@ -156,6 +156,9 @@ void dev_alloc_system( reax_system *system ) cuda_malloc( (void **) &system->d_my_atoms, system->total_cap * sizeof(reax_atom), TRUE, "system:d_my_atoms" ); + /* list reallocation */ + cuda_malloc( (void **) &system->d_num_thbodies, sizeof(int), TRUE, "system:d_num_thbodies" ); + /* simulation boxes */ cuda_malloc( (void **) &system->d_big_box, sizeof(simulation_box), TRUE, "system:d_big_box" ); cuda_malloc( (void **) &system->d_my_box, sizeof(simulation_box), TRUE, "system:d_my_box" ); diff --git a/PG-PuReMD/src/cuda_forces.cu b/PG-PuReMD/src/cuda_forces.cu index ebe53090..82a0137e 100644 --- a/PG-PuReMD/src/cuda_forces.cu +++ b/PG-PuReMD/src/cuda_forces.cu @@ -22,9 +22,7 @@ #include "tool_box.h" #include "cuda_nonbonded.h" - -extern "C" void Make_List( int, int, int, reax_list* ); -extern "C" void Delete_List( reax_list* ); +#include "cub/cub/device/device_reduce.cuh" CUDA_GLOBAL void k_disable_hydrogen_bonding( control_params *control ) @@ -291,28 +289,36 @@ void Cuda_Estimate_Storages( reax_system *system, control_params *control, void Cuda_Estimate_Storages_Three_Body( reax_system *system, control_params *control, reax_list **lists, int *num_3body, int *thbody ) { - int i; - real *spad = (real *) scratch; + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; - cuda_memset( spad, 0, (*dev_lists + BONDS)->num_intrs * sizeof(int), "scratch" ); + cuda_memset( thbody, 0, (*dev_lists + BONDS)->num_intrs * sizeof(int), "scratch::thbody" ); Estimate_Cuda_Valence_Angles <<<BLOCKS_N, BLOCK_SIZE>>> ( system->d_my_atoms, (control_params *)control->d_control_params, - *(*dev_lists + BONDS), system->n, system->N, (int *)spad ); + *(*dev_lists + BONDS), system->n, system->N, (int *)thbody ); cudaThreadSynchronize( ); cudaCheckError( ); - copy_host_device( thbody, spad, (*dev_lists + BONDS)->num_intrs * sizeof(int), - cudaMemcpyDeviceToHost, "thb:offsets" ); + /* determine temporary device storage requirements */ + cub::DeviceReduce::Sum( d_temp_storage, temp_storage_bytes, + thbody, system->d_num_thbodies, (*dev_lists + BONDS)->num_intrs ); - *num_3body = 0; - for ( i = 0; i < (*dev_lists + BONDS)->num_intrs; i++ ) - { - *num_3body += thbody[i]; - thbody[i] += thbody[i - 1]; - } + /* allocate temporary storage */ + cuda_malloc( &d_temp_storage, temp_storage_bytes, FALSE, + "cub::sum::temp_storage" ); - system->num_thbodies = thbody[(*dev_lists + BONDS)->num_intrs - 1]; + /* run sum-reduction */ + cub::DeviceReduce::Sum( d_temp_storage, temp_storage_bytes, + thbody, system->d_num_thbodies, (*dev_lists + BONDS)->num_intrs ); + + /* deallocate temporary storage */ + cuda_free( d_temp_storage, "cub::sum::temp_storage" ); + + copy_host_device( num_3body, system->d_num_thbodies, sizeof(int), + cudaMemcpyDeviceToHost, "d_num_thbodies" ); + + system->num_thbodies = *num_3body; } @@ -1161,53 +1167,32 @@ int Cuda_Validate_Lists( reax_system *system, storage *workspace, } /* three body interactions */ - cuda_memset( spad, 0, (*lists + BONDS)->num_intrs * sizeof (int), "scratch" ); - Estimate_Cuda_Valence_Angles <<<BLOCKS_N, BLOCK_SIZE>>> - ( system->d_my_atoms, (control_params *)control->d_control_params, - *(*lists + BONDS), system->n, system->N, (int *)spad); - cudaThreadSynchronize( ); - cudaCheckError( ); - fprintf( stderr, " [ESTIMATE_CUDA_VALENCE_ANGLES]\n" ); - - thbody = (int *) host_scratch; - memset( thbody, 0, sizeof(int) * (*lists + BONDS)->num_intrs ); - copy_host_device( thbody, spad, (*lists + BONDS)->num_intrs * sizeof(int), - cudaMemcpyDeviceToHost, "thb:offsets" ); - - total_3body = 0; - for (i = 0; i < (*lists + BONDS)->num_intrs; i++) - { - total_3body += thbody[i]; - thbody[i] += thbody[i - 1]; - } + thbody = (int *) scratch; - system->num_thbodies = thbody[(*lists + BONDS)->num_intrs - 1]; + Cuda_Estimate_Storages_Three_Body( system, control, dev_lists, + &total_3body, thbody ); if ( system->num_thbodies > (*lists + THREE_BODIES)->num_intrs || (*lists + THREE_BODIES)->n < (*lists + BONDS)->num_intrs ) { - realloc->num_3body = total_3body; - system->num_thbodies = total_3body; + system->num_thbodies = total_3body * SAFE_ZONE; + realloc->num_3body = system->num_thbodies; ret = FAILURE; } + +#if defined(DEBUG) fprintf( stderr, "system->num_thbodies = %d, lists:THREE_BODIES->num_intrs = %d,\n", system->num_thbodies, (*lists + THREE_BODIES)->num_intrs ); fprintf( stderr, "lists:THREE_BODIES->n = %d, lists:BONDS->num_intrs = %d,\n", (*lists + THREE_BODIES)->n, (*lists + BONDS)->num_intrs ); fprintf( stderr, "total_3body = %d\n", total_3body ); +#endif if ( ret == SUCCESS ) { - /* copy the indexes into the thb list */ - copy_host_device( thbody, (*lists + THREE_BODIES)->index + 1, - sizeof(int) * ((*lists + BONDS)->num_intrs - 1), - cudaMemcpyHostToDevice, "dev_thb:index" ); - copy_host_device( thbody, (*lists + THREE_BODIES)->end_index + 1, - sizeof(int) * ((*lists + BONDS)->num_intrs - 1), - cudaMemcpyHostToDevice, "dev_thb:end_index" ); + Cuda_Init_Three_Body_Indices( thbody, (*dev_lists + BONDS)->num_intrs ); } - return ret; } diff --git a/PG-PuReMD/src/cuda_neighbors.cu b/PG-PuReMD/src/cuda_neighbors.cu index 7f0f848c..094ea8a4 100644 --- a/PG-PuReMD/src/cuda_neighbors.cu +++ b/PG-PuReMD/src/cuda_neighbors.cu @@ -28,8 +28,7 @@ #include "cuda_utils.h" #include "tool_box.h" -//extern "C" real Get_Time( ); -//extern "C" real Get_Timing_Info( real ); +#include "cub/cub/device/device_scan.cuh" CUDA_DEVICE real Dev_DistSqr_to_Special_Point( rvec cp, rvec x ) @@ -722,14 +721,22 @@ void Cuda_Init_Bond_Indices( int *indices, int entries ) void Cuda_Init_Three_Body_Indices( int *indices, int entries ) { - int i; reax_list *thbody = *dev_lists + THREE_BODIES; + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + + /* determine temporary device storage requirements */ + cub::DeviceScan::ExclusiveSum( d_temp_storage, temp_storage_bytes, + indices, thbody->index, entries ); + + /* allocate temporary storage */ + cuda_malloc( &d_temp_storage, temp_storage_bytes, FALSE, + "cub::devicescan::temp_storage" ); - copy_host_device( indices, thbody->index + 1, - sizeof(int) * (entries - 1), - cudaMemcpyHostToDevice, "dev_thb:index" ); - copy_host_device( indices, thbody->end_index, - sizeof(int) * entries, - cudaMemcpyHostToDevice, "dev_thb:end_index" ); + /* run exclusive prefix sum */ + cub::DeviceScan::ExclusiveSum( d_temp_storage, temp_storage_bytes, + indices, thbody->index, entries ); + /* deallocate temporary storage */ + cuda_free( d_temp_storage, "cub::devicescan::temp_storage" ); } diff --git a/PG-PuReMD/src/cuda_reduction.cu b/PG-PuReMD/src/cuda_reduction.cu index 86d99322..fee4595c 100644 --- a/PG-PuReMD/src/cuda_reduction.cu +++ b/PG-PuReMD/src/cuda_reduction.cu @@ -6,6 +6,81 @@ #include "vector.h" +CUDA_GLOBAL void k_reduction_int( const int *input, int *per_block_results, + const size_t n ) +{ +#if defined(__SM_35__) + extern __shared__ int my_iresults[]; + int idata; + unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; + int x = 0, z, offset; + + if( i < n ) + { + x = input[i]; + } + + idata = x; + __syncthreads(); + + for( z = 16; z >=1; z/=2 ) + { + idata += shfl( idata, z ); + } + + if ( threadIdx.x % 32 == 0 ) + { + my_iresults[threadIdx.x >> 5] = idata; + } + + __syncthreads(); + + for( offset = blockDim.x >> 6; offset > 0; offset >>= 1 ) + { + if( threadIdx.x < offset ) + { + my_iresults[threadIdx.x] += my_iresults[threadIdx.x + offset]; + } + + __syncthreads(); + } + + if( threadIdx.x == 0 ) + { + per_block_results[blockIdx.x] = my_iresults[0]; + } + +#else + extern __shared__ int idata[]; + unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; + int x = 0, offset; + + if( i < n ) + { + x = input[i]; + } + + idata[threadIdx.x] = x; + __syncthreads( ); + + for ( offset = blockDim.x / 2; offset > 0; offset >>= 1 ) + { + if( threadIdx.x < offset ) + { + idata[threadIdx.x] += idata[threadIdx.x + offset]; + } + + __syncthreads(); + } + + if( threadIdx.x == 0 ) + { + per_block_results[blockIdx.x] = idata[0]; + } +#endif +} + + CUDA_GLOBAL void k_reduction( const real *input, real *per_block_results, const size_t n ) { @@ -85,7 +160,7 @@ CUDA_GLOBAL void k_reduction_rvec( rvec *input, rvec *results, size_t n ) #if defined(__SM_35__) extern __shared__ rvec my_rvec[]; rvec sdata; - unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int i = blockIdx.x * blockDim.x + threadIdx.x, z, offset; rvec_MakeZero( sdata ); @@ -94,9 +169,9 @@ CUDA_GLOBAL void k_reduction_rvec( rvec *input, rvec *results, size_t n ) rvec_Copy( sdata, input[i] ); } - __syncthreads(); + __syncthreads( ); - for( int z = 16; z >=1; z/=2 ) + for( z = 16; z >=1; z/=2 ) { sdata[0] += shfl( sdata[0], z); sdata[1] += shfl( sdata[1], z); @@ -108,16 +183,16 @@ CUDA_GLOBAL void k_reduction_rvec( rvec *input, rvec *results, size_t n ) rvec_Copy( my_rvec[threadIdx.x >> 5] , sdata ); } - __syncthreads (); + __syncthreads( ); - for( int offset = blockDim.x >> 6; offset > 0; offset >>= 1 ) + for( offset = blockDim.x >> 6; offset > 0; offset >>= 1 ) { if( threadIdx.x < offset ) { rvec_Add( my_rvec[threadIdx.x], my_rvec[threadIdx.x + offset] ); } - __syncthreads(); + __syncthreads( ); } if( threadIdx.x == 0 ) @@ -127,33 +202,33 @@ CUDA_GLOBAL void k_reduction_rvec( rvec *input, rvec *results, size_t n ) #else extern __shared__ rvec svec_data[]; - unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int i = blockIdx.x * blockDim.x + threadIdx.x, offset; rvec x; rvec_MakeZero( x ); - if(i < n) + if ( i < n ) { rvec_Copy( x, input[i] ); } - rvec_Copy(svec_data[threadIdx.x], x); - __syncthreads(); + rvec_Copy( svec_data[threadIdx.x], x ); + __syncthreads( ); - for(int offset = blockDim.x / 2; offset > 0; offset >>= 1) + for ( offset = blockDim.x / 2; offset > 0; offset >>= 1 ) { - if(threadIdx.x < offset) + if ( threadIdx.x < offset ) { - rvec_Add (svec_data[threadIdx.x], svec_data[threadIdx.x + offset]); + rvec_Add( svec_data[threadIdx.x], svec_data[threadIdx.x + offset] ); } - __syncthreads(); + __syncthreads( ); } - if(threadIdx.x == 0) + if ( threadIdx.x == 0 ) { - //rvec_Copy (results[blockIdx.x], svec_data[0]); - rvec_Add (results[blockIdx.x], svec_data[0]); + //rvec_Copy( results[blockIdx.x], svec_data[0] ); + rvec_Add( results[blockIdx.x], svec_data[0] ); } #endif } diff --git a/PG-PuReMD/src/cuda_reduction.h b/PG-PuReMD/src/cuda_reduction.h index a494cc32..0960018e 100644 --- a/PG-PuReMD/src/cuda_reduction.h +++ b/PG-PuReMD/src/cuda_reduction.h @@ -8,6 +8,7 @@ #define FINAL 1 +CUDA_GLOBAL void k_reduction_int( const int *, int *, const size_t ); CUDA_GLOBAL void k_reduction( const real *, real *, const size_t ); CUDA_GLOBAL void k_reduction_rvec( rvec *, rvec *, size_t ); CUDA_GLOBAL void k_reduction_rvec2( rvec2 *, rvec2 *, size_t ); diff --git a/PG-PuReMD/src/cuda_reset_tools.cu b/PG-PuReMD/src/cuda_reset_tools.cu index 66ee2cbd..3da7a9c5 100644 --- a/PG-PuReMD/src/cuda_reset_tools.cu +++ b/PG-PuReMD/src/cuda_reset_tools.cu @@ -42,7 +42,6 @@ void Cuda_Reset_Workspace( reax_system *system, storage *workspace ) CUDA_GLOBAL void k_reset_hindex( reax_atom *my_atoms, int N ) { - int Hindex = 0; int i = blockIdx.x * blockDim.x + threadIdx.x; if (i >= N) diff --git a/PG-PuReMD/src/cuda_valence_angles.cu b/PG-PuReMD/src/cuda_valence_angles.cu index dfe9e769..c0936fc0 100644 --- a/PG-PuReMD/src/cuda_valence_angles.cu +++ b/PG-PuReMD/src/cuda_valence_angles.cu @@ -139,25 +139,16 @@ CUDA_GLOBAL void Cuda_Valence_Angles( reax_atom *my_atoms, for( pi = start_j; pi < end_j; ++pi ) { - - //num_thb_intrs = pi * THREE_BODY_OFFSET; - //Dev_Set_Start_Index( pi, num_thb_intrs, thb_intrs ); num_thb_intrs = Dev_Start_Index( pi, thb_intrs ); pbond_ij = &(bonds->select.bond_list[pi]); bo_ij = &(pbond_ij->bo_data); BOA_ij = bo_ij->BO - control->thb_cut; - //TODO REMOVE THIS - //TODO REMOVE THIS - //TODO REMOVE THIS - //TODO REMOVE THIS - //TODO REMOVE THIS - - if( BOA_ij/*bo_ij->BO*/ > 0.0 && + //if( BOA_ij/*bo_ij->BO*/ > 0.0) { + if ( BOA_ij > 0.0 && ( j < n || pbond_ij->nbr < n ) ) { - // if( BOA_ij/*bo_ij->BO*/ > 0.0) { i = pbond_ij->nbr; r_ij = pbond_ij->d; type_i = my_atoms[i].type; @@ -245,7 +236,7 @@ CUDA_GLOBAL void Cuda_Valence_Angles( reax_atom *my_atoms, ++num_thb_intrs; if ( j < n && BOA_jk > 0.0 && - bo_ij->BO * bo_jk->BO > SQR(control->thb_cut)/*0*/ ) + bo_ij->BO * bo_jk->BO > SQR(control->thb_cut) ) { r_jk = pbond_jk->d; thbh = &( d_thbh[ index_thbp (type_i,type_j,type_k,num_atom_types) ] ); @@ -350,12 +341,12 @@ CUDA_GLOBAL void Cuda_Valence_Angles( reax_atom *my_atoms, need to prevent all energies becoming duplicates */ if ( pk < pi ) { - data_e_pen [j] += e_pen = - p_pen1 * f9_Dj * exp_pen2ij * exp_pen2jk; + e_pen = p_pen1 * f9_Dj * exp_pen2ij * exp_pen2jk; + data_e_pen[j] += e_pen; } CEpen1 = e_pen * Cf9j / f9_Dj; - temp = -2.0 * p_pen2 * e_pen; + temp = -2.0 * p_pen2 * e_pen; CEpen2 = temp * (BOA_ij - 2.0); CEpen3 = temp * (BOA_jk - 2.0); /* END PENALTY ENERGY */ @@ -372,12 +363,13 @@ CUDA_GLOBAL void Cuda_Valence_Angles( reax_atom *my_atoms, if ( pk < pi ) { - data_e_coa [j] += e_coa = + e_coa = p_coa1 / (1. + exp_coa2) * EXP( -p_coa3 * SQR(workspace->total_bond_order[i]-BOA_ij) ) * EXP( -p_coa3 * SQR(workspace->total_bond_order[k]-BOA_jk) ) * EXP( -p_coa4 * SQR(BOA_ij - 1.5) ) * EXP( -p_coa4 * SQR(BOA_jk - 1.5) ); + data_e_coa [j] += e_coa; } CEcoa1 = -2 * p_coa4 * (BOA_ij - 1.5) * e_coa; @@ -561,7 +553,7 @@ CUDA_GLOBAL void Cuda_Valence_Angles( reax_atom *my_atoms, } } - Dev_Set_End_Index(pi, num_thb_intrs, thb_intrs ); + Dev_Set_End_Index( pi, num_thb_intrs, thb_intrs ); } } diff --git a/PG-PuReMD/src/init_md.c b/PG-PuReMD/src/init_md.c index 8bdc36a6..b050392a 100644 --- a/PG-PuReMD/src/init_md.c +++ b/PG-PuReMD/src/init_md.c @@ -1037,8 +1037,7 @@ int Cuda_Init_Lists( reax_system *system, control_params *control, #endif /* 3bodies list */ - thbody = (int *) host_scratch; - memset( thbody, 0, sizeof(int) * (*dev_lists + BONDS)->num_intrs ); + thbody = (int *) scratch; Cuda_Estimate_Storages_Three_Body( system, control, dev_lists, &total_3body, thbody ); diff --git a/PG-PuReMD/src/parallelreax.c b/PG-PuReMD/src/parallelreax.c index 5220aa58..b5681c33 100644 --- a/PG-PuReMD/src/parallelreax.c +++ b/PG-PuReMD/src/parallelreax.c @@ -156,7 +156,7 @@ int Cuda_Post_Evolve( reax_system* system, control_params* control, #ifdef HAVE_CUDA -void init_blocks(reax_system *system) +void init_blocks( reax_system *system ) { compute_blocks( &BLOCKS, &BLOCK_SIZE, system->n ); compute_nearest_pow_2( BLOCKS, &BLOCKS_POW_2 ); diff --git a/PG-PuReMD/src/reax_types.h b/PG-PuReMD/src/reax_types.h index 0ddb0523..7876218f 100644 --- a/PG-PuReMD/src/reax_types.h +++ b/PG-PuReMD/src/reax_types.h @@ -1095,6 +1095,7 @@ typedef struct int max_sparse_entries; /**/ int num_thbodies; + int *d_num_thbodies; //TODO: move to reax_atom int max_hbonds; } reax_system; -- GitLab