From 20a75e0f3eb106f47627ff4f01c9d1f4edca2cb2 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnku@msu.edu> Date: Thu, 22 Jun 2017 08:39:27 -0400 Subject: [PATCH] PG-PuReMD: add hydrogen bonding. --- PG-PuReMD/src/allocate.c | 11 +- PG-PuReMD/src/cuda_allocate.cu | 27 +- PG-PuReMD/src/cuda_bond_orders.cu | 25 +- PG-PuReMD/src/cuda_copy.cu | 68 ++++-- PG-PuReMD/src/cuda_forces.cu | 353 +++++++++++++-------------- PG-PuReMD/src/cuda_forces.h | 2 +- PG-PuReMD/src/cuda_hydrogen_bonds.cu | 140 ++++++----- PG-PuReMD/src/cuda_neighbors.cu | 163 +++++++++++-- PG-PuReMD/src/cuda_neighbors.h | 2 +- PG-PuReMD/src/cuda_reset_tools.cu | 123 ++-------- PG-PuReMD/src/cuda_reset_tools.h | 3 + PG-PuReMD/src/cuda_validation.cu | 6 +- PG-PuReMD/src/ffield.c | 97 +++----- PG-PuReMD/src/forces.c | 111 ++++++--- PG-PuReMD/src/hydrogen_bonds.c | 158 ++++++------ PG-PuReMD/src/index_utils.h | 8 +- PG-PuReMD/src/init_md.c | 89 ++----- PG-PuReMD/src/integrate.c | 21 +- PG-PuReMD/src/io_tools.c | 81 +++++- PG-PuReMD/src/io_tools.h | 4 +- PG-PuReMD/src/parallelreax.c | 41 ++-- PG-PuReMD/src/reax_types.h | 37 +-- PG-PuReMD/src/reset_tools.c | 4 +- PG-PuReMD/src/traj.c | 138 +++++++---- PG-PuReMD/src/traj.h | 30 ++- 25 files changed, 958 insertions(+), 784 deletions(-) diff --git a/PG-PuReMD/src/allocate.c b/PG-PuReMD/src/allocate.c index c1811b52..2a4e48f3 100644 --- a/PG-PuReMD/src/allocate.c +++ b/PG-PuReMD/src/allocate.c @@ -83,7 +83,7 @@ void DeAllocate_Workspace( control_params *control, storage *workspace ) { int i; - if ( !workspace->allocated ) + if ( workspace->allocated == FALSE ) { return; } @@ -204,13 +204,6 @@ void DeAllocate_Workspace( control_params *control, storage *workspace ) sfree( workspace->id_all, "id_all" ); sfree( workspace->f_all, "f_all" ); #endif - - /* hbond storage */ - //sfree( workspace->Hindex, "Hindex" ); - //sfree( workspace->num_bonds ); - //sfree( workspace->num_hbonds ); - //sfree( workspace->hash, "hash" ); - //sfree( workspace->rev_hash, "rev_hash" ); } @@ -409,7 +402,7 @@ void Reallocate_HBonds_List( reax_system *system, reax_list *hbonds ) if ( (id = system->my_atoms[i].Hindex) >= 0 ) { system->my_atoms[i].num_hbonds = MAX( Num_Entries(id, hbonds) * SAFER_ZONE, - MIN_HBONDS ); + MIN_HBONDS ); total_hbonds += system->my_atoms[i].num_hbonds; } } diff --git a/PG-PuReMD/src/cuda_allocate.cu b/PG-PuReMD/src/cuda_allocate.cu index c16f17a6..dcf99114 100644 --- a/PG-PuReMD/src/cuda_allocate.cu +++ b/PG-PuReMD/src/cuda_allocate.cu @@ -162,6 +162,7 @@ void dev_alloc_system( reax_system *system ) cuda_malloc( (void **) &system->d_my_atoms, system->total_cap * sizeof(reax_atom), TRUE, "system:d_my_atoms" ); + cuda_malloc( (void **) &system->d_numH, sizeof(int), TRUE, "system:d_numH" ); /* list management */ cuda_malloc( (void **) &system->d_far_nbrs, @@ -625,6 +626,7 @@ void Cuda_ReAllocate( reax_system *system, control_params *control, realloc->far_nbrs = FALSE; } } + fprintf( stderr, " [CUDA_REALLOCATE::far nbrs]\n" ); /* charge coef matrix */ //if( nflag || realloc->Htop >= system->max_sparse_entries * DANGER_ZONE ) { @@ -671,27 +673,26 @@ void Cuda_ReAllocate( reax_system *system, control_params *control, //workspace->U = NULL; realloc->Htop = 0; } + fprintf( stderr, " [CUDA_REALLOCATE::charge matrix\n" ); /* hydrogen bonds list */ - // FIX - 4 - Added additional check here for hydrogen Bond fix - if ( control->hbond_cut > 0 && system->numH > 0 ) + if ( control->hbond_cut > 0.0 && system->numH > 0 ) { - if ( Nflag == TRUE || realloc->hbonds ) + if ( Nflag == TRUE || realloc->hbonds == TRUE ) { - fprintf( stderr, "p:%d - *** Reallocating Hbonds *** Step:%d\n", system->my_rank, data->step ); - //MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY ); - - Cuda_Reallocate_HBonds_List( (*dev_lists) + HBONDS, system->total_cap, realloc->num_hbonds ); -// Cuda_Init_HBond_Indices( system ); - realloc->hbonds = 0; - #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: reallocating hbonds: total_hbonds=%d space=%dMB\n", - system->my_rank, ret, (int)(ret * sizeof(hbond_data) / (1024 * 1024)) ); + system->my_rank, system->total_hbonds, + (int)(system->total_hbonds * sizeof(hbond_data) / (1024 * 1024)) ); #endif + + Cuda_Reallocate_HBonds_List( (*dev_lists) + HBONDS, system->total_cap, system->total_hbonds ); + Cuda_Init_HBond_Indices( system ); + realloc->hbonds = FALSE; } } + fprintf( stderr, " [CUDA_REALLOCATE::hbonds\n" ); /* bonds list */ if ( Nflag == TRUE || realloc->bonds == TRUE ) @@ -706,6 +707,7 @@ void Cuda_ReAllocate( reax_system *system, control_params *control, Cuda_Init_Bond_Indices( system ); realloc->bonds = FALSE; } + fprintf( stderr, " [CUDA_REALLOCATE::bonds\n" ); /* 3-body list */ if ( Nflag == TRUE || realloc->num_3body > 0 ) @@ -721,6 +723,7 @@ void Cuda_ReAllocate( reax_system *system, control_params *control, (*dev_lists + BONDS)->num_intrs, system->total_thbodies ); realloc->num_3body = -1; } + fprintf( stderr, " [CUDA_REALLOCATE::thbody\n" ); /* grid */ if ( renbr && realloc->gcell_atoms > -1 ) @@ -753,6 +756,7 @@ void Cuda_ReAllocate( reax_system *system, control_params *control, //dev_alloc_grid_cell_atoms (system, realloc->gcell_atoms); realloc->gcell_atoms = -1; } + fprintf( stderr, " [CUDA_REALLOCATE::grid\n" ); /* mpi buffers */ // we have to be at a renbring step - @@ -825,6 +829,7 @@ void Cuda_ReAllocate( reax_system *system, control_params *control, Deallocate_MPI_Buffers( mpi_data ); Allocate_MPI_Buffers( mpi_data, system->est_recv, system->my_nbrs, msg ); } + fprintf( stderr, " [CUDA_REALLOCATE::MPI buffers\n" ); #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d @ step%d: reallocate done\n", diff --git a/PG-PuReMD/src/cuda_bond_orders.cu b/PG-PuReMD/src/cuda_bond_orders.cu index 9910fad3..b0ee399f 100644 --- a/PG-PuReMD/src/cuda_bond_orders.cu +++ b/PG-PuReMD/src/cuda_bond_orders.cu @@ -784,24 +784,28 @@ CUDA_DEVICE void Cuda_dbond_to_Forces_postprocess( int i, reax_atom *atoms, } -CUDA_GLOBAL void ker_total_forces_postprocess( reax_atom *my_atoms, +CUDA_GLOBAL void k_total_forces_postprocess( reax_atom *my_atoms, reax_list p_bonds, storage p_workspace, int N ) { - int i = blockIdx.x * blockDim.x + threadIdx.x; + int i; + reax_list *bonds; + storage *workspace; + + i = blockIdx.x * blockDim.x + threadIdx.x; if ( i >= N ) { return; } - reax_list *bonds = &( p_bonds ); - storage *workspace = &( p_workspace ); + bonds = &p_bonds; + workspace = &p_workspace; Cuda_dbond_to_Forces_postprocess( i, my_atoms, bonds, workspace ); } -CUDA_GLOBAL void ker_total_forces( storage p_workspace, reax_list p_bonds, +CUDA_GLOBAL void k_total_forces( storage p_workspace, reax_list p_bonds, control_params *control, simulation_data *data, rvec *data_ext_press, int N ) { @@ -846,7 +850,7 @@ void Cuda_Total_Forces( reax_system *system, control_params *control, blocks = system->N / DEF_BLOCK_SIZE + ((system->N % DEF_BLOCK_SIZE == 0) ? 0 : 1); - ker_total_forces <<< blocks, DEF_BLOCK_SIZE >>> + k_total_forces <<< blocks, DEF_BLOCK_SIZE >>> ( *dev_workspace, *(*dev_lists + BONDS), (control_params *) control->d_control_params, (simulation_data *)data->d_simulation_data, @@ -869,14 +873,14 @@ void Cuda_Total_Forces( reax_system *system, control_params *control, } //do the post processing for the atomic forces here - ker_total_forces_postprocess <<< blocks, DEF_BLOCK_SIZE >>> + k_total_forces_postprocess <<< blocks, DEF_BLOCK_SIZE >>> ( system->d_my_atoms, *(*dev_lists + BONDS), *dev_workspace, system->N ); cudaThreadSynchronize( ); cudaCheckError( ); } -CUDA_GLOBAL void ker_total_forces_pure( reax_atom *my_atoms, int n, +CUDA_GLOBAL void k_total_forces_pure( reax_atom *my_atoms, int n, storage p_workspace ) { int i; @@ -889,7 +893,7 @@ CUDA_GLOBAL void ker_total_forces_pure( reax_atom *my_atoms, int n, return; } - workspace = &( p_workspace ); + workspace = &p_workspace; rvec_Copy( my_atoms[i].f, workspace->f[i] ); } @@ -901,7 +905,8 @@ void Cuda_Total_Forces_PURE( reax_system *system, storage *workspace ) blocks = system->n / DEF_BLOCK_SIZE + ((system->n % DEF_BLOCK_SIZE == 0) ? 0 : 1); - ker_total_forces_pure <<< blocks, DEF_BLOCK_SIZE >>> + + k_total_forces_pure <<< blocks, DEF_BLOCK_SIZE >>> ( system->d_my_atoms, system->n, *dev_workspace); cudaThreadSynchronize( ); cudaCheckError( ); diff --git a/PG-PuReMD/src/cuda_copy.cu b/PG-PuReMD/src/cuda_copy.cu index a28dbeb4..a3bfca30 100644 --- a/PG-PuReMD/src/cuda_copy.cu +++ b/PG-PuReMD/src/cuda_copy.cu @@ -5,6 +5,7 @@ #include "vector.h" +/* Copy grid info from host to device */ void Sync_Grid( grid *host, grid *device ) { int total; @@ -47,59 +48,62 @@ void Sync_Grid( grid *host, grid *device ) } +/* Copy atom info from host to device */ void Sync_Atoms( reax_system *sys ) { //TODO METIN FIX, coredump on his machine - //copy_host_device( sys->my_atoms, sys->d_my_atoms, sizeof(reax_atom) * sys->total_cap, cudaMemcpyHostToDevice, "system:my_atoms" ); +// copy_host_device( sys->my_atoms, sys->d_my_atoms, sizeof(reax_atom) * sys->total_cap, +// cudaMemcpyHostToDevice, "Sync_Atoms::system->my_atoms" ); #if defined(__CUDA_DEBUG_LOG__) fprintf( stderr, "p:%d - Synching atoms: n: %d N: %d, total_cap: %d \n", sys->my_rank, sys->n, sys->N, sys->total_cap ); #endif - copy_host_device( sys->my_atoms, sys->d_my_atoms, sizeof(reax_atom) * - sys->N, cudaMemcpyHostToDevice, "system:my_atoms" ); + copy_host_device( sys->my_atoms, sys->d_my_atoms, sizeof(reax_atom) * sys->N, + cudaMemcpyHostToDevice, "Sync_Atoms::system->my_atoms" ); //TODO METIN FIX, coredump on his machine } +/* Copy atomic system info from host to device */ void Sync_System( reax_system *sys ) { - //fprintf (stderr, "p:%d - trying to copy atoms : %d \n", sys->my_rank, sys->local_cap); Sync_Atoms( sys ); copy_host_device( &sys->my_box, sys->d_my_box, sizeof(simulation_box), - cudaMemcpyHostToDevice, "system:my_box" ); + cudaMemcpyHostToDevice, "Sync_System::system->my_box" ); copy_host_device( &sys->my_ext_box, sys->d_my_ext_box, sizeof(simulation_box), cudaMemcpyHostToDevice, - "system:my_ext_box" ); + "Sync_System::system->my_ext_box" ); copy_host_device( sys->reax_param.sbp, sys->reax_param.d_sbp, sizeof(single_body_parameters) * sys->reax_param.num_atom_types, - cudaMemcpyHostToDevice, "system:sbp" ); + cudaMemcpyHostToDevice, "Sync_System::system->sbp" ); copy_host_device( sys->reax_param.tbp, sys->reax_param.d_tbp, sizeof(two_body_parameters) * POW(sys->reax_param.num_atom_types, 2), - cudaMemcpyHostToDevice, "system:tbp" ); + cudaMemcpyHostToDevice, "Sync_System::system->tbp" ); copy_host_device( sys->reax_param.thbp, sys->reax_param.d_thbp, sizeof(three_body_header) * POW(sys->reax_param.num_atom_types, 3), - cudaMemcpyHostToDevice, "system:thbh" ); + cudaMemcpyHostToDevice, "Sync_System::system->thbh" ); copy_host_device( sys->reax_param.hbp, sys->reax_param.d_hbp, sizeof(hbond_parameters) * POW(sys->reax_param.num_atom_types, 3), - cudaMemcpyHostToDevice, "system:hbond" ); + cudaMemcpyHostToDevice, "Sync_System::system->hbond" ); copy_host_device( sys->reax_param.fbp, sys->reax_param.d_fbp, sizeof(four_body_header) * POW(sys->reax_param.num_atom_types, 4), - cudaMemcpyHostToDevice, "system:four_header" ); + cudaMemcpyHostToDevice, "Sync_System::system->four_header" ); copy_host_device( sys->reax_param.gp.l, sys->reax_param.d_gp.l, sizeof(real) * sys->reax_param.gp.n_global, cudaMemcpyHostToDevice, - "system:global_parameters" ); + "Sync_System::system->global_parameters" ); sys->reax_param.d_gp.n_global = sys->reax_param.gp.n_global; sys->reax_param.d_gp.vdw_type = sys->reax_param.gp.vdw_type; } +/* Copy atom info from device to host */ void Output_Sync_Atoms( reax_system *sys ) { copy_host_device( sys->my_atoms, sys->d_my_atoms, sizeof(reax_atom) * @@ -107,6 +111,7 @@ void Output_Sync_Atoms( reax_system *sys ) } +/* Copy simulation data from device to host */ void Output_Sync_Simulation_Data( simulation_data *host, simulation_data *dev ) { copy_host_device( &host->my_en, &dev->my_en, sizeof(energy_data), @@ -120,36 +125,49 @@ void Output_Sync_Simulation_Data( simulation_data *host, simulation_data *dev ) } +/* Copy interaction lists from device to host */ void Output_Sync_Lists( reax_list *host, reax_list *device, int type ) { - //fprintf (stderr, " Trying to copy *%d* list from device to host \n", type); - - //list is already allocated -- discard it first - //if (host->n > 0) - //if (host->allocated > 0) - // Delete_List (host); +#if defined(DEBUG) + fprintf( stderr, " Trying to copy *%d* list from device to host \n", type ); +#endif - //memory is allocated on the host - //Make_List(device->n, device->num_intrs, type, host); +// if ( host->allocated == TRUE ) +// { +// Delete_List( host ); +// } +// Make_List( device->n, device->num_intrs, type, host ); copy_host_device( host->index, device->index, sizeof(int) * device->n, - cudaMemcpyDeviceToHost, "output_sync_list:list:index" ); + cudaMemcpyDeviceToHost, "Output_Sync_Lists::list->index" ); copy_host_device( host->end_index, device->end_index, sizeof(int) * - device->n, cudaMemcpyDeviceToHost, "output_sync:list:end_index" ); + device->n, cudaMemcpyDeviceToHost, "Output_Sync_Lists::list->end_index" ); - switch (type) + switch ( type ) { + case TYP_FAR_NEIGHBOR: + copy_host_device( host->select.far_nbr_list, device->select.far_nbr_list, + sizeof(far_neighbor_data) * device->num_intrs, + cudaMemcpyDeviceToHost, "Output_Sync_Lists::far_neighbor_list" ); + break; + case TYP_BOND: copy_host_device( host->select.bond_list, device->select.bond_list, sizeof(bond_data) * device->num_intrs, - cudaMemcpyDeviceToHost, "bond_list" ); + cudaMemcpyDeviceToHost, "Output_Sync_Lists::bond_list" ); + break; + + case TYP_HBOND: + copy_host_device( host->select.hbond_list, device->select.hbond_list, + sizeof(hbond_data) * device->num_intrs, + cudaMemcpyDeviceToHost, "Output_Sync_Lists::hbond_list" ); break; case TYP_THREE_BODY: copy_host_device( host->select.three_body_list, device->select.three_body_list, sizeof(three_body_interaction_data )* device->num_intrs, - cudaMemcpyDeviceToHost, "three_body_list" ); + cudaMemcpyDeviceToHost, "Output_Sync_Lists::three_body_list" ); break; default: diff --git a/PG-PuReMD/src/cuda_forces.cu b/PG-PuReMD/src/cuda_forces.cu index c8e4ebe4..67009b7c 100644 --- a/PG-PuReMD/src/cuda_forces.cu +++ b/PG-PuReMD/src/cuda_forces.cu @@ -32,7 +32,7 @@ CUDA_GLOBAL void k_estimate_storages( reax_atom *my_atoms, control_params *control, reax_list far_nbrs, int num_atom_types, int n, int N, int Hcap, int total_cap, int *Htop, int *bonds, int *max_bonds, int *realloc_bonds, - int *hbonds ) + int *hbonds, int *max_hbonds, int *realloc_hbonds ) { int i, j, pj; int start_i, end_i; @@ -68,15 +68,17 @@ CUDA_GLOBAL void k_estimate_storages( reax_atom *my_atoms, local = TRUE; cutoff = control->nonb_cut; atomicAdd( Htop, 1 ); - ihb = sbp_i->p_hbond; +// ihb = sbp_i->p_hbond; } else { local = FALSE; cutoff = control->bond_cut; - ihb = -1; +// ihb = NON_H_BONDING_ATOM; } + ihb = NON_H_BONDING_ATOM; + for ( pj = start_i; pj < end_i; ++pj ) { nbr_pj = &( far_nbrs.select.far_nbr_list[pj] ); @@ -89,17 +91,19 @@ CUDA_GLOBAL void k_estimate_storages( reax_atom *my_atoms, sbp_j = &(sbp[type_j]); ihb = sbp_i->p_hbond; jhb = sbp_j->p_hbond; - if ( control->hbond_cut > 0.1 - && nbr_pj->d <= control->hbond_cut - && ihb == 2 && jhb == 1 && j < n && i > n ) + + /* atom i: H bonding, ghost + * atom j: H atom, native */ + if ( control->hbond_cut > 0.0 && nbr_pj->d <= control->hbond_cut + && ihb == H_BONDING_ATOM && jhb == H_ATOM && i >= n && j < n ) { atomicAdd( hbonds + i, 1 ); } - if ( i >= n ) - { - ihb = -1; - } +// if ( i >= n ) +// { +// ihb = NON_H_BONDING_ATOM; +// } } if ( nbr_pj->d <= cutoff ) @@ -107,7 +111,7 @@ CUDA_GLOBAL void k_estimate_storages( reax_atom *my_atoms, type_j = my_atoms[j].type; r_ij = nbr_pj->d; sbp_j = &(sbp[type_j]); - twbp = &(tbp[index_tbp (type_i,type_j,num_atom_types)]); + twbp = &(tbp[ index_tbp(type_i ,type_j, num_atom_types) ]); if ( local == TRUE ) { @@ -120,15 +124,21 @@ CUDA_GLOBAL void k_estimate_storages( reax_atom *my_atoms, atomicAdd( Htop, 1 ); } - if ( control->hbond_cut > 0.1 && (ihb == 1 || ihb == 2) && + /* atom i: H atom OR H bonding atom, native */ + if ( control->hbond_cut > 0.0 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) && nbr_pj->d <= control->hbond_cut ) { jhb = sbp_j->p_hbond; - if( ihb == 1 && jhb == 2 ) + + /* atom i: H atom, native + * atom j: H bonding atom */ + if( ihb == H_ATOM && jhb == H_BONDING_ATOM ) { atomicAdd( hbonds + i, 1 ); } - else if( ihb == 2 && jhb == 1 && j < n ) + /* atom i: H bonding atom, native + * atom j: H atom, native */ + else if( ihb == H_BONDING_ATOM && jhb == H_ATOM && j < n ) { atomicAdd( hbonds + i, 1 ); } @@ -186,20 +196,20 @@ CUDA_GLOBAL void k_estimate_storages( reax_atom *my_atoms, else { bonds[i] = MIN_BONDS; - hbonds[i] = MIN_HBONDS; + hbonds[i] = 0; } if ( bonds[i] > max_bonds[i] ) { - max_bonds[i] = MAX( (int)(bonds[i] * SAFE_ZONE), MIN_BONDS ); + max_bonds[i] = MAX( (int)(bonds[i] * 2), MIN_BONDS ); *realloc_bonds = TRUE; } -// if ( hbonds[i] > max_hbonds[i] ) -// { -// max_hbonds[i] = MAX( (int)(hbonds[i] * SAFE_ZONE), MIN_HBONDS ); -// *realloc_hbonds = TRUE; -// } + if ( hbonds[i] > max_hbonds[i] ) + { + max_hbonds[i] = MAX( (int)(hbonds[i] * SAFE_ZONE), MIN_HBONDS ); + *realloc_hbonds = TRUE; + } } @@ -227,14 +237,12 @@ CUDA_GLOBAL void k_init_system_atoms( reax_atom *my_atoms, int N, int Cuda_Estimate_Storages( reax_system *system, control_params *control, - reax_list **lists, int *Htop, int *hb_top, int step ) + reax_list **lists, int *Htop, int step ) { int i, ret, ret_bonds, ret_hbonds; int blocks = 0; - int *d_Htop, *d_hb_top; + int *d_Htop; int *tmp = (int*) scratch; - int hbond_count = 0; - int max_hbonds = 0, min_hbonds = 999999; ret = SUCCESS; @@ -250,11 +258,8 @@ int Cuda_Estimate_Storages( reax_system *system, control_params *control, "Cuda_Estimate_Storages::d_hbonds" ); d_Htop = tmp; - d_hb_top = d_Htop + 1; cuda_memset( d_Htop, 0, sizeof(int), "Cuda_Estimate_Storages::dHtop" ); - cuda_memset( d_hb_top, 0, system->total_cap * sizeof(int), - "Cuda_Estimate_Storages::d_hb_top" ); blocks = (int)CEIL( (real)system->total_cap / ST_BLOCK_SIZE ); @@ -264,14 +269,12 @@ int Cuda_Estimate_Storages( reax_system *system, control_params *control, *(*dev_lists + FAR_NBRS), system->reax_param.num_atom_types, system->n, system->N, system->Hcap, system->total_cap, d_Htop, system->d_bonds, system->d_max_bonds, system->d_realloc_bonds, - d_hb_top ); + system->d_hbonds, system->d_max_hbonds, system->d_realloc_hbonds ); cudaThreadSynchronize( ); cudaCheckError( ); copy_host_device( Htop, d_Htop, sizeof(int), cudaMemcpyDeviceToHost, "Htop"); - copy_host_device( hb_top, d_hb_top, sizeof(int) * system->total_cap, - cudaMemcpyDeviceToHost, "hb_top"); /* check reallocation flags on device */ copy_host_device( &ret_bonds, system->d_realloc_bonds, sizeof(int), @@ -294,55 +297,40 @@ int Cuda_Estimate_Storages( reax_system *system, control_params *control, ret = FAILURE; } -// if ( ret_hbonds == TRUE ) -// { -// Cuda_Reduction_Sum( system->d_max_hbonds, system->d_total_hbonds, -// system->total_cap ); -// -// copy_host_device( &(system->total_hbonds), system->d_total_hbonds, sizeof(int), -// cudaMemcpyDeviceToHost, "Cuda_Estimate_Storages::d_total_hbonds" ); -// -// fprintf( stderr, "system->total_hbonds = %d\n", system->total_bonds ); -// -// if ( step > 0 ) -// { -// dev_workspace->realloc.hbonds = TRUE; -// } -// ret = FAILURE; -// } - - //TODO: change - for ( i = 0; i < system->N; i++ ) + if ( system->numH > 0 && control->hbond_cut > 0.0 && ret_hbonds == TRUE ) { - if ( hb_top[i] >= max_hbonds ) + Cuda_Reduction_Sum( system->d_max_hbonds, system->d_total_hbonds, + system->total_cap ); + + copy_host_device( &(system->total_hbonds), system->d_total_hbonds, sizeof(int), + cudaMemcpyDeviceToHost, "Cuda_Estimate_Storages::d_total_hbonds" ); + + if ( step > 0 ) { - max_hbonds = hb_top[i]; + dev_workspace->realloc.hbonds = TRUE; } - if ( hb_top[i] <= min_hbonds ) + ret = FAILURE; + } + else + { + /* if number of hydrogen atoms is 0, disable hydrogen bond functionality */ + if ( system->numH == 0 && step == 0 ) { - min_hbonds = hb_top[i]; + fprintf( stderr, "WARNING: DISABLING HYDROGEN BONDS\n" ); + control->hbond_cut = 0.0; + k_disable_hydrogen_bonding <<< 1, 1 >>> ( (control_params *)control->d_control_params ); } - - hbond_count += hb_top[i]; } - system->max_hbonds = max_hbonds * SAFER_ZONE; #if defined(DEBUG) fprintf( stderr, "p:%d -->\n", system->my_rank ); fprintf( stderr, " TOTAL DEVICE BOND COUNT: %d \n", system->total_bonds ); - fprintf( stderr, " TOTAL DEVICE HBOND COUNT: %d \n", hbond_count ); + fprintf( stderr, " TOTAL DEVICE HBOND COUNT: %d \n", system->total_hbonds ); fprintf( stderr, " TOTAL DEVICE SPARSE COUNT: %d \n", *Htop ); #endif - /* if number of hydrogen atoms is 0, disable hydrogen bond functionality */ - if ( hbond_count == 0 ) - { - control->hbond_cut = 0.0; - k_disable_hydrogen_bonding <<< 1, 1 >>> ( (control_params *)control->d_control_params ); - } - k_init_system_atoms <<< blocks, ST_BLOCK_SIZE >>> - ( system->d_my_atoms, system->N, d_hb_top, system->d_bonds ); + ( system->d_my_atoms, system->N, system->d_hbonds, system->d_bonds ); cudaThreadSynchronize( ); cudaCheckError( ); @@ -613,11 +601,48 @@ int Cuda_Estimate_Storage_Sparse_Matrix( reax_system *system, control_params *co } +CUDA_GLOBAL void k_print_hbond_info( reax_atom *my_atoms, single_body_parameters *sbp, + control_params *control, reax_list hbonds, int N ) +{ + int i; + int type_i; + int ihb, ihb_top; + single_body_parameters *sbp_i; + reax_atom *atom_i; + + i = blockIdx.x * blockDim.x + threadIdx.x; + + if ( i >= N ) + { + return; + } + + atom_i = &(my_atoms[i]); + type_i = atom_i->type; + sbp_i = &(sbp[type_i]); + + if ( control->hbond_cut > 0.0 ) + { + ihb = sbp_i->p_hbond; + if ( ihb == H_ATOM || ihb == H_BONDING_ATOM ) + { + ihb_top = Dev_Start_Index( atom_i->Hindex, &hbonds ); + } + else + { + ihb_top = -1; + } + } + + printf( "atom %6d: ihb = %2d, ihb_top = %2d\n", i, ihb, ihb_top ); +} + + CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp, two_body_parameters *tbp, storage workspace, control_params *control, reax_list far_nbrs, reax_list bonds, reax_list hbonds, LR_lookup_table *t_LR, int n, int N, int num_atom_types, - int max_sparse_entries, int renbr, int max_hbonds ) + int max_sparse_entries, int renbr ) { int i, j, pj; int start_i, end_i; @@ -649,7 +674,6 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp start_i = Dev_Start_Index( i, &far_nbrs ); end_i = Dev_End_Index( i, &far_nbrs ); btop_i = Dev_Start_Index( i, &bonds ); - sbp_i = &(sbp[type_i]); if ( i < n ) @@ -669,9 +693,8 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp workspace.bond_mark[i] = 1000; } - ihb = -1; + ihb = NON_H_BONDING_ATOM; ihb_top = -1; - //CHANGE ORIGINAL H->start[i] = Htop; if ( local == TRUE ) @@ -680,18 +703,14 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp H->entries[Htop].val = sbp_i->eta; ++Htop; } - //CHANGE ORIGINAL if ( control->hbond_cut > 0.0 ) { ihb = sbp_i->p_hbond; - //CHANGE ORIGINAL - if ( ihb == 1 || ihb == 2 ) + + if ( ihb == H_ATOM || ihb == H_BONDING_ATOM ) { - //CHANGE ORIGINAL - //ihb_top = Dev_Start_Index( atom_i->Hindex, &hbonds ); - ihb_top = i * max_hbonds; - Dev_Set_Start_Index( atom_i->Hindex, ihb_top, &hbonds ); + ihb_top = Dev_Start_Index( atom_i->Hindex, &hbonds ); } else { @@ -705,6 +724,7 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp nbr_pj = &( far_nbrs.select.far_nbr_list[pj] ); j = nbr_pj->nbr; atom_j = &(my_atoms[j]); + if ( renbr ) { if ( nbr_pj->d <= cutoff ) @@ -764,12 +784,15 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp } if ( flag2 == TRUE ) { - ihb = sbp_i->p_hbond; type_j = atom_j->type; sbp_j = &(sbp[type_j]); + ihb = sbp_i->p_hbond; jhb = sbp_j->p_hbond; + + /* atom i: H bonding, ghost + * atom j: H atom, native */ if ( control->hbond_cut > 0.0 && nbr_pj->d <= control->hbond_cut - && ihb == 2 && jhb == 1 && i >= n && j < n ) + && ihb == H_BONDING_ATOM && jhb == H_ATOM && i >= n && j < n ) { hbonds.select.hbond_list[ihb_top].nbr = j; hbonds.select.hbond_list[ihb_top].scl = -1; @@ -848,14 +871,17 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp //bool condition = !((i >= n) && (j >= n)); /* hydrogen bond lists */ - if ( control->hbond_cut > 0 && (ihb == 1 || ihb == 2) && - nbr_pj->d <= control->hbond_cut // && i < j - ) + if ( control->hbond_cut > 0.0 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) && + nbr_pj->d <= control->hbond_cut ) { jhb = sbp_j->p_hbond; - if ( ihb == 1 && jhb == 2 ) + + /* atom i: H atom, native + * atom j: H bonding atom */ + if ( ihb == H_ATOM && jhb == H_BONDING_ATOM ) { hbonds.select.hbond_list[ihb_top].nbr = j; + if ( i < j ) { hbonds.select.hbond_list[ihb_top].scl = 1; @@ -872,7 +898,9 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp ++ihb_top; } - else if ( ihb == 2 && jhb == 1 && j < n ) + /* atom i: H bonding atom, native + * atom j: H atom, native */ + else if ( ihb == H_BONDING_ATOM && jhb == H_ATOM && j < n ) { //jhb_top = End_Index( atom_j->Hindex, hbonds ); hbonds.select.hbond_list[ihb_top].nbr = j; @@ -884,9 +912,6 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp rvec_MakeZero( hbonds.select.hbond_list[ihb_top].hb_f ); ++ihb_top; - - //Set_End_Index( atom_j->Hindex, jhb_top+1, hbonds ); - //++num_hbonds; } } } @@ -900,26 +925,28 @@ CUDA_GLOBAL void k_init_forces( reax_atom *my_atoms, single_body_parameters *sbp //num_bonds += 2; ++btop_i; - /* Need to do later... since i and j are parallel - if( workspace->bond_mark[j] > workspace->bond_mark[i] + 1 ) - workspace->bond_mark[j] = workspace->bond_mark[i] + 1; - else if( workspace->bond_mark[i] > workspace->bond_mark[j] + 1 ) { - workspace->bond_mark[i] = workspace->bond_mark[j] + 1; - } - */ + /* Need to do later... since i and j are parallel */ +// if( workspace->bond_mark[j] > workspace->bond_mark[i] + 1 ) +// { +// workspace->bond_mark[j] = workspace->bond_mark[i] + 1; +// } +// else if( workspace->bond_mark[i] > workspace->bond_mark[j] + 1 ) +// { +// workspace->bond_mark[i] = workspace->bond_mark[j] + 1; +// } } } } Dev_Set_End_Index( i, btop_i, &bonds ); - // if( local == TRUE ) { H->end[i] = Htop; - // } - //CHANGE ORIGINAL - if ( ( ihb == 1 || ihb == 2 ) && ihb_top > 0 && control->hbond_cut > 0.0 ) - { - Dev_Set_End_Index( atom_i->Hindex, ihb_top, &hbonds ); - } +// if( local == TRUE ) +// { + if ( control->hbond_cut > 0.0 && ihb_top > 0 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) ) + { + Dev_Set_End_Index( atom_i->Hindex, ihb_top, &hbonds ); + } +// } //} Commented for cuda kernel } @@ -998,13 +1025,13 @@ CUDA_GLOBAL void New_fix_sym_hbond_indices( reax_atom *my_atoms, reax_list hbond } i = warp_id; - j = start + lane_id; start = Dev_Start_Index( my_atoms[i].Hindex, &hbonds ); end = Dev_End_Index( my_atoms[i].Hindex, &hbonds ); + j = start + lane_id; while ( j < end ) { - ihbond = &( hbonds.select.hbond_list [j] ); + ihbond = &( hbonds.select.hbond_list[j] ); nbr = ihbond->nbr; nbrstart = Dev_Start_Index( my_atoms[nbr].Hindex, &hbonds ); @@ -1012,7 +1039,7 @@ CUDA_GLOBAL void New_fix_sym_hbond_indices( reax_atom *my_atoms, reax_list hbond for ( k = nbrstart; k < nbrend; k++ ) { - jhbond = &( hbonds.select.hbond_list [k] ); + jhbond = &( hbonds.select.hbond_list[k] ); if ( jhbond->nbr == i ) { @@ -1040,8 +1067,9 @@ CUDA_GLOBAL void k_update_bonds( reax_atom *my_atoms, reax_list bonds, int n ) return; } - my_atoms[i].num_bonds = - MAX( Dev_Num_Entries(i, &bonds) * 2, MIN_BONDS ); +// my_atoms[i].num_bonds = +// MAX( Dev_Num_Entries(i, &bonds) * 2, MIN_BONDS ); + my_atoms[i].num_bonds = Dev_Num_Entries( i, &bonds ); } @@ -1058,8 +1086,9 @@ CUDA_GLOBAL void k_update_hbonds( reax_atom *my_atoms, reax_list hbonds, int n ) } Hindex = my_atoms[i].Hindex; - my_atoms[i].num_hbonds = - MAX( Dev_Num_Entries(Hindex, &hbonds) * SAFER_ZONE, MIN_HBONDS ); +// my_atoms[i].num_hbonds = +// MAX( Dev_Num_Entries(Hindex, &hbonds) * SAFER_ZONE, MIN_HBONDS ); + my_atoms[i].num_hbonds = Dev_Num_Entries( Hindex, &hbonds ); } //////////////////////// //////////////////////// @@ -1076,10 +1105,8 @@ int Cuda_Validate_Lists( reax_system *system, storage *workspace, int *thbody; reax_list *bonds, *hbonds; reallocate_data *realloc; - int max_sp_entries, num_hbonds, num_bonds; + int max_sp_entries; int total_sp_entries; - int max_hbonds; - real *spad = (real *) scratch; realloc = &( dev_workspace->realloc ); blocks = system->n / DEF_BLOCK_SIZE + @@ -1097,8 +1124,7 @@ int Cuda_Validate_Lists( reax_system *system, storage *workspace, if ( control->hbond_cut > 0.0 && system->numH > 0 ) { k_update_hbonds <<< blocks, DEF_BLOCK_SIZE >>> - (system->d_my_atoms, *(*lists + HBONDS), - system->n); + ( system->d_my_atoms, *(*lists + HBONDS), system->n ); cudaThreadSynchronize( ); cudaCheckError( ); } @@ -1166,68 +1192,6 @@ int Cuda_Validate_Lists( reax_system *system, storage *workspace, } fprintf( stderr, " [sparse_matrix: %d]\n", ret ); - /* validate Hbonds list */ - num_hbonds = 0; - // FIX - 4 - added additional check here - if ( numH > 0 && control->hbond_cut > 0.0 ) - { - hbonds = *lists + HBONDS; - memset( host_scratch, 0, 2 * hbonds->n * sizeof(int) ); - index = (int *) host_scratch; - end_index = index + hbonds->n; - - copy_host_device( index, hbonds->index, hbonds->n * sizeof(int), - cudaMemcpyDeviceToHost, "hbonds:index" ); - copy_host_device( end_index, hbonds->end_index, hbonds->n * sizeof(int), - cudaMemcpyDeviceToHost, "hbonds:end_index" ); - - /* - for (i = 0; i < N-1; i++) { - Hindex = my_atoms [i].Hindex; - if (Hindex > -1) - comp = index [Hindex + 1]; - else - comp = hbonds->num_intrs; - - if (end_index [Hindex] > comp) { - fprintf(stderr,"step%d-atom:%d hbondchk failed: H=%d start(H)=%d end(H)=%d str(H+1)=%d\n", - step, i, Hindex, index[Hindex], end_index[Hindex], comp ); - return FAILURE; - } - - num_hbonds += MAX( (end_index [Hindex] - index [Hindex]) * 2, MIN_HBONDS * 2); - } - if (end_index [my_atoms[i].Hindex] > hbonds->num_intrs) { - fprintf(stderr,"step%d-atom:%d hbondchk failed: H=%d start(H)=%d end(H)=%d num_intrs=%d\n", - step, i, Hindex, index[Hindex], end_index[Hindex], hbonds->num_intrs); - return FAILURE; - } - - num_hbonds += MIN( (end_index [my_atoms[i].Hindex] - index [my_atoms[i].Hindex]) * 2, - 2 * MIN_HBONDS); - num_hbonds = MAX( num_hbonds, MIN_CAP*MIN_HBONDS ); - realloc->num_hbonds = num_hbonds; - */ - - max_hbonds = 0; - for ( i = 0; i < system->N; i++ ) - { - if ( end_index[i] - index[i] >= system->max_hbonds ) - { - //TODO: update -// fprintf( stderr, "step%d-hbondchk failed: i=%d start(i)=%d end(i)=%d max_hbonds=%d\n", -// step, i, index[i], end_index[i], system->max_hbonds ); -// return FAILURE; - } - if ( end_index[i] - index[i] >= max_hbonds ) - { - max_hbonds = end_index[i] - index[i]; - } - } - realloc->num_hbonds = max_hbonds; - } - fprintf( stderr, " [hbonds: %d]\n", ret ); - /* 3bodies list: since a more accurate estimate of the num. * of three body interactions requires that bond orders have * been computed, delay validation until for computation */ @@ -1304,13 +1268,10 @@ int Cuda_Init_Forces( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, output_controls *out_control ) { - int i, ret, Htop, *hb_top; + int i, ret, Htop; int blocks, hblocks; - hb_top = (int*) host_scratch; - - ret = Cuda_Estimate_Storages( system, control, dev_lists, &Htop, - hb_top, data->step ); + ret = Cuda_Estimate_Storages( system, control, dev_lists, &Htop, data->step ); if ( ret == SUCCESS ) { @@ -1332,6 +1293,13 @@ int Cuda_Init_Forces( reax_system *system, control_params *control, // ( system->d_my_atoms, *(*dev_lists + FAR_NBRS), *(*dev_lists + BONDS), // dev_workspace->total_bond_order, system->N ); // cudaThreadSynchronize( ); +// cudaCheckError( ); + +// k_print_hbond_info <<< blocks, DEF_BLOCK_SIZE >>> +// ( system->d_my_atoms, system->reax_param.d_sbp, +// (control_params *)control->d_control_params, +// *(*dev_lists + HBONDS), system->N ); +// cudaThreadSynchronize( ); // cudaCheckError( ); k_init_forces <<< blocks, DEF_BLOCK_SIZE >>> @@ -1342,7 +1310,7 @@ int Cuda_Init_Forces( reax_system *system, control_params *control, *(*dev_lists + HBONDS), d_LR, system->n, system->N, system->reax_param.num_atom_types, system->max_sparse_entries, (((data->step-data->prev_steps) % - control->reneighbor) == 0), system->max_hbonds ); + control->reneighbor) == 0) ); cudaThreadSynchronize( ); cudaCheckError( ); @@ -1358,7 +1326,7 @@ int Cuda_Init_Forces( reax_system *system, control_params *control, if ( control->hbond_cut > 0 && system->numH > 0 ) { /* make hbond_list symmetric */ - hblocks = (system->N * HB_KER_SYM_THREADS_PER_ATOM) / HB_SYM_BLOCK_SIZE + + hblocks = (system->N * HB_KER_SYM_THREADS_PER_ATOM / HB_SYM_BLOCK_SIZE) + ((((system->N * HB_KER_SYM_THREADS_PER_ATOM) % HB_SYM_BLOCK_SIZE) == 0) ? 0 : 1); New_fix_sym_hbond_indices <<< hblocks, HB_BLOCK_SIZE >>> @@ -1397,7 +1365,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, output_controls *out_control ) { - int i, hbs, hnbrs_bl, ret; + int i, hbs, hnbrs_blocks, ret; int *thbody; static int compute_bonded_part1 = FALSE; real t_start, t_elapsed; @@ -1446,6 +1414,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control, cudaGetLastError( ), t_elapsed ); fprintf( stderr, "Cuda_Calculate_Bond_Orders Done... \n" ); #endif + fprintf( stderr, " [CUDA_COMPUTE_BONDED_FORCES:BO]\n" ); /* 2. Bond Energy Interactions */ t_start = Get_Time( ); @@ -1469,6 +1438,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control, cudaGetLastError( ), t_elapsed ); fprintf( stderr, "Cuda_Bond_Energy Done... \n" ); #endif + fprintf( stderr, " [CUDA_COMPUTE_BONDED_FORCES:Bond E]\n" ); /* 3. Atom Energy Interactions */ t_start = Get_Time( ); @@ -1512,6 +1482,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control, cudaGetLastError( ), t_elapsed ); fprintf( stderr, "test_LonePair_postprocess Done... \n"); #endif + fprintf( stderr, " [CUDA_COMPUTE_BONDED_FORCES:Atom E]\n" ); compute_bonded_part1 = TRUE; } @@ -1589,6 +1560,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control, t_elapsed ); fprintf( stderr, "Three_Body_Interactions Done... \n" ); #endif + fprintf( stderr, " [CUDA_COMPUTE_BONDED_FORCES:Valence Angles]\n" ); /* 5. Torsion Angles Interactions */ t_start = Get_Time( ); @@ -1642,6 +1614,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control, cudaGetLastError( ), t_elapsed ); fprintf( stderr, " Four_Body_ Done... \n"); #endif + fprintf( stderr, " [CUDA_COMPUTE_BONDED_FORCES:Torsion]\n" ); /* 6. Hydrogen Bonds Interactions */ // FIX - 4 - Added additional check here @@ -1651,12 +1624,12 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control, cuda_memset( spad, 0, 2 * sizeof(real) * system->n + sizeof(rvec) * system->n * 2, "scratch" ); - hbs = ((system->n * HB_KER_THREADS_PER_ATOM)/ HB_BLOCK_SIZE) + + hbs = (system->n * HB_KER_THREADS_PER_ATOM / HB_BLOCK_SIZE) + (((system->n * HB_KER_THREADS_PER_ATOM) % HB_BLOCK_SIZE) == 0 ? 0 : 1); +// Cuda_Hydrogen_Bonds <<< BLOCKS, BLOCK_SIZE >>> Cuda_Hydrogen_Bonds_MT <<< hbs, HB_BLOCK_SIZE, HB_BLOCK_SIZE * (2 * sizeof(real) + 2 * sizeof(rvec)) >>> -// Cuda_Hydrogen_Bonds <<< BLOCKS, BLOCK_SIZE>>> ( system->d_my_atoms, system->reax_param.d_sbp, system->reax_param.d_hbp, system->reax_param.d_gp, (control_params *) control->d_control_params, @@ -1665,6 +1638,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control, spad, (rvec *) (spad + 2 * system->n) ); cudaThreadSynchronize( ); cudaCheckError( ); + fprintf( stderr, " [CUDA_COMPUTE_BONDED_FORCES:Hbonds]\n" ); /* reduction for E_HB */ Cuda_Reduction_Sum( spad, @@ -1692,17 +1666,20 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control, *(*dev_lists + BONDS), system->N ); cudaThreadSynchronize( ); cudaCheckError( ); + fprintf( stderr, " [CUDA_COMPUTE_BONDED_FORCES:Hbonds_PostProcess]\n" ); /* post process step2 */ + hnbrs_blocks = (system->N * HB_POST_PROC_KER_THREADS_PER_ATOM / HB_POST_PROC_BLOCK_SIZE) + + (((system->N * HB_POST_PROC_KER_THREADS_PER_ATOM) % HB_POST_PROC_BLOCK_SIZE) == 0 ? 0 : 1); + // Cuda_Hydrogen_Bonds_HNbrs <<< system->N, 32, 32 * sizeof(rvec) >>> // ( system->d_my_atoms, *dev_workspace, *(*dev_lists + HBONDS) ); - hnbrs_bl = ((system->N * HB_POST_PROC_KER_THREADS_PER_ATOM)/ HB_POST_PROC_BLOCK_SIZE) + - (((system->N * HB_POST_PROC_KER_THREADS_PER_ATOM) % HB_POST_PROC_BLOCK_SIZE) == 0 ? 0 : 1); - Cuda_Hydrogen_Bonds_HNbrs_BL <<< hnbrs_bl, HB_POST_PROC_BLOCK_SIZE, + Cuda_Hydrogen_Bonds_HNbrs_BL <<< hnbrs_blocks, HB_POST_PROC_BLOCK_SIZE, HB_POST_PROC_BLOCK_SIZE * sizeof(rvec) >>> ( system->d_my_atoms, *dev_workspace, *(*dev_lists + HBONDS), system->N ); cudaThreadSynchronize( ); cudaCheckError( ); + fprintf( stderr, " [CUDA_COMPUTE_BONDED_FORCES:Hbonds_HNbrs]\n" ); t_elapsed = Get_Timing_Info( t_start ); diff --git a/PG-PuReMD/src/cuda_forces.h b/PG-PuReMD/src/cuda_forces.h index f0340a83..a4ae2655 100644 --- a/PG-PuReMD/src/cuda_forces.h +++ b/PG-PuReMD/src/cuda_forces.h @@ -11,7 +11,7 @@ extern "C" { int Cuda_Estimate_Storages( reax_system *, control_params *, reax_list **, - int *, int *, int ); + int *, int ); int Cuda_Estimate_Storage_Three_Body( reax_system *, control_params *, int, reax_list **, int *, int * ); diff --git a/PG-PuReMD/src/cuda_hydrogen_bonds.cu b/PG-PuReMD/src/cuda_hydrogen_bonds.cu index 3adafa09..89682a39 100644 --- a/PG-PuReMD/src/cuda_hydrogen_bonds.cu +++ b/PG-PuReMD/src/cuda_hydrogen_bonds.cu @@ -19,10 +19,11 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ +#include "cuda_hydrogen_bonds.h" + #include "reax_types.h" #include "index_utils.h" -#include "cuda_hydrogen_bonds.h" #include "cuda_valence_angles.h" #include "cuda_helpers.h" #include "cuda_list.h" @@ -41,13 +42,11 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter int start_j, end_j, hb_start_j, hb_end_j; int hblist[MAX_BONDS]; int itr, top; - int num_hb_intrs; ivec rel_jk; real r_ij, r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2; real e_hb, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3; rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk; rvec dvec_jk, force, ext_press; - // rtensor temp_rtensor, total_rtensor; hbond_parameters *hbp; bond_order_data *bo_ij; bond_data *pbond_ij; @@ -56,6 +55,9 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter bond_data *bond_list; hbond_data *hbond_list, *hbond_jk; storage *workspace; +#if defined(DEBUG) + int num_hb_intrs; +#endif j = blockIdx.x * blockDim.x + threadIdx.x; @@ -66,28 +68,29 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter bonds = &( p_bonds ); bond_list = bonds->select.bond_list; - hbonds = & ( p_hbonds ); + hbonds = &( p_hbonds ); hbond_list = hbonds->select.hbond_list; workspace = &( p_workspace ); +#if defined(DEBUG) num_hb_intrs = 0; +#endif /* loops below discover the Hydrogen bonds between i-j-k triplets. - here j is H atom and there has to be some bond between i and j. - Hydrogen bond is between j and k. - so in this function i->X, j->H, k->Z when we map - variables onto the ones in the handout.*/ + * here j is H atom and there has to be some bond between i and j. + * Hydrogen bond is between j and k. + * so in this function i->X, j->H, k->Z when we map + * variables onto the ones in the handout.*/ //for( j = 0; j < system->n; ++j ) - /* j has to be of type H */ - if ( sbp[ my_atoms[j].type ].p_hbond == 1 ) + if ( sbp[ my_atoms[j].type ].p_hbond == H_ATOM ) { - /*set j's variables */ type_j = my_atoms[j].type; - start_j = Dev_Start_Index(j, bonds); - end_j = Dev_End_Index(j, bonds); + start_j = Dev_Start_Index( j, bonds ); + end_j = Dev_End_Index( j, bonds ); hb_start_j = Dev_Start_Index( my_atoms[j].Hindex, hbonds ); hb_end_j = Dev_End_Index( my_atoms[j].Hindex, hbonds ); top = 0; + /* search bonded atoms to atom j (i.e., hydrogen atom) for potential hydrogen bonding */ for( pi = start_j; pi < end_j; ++pi ) { pbond_ij = &( bond_list[pi] ); @@ -95,16 +98,17 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter bo_ij = &(pbond_ij->bo_data); type_i = my_atoms[i].type; - if( sbp[type_i].p_hbond == 2 && + if( sbp[type_i].p_hbond == H_BONDING_ATOM && bo_ij->BO >= HB_THRESHOLD ) { hblist[top++] = pi; } } - // fprintf( stderr, "j: %d, top: %d, hb_start_j: %d, hb_end_j:%d\n", - // j, top, hb_start_j, hb_end_j ); +// fprintf( stderr, "j: %d, top: %d, hb_start_j: %d, hb_end_j:%d\n", +// j, top, hb_start_j, hb_end_j ); + /* for each hbond of atom j */ for ( pk = hb_start_j; pk < hb_end_j; ++pk ) { /* set k's varibles */ @@ -114,9 +118,10 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter r_jk = nbr_jk->d; rvec_Scale( dvec_jk, hbond_list[pk].scl, nbr_jk->dvec ); - hbond_jk = &( hbond_list [pk] ); - rvec_MakeZero (hbond_jk->hb_f); + hbond_jk = &( hbond_list[pk] ); + rvec_MakeZero( hbond_jk->hb_f ); + /* find matching hbond to atom k */ for ( itr = 0; itr < top; ++itr ) { pi = hblist[itr]; @@ -129,7 +134,10 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter type_i = my_atoms[i].type; r_ij = pbond_ij->d; hbp = &(d_hbp[ index_hbp (type_i,type_j,type_k,num_atom_types) ]); + +#if defined(DEBUG) ++num_hb_intrs; +#endif Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, &theta, &cos_theta ); @@ -139,7 +147,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter &dcos_theta_dk ); /* hydrogen bond energy */ - sin_theta2 = SIN( theta/2.0 ); + sin_theta2 = SIN( theta / 2.0 ); sin_xhz4 = SQR(sin_theta2); sin_xhz4 *= sin_xhz4; cos_xhz1 = ( 1.0 - cos_theta ); @@ -169,7 +177,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter { // dcos terms //rvec_ScaledAdd( workspace->f[i], +CEhb2, dcos_theta_di ); - //atomic_rvecScaledAdd (workspace->f[i], +CEhb2, dcos_theta_di ); + //atomic_rvecScaledAdd( workspace->f[i], +CEhb2, dcos_theta_di ); rvec_ScaledAdd( pbond_ij->hb_f, +CEhb2, dcos_theta_di ); rvec_ScaledAdd( workspace->f[j], +CEhb2, dcos_theta_dj ); @@ -188,12 +196,12 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter else { /* for pressure coupling, terms that are not related to bond order - derivatives are added directly into pressure vector/tensor */ + * derivatives are added directly into pressure vector/tensor */ rvec_Scale( force, +CEhb2, dcos_theta_di ); // dcos terms //rvec_Add( workspace->f[i], force ); rvec_Add( pbond_ij->hb_f, force ); rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); - rvec_ScaledAdd( data_ext_press [j], 1.0, ext_press ); + rvec_ScaledAdd( data_ext_press[j], 1.0, ext_press ); rvec_ScaledAdd( workspace->f[j], +CEhb2, dcos_theta_dj ); @@ -206,7 +214,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds( reax_atom *my_atoms, single_body_parameter // dr terms rvec_ScaledAdd( workspace->f[j], -CEhb3/r_jk, dvec_jk ); - rvec_Scale( force, CEhb3/r_jk, dvec_jk ); + rvec_Scale( force, CEhb3 / r_jk, dvec_jk ); //rvec_Add( workspace->f[k], force ); rvec_Add( hbond_jk->hb_f, force ); rvec_iMultiply( ext_press, rel_jk, force ); @@ -267,20 +275,19 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame rvec *sh_atomf = (rvec *)(sh_cdbo + blockDim.x); rvec *sh_hf = (rvec *) (sh_atomf + blockDim.x); #endif - int __THREADS_PER_ATOM__, thread_id, warp_id, lane_id; - int i, j, k, pi, pk; - int type_i, type_j, type_k; - int start_j, end_j, hb_start_j, hb_end_j; - int hblist[MAX_BONDS]; - int itr, top; - int num_hb_intrs; + int __THREADS_PER_ATOM__, thread_id, group_id, lane_id; + int i, j, k, pi, pk; + int type_i, type_j, type_k; + int start_j, end_j, hb_start_j, hb_end_j; + //TODO: re-write and remove + int hblist[MAX_BONDS]; + int itr, top; int loopcount, count; ivec rel_jk; real r_ij, r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2; real e_hb, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3; rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk; rvec dvec_jk, force, ext_press; - // rtensor temp_rtensor, total_rtensor; hbond_parameters *hbp; bond_order_data *bo_ij; bond_data *pbond_ij; @@ -289,24 +296,29 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame bond_data *bond_list; hbond_data *hbond_list, *hbond_jk; storage *workspace; +#if defined(DEBUG) + int num_hb_intrs; +#endif __THREADS_PER_ATOM__ = HB_KER_THREADS_PER_ATOM; thread_id = blockIdx.x * blockDim.x + threadIdx.x; - warp_id = thread_id / __THREADS_PER_ATOM__; + group_id = thread_id / __THREADS_PER_ATOM__; lane_id = thread_id & (__THREADS_PER_ATOM__ - 1); - if ( warp_id >= n ) + if ( group_id >= n ) { return; } - num_hb_intrs = 0; workspace = &( p_workspace ); bonds = &( p_bonds ); bond_list = bonds->select.bond_list; - hbonds = & ( p_hbonds ); + hbonds = &( p_hbonds ); hbond_list = hbonds->select.hbond_list; - j = warp_id; + j = group_id; +#if defined(DEBUG) + num_hb_intrs = 0; +#endif /* loops below discover the Hydrogen bonds between i-j-k triplets. here j is H atom and there has to be some bond between i and j. @@ -316,7 +328,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame //for( j = 0; j < system->n; ++j ) #if defined( __SM_35__) - sh_hb = 0; + sh_hb = 0; rvec_MakeZero( sh_atomf ); #else sh_hb[threadIdx.x] = 0; @@ -324,7 +336,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame #endif /* j has to be of type H */ - if ( sbp[ my_atoms[j].type ].p_hbond == 1 ) + if ( sbp[ my_atoms[j].type ].p_hbond == H_ATOM ) { /* set j's variables */ type_j = my_atoms[j].type; @@ -341,7 +353,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame bo_ij = &(pbond_ij->bo_data); type_i = my_atoms[i].type; - if ( sbp[type_i].p_hbond == 2 && + if ( sbp[type_i].p_hbond == H_BONDING_ATOM && bo_ij->BO >= HB_THRESHOLD ) { hblist[top++] = pi; @@ -396,7 +408,10 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame type_i = my_atoms[i].type; r_ij = pbond_ij->d; hbp = &(d_hbp[ index_hbp(type_i,type_j,type_k,num_atom_types) ]); + +#if defined(DEBUG) ++num_hb_intrs; +#endif Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, &theta, &cos_theta ); @@ -405,7 +420,7 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_MT( reax_atom *my_atoms, single_body_parame &dcos_theta_di, &dcos_theta_dj, &dcos_theta_dk ); /* hydrogen bond energy */ - sin_theta2 = SIN( theta/2.0 ); + sin_theta2 = SIN( theta / 2.0 ); sin_xhz4 = SQR(sin_theta2); sin_xhz4 *= sin_xhz4; cos_xhz1 = ( 1.0 - cos_theta ); @@ -598,10 +613,10 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_PostProcess( reax_atom *atoms, storage p_workspace, reax_list p_bonds, int N ) { int i, pj; - storage *workspace = &( p_workspace ); + storage *workspace; bond_data *pbond; bond_data *sym_index_bond; - reax_list *bonds = &p_bonds; + reax_list *bonds; i = blockIdx.x * blockDim.x + threadIdx.x; @@ -610,10 +625,13 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_PostProcess( reax_atom *atoms, return; } + workspace = &p_workspace; + bonds = &p_bonds; + for ( pj = Dev_Start_Index(i, bonds); pj < Dev_End_Index(i, bonds); ++pj ) { pbond = &(bonds->select.bond_list[pj]); - sym_index_bond = &( bonds->select.bond_list[ pbond->sym_index ] ); + sym_index_bond = &( bonds->select.bond_list[pbond->sym_index] ); //rvec_Add( atoms[i].f, sym_index_bond->hb_f ); rvec_Add( workspace->f[i], sym_index_bond->hb_f ); @@ -631,11 +649,13 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_HNbrs( reax_atom *atoms, #endif int i, pj,j; int start, end; - storage *workspace = &( p_workspace ); + storage *workspace; hbond_data *nbr_pj, *sym_index_nbr; - reax_list *hbonds = &p_hbonds; + reax_list *hbonds; i = blockIdx.x; + workspace = &p_workspace; + hbonds = &p_hbonds; start = Dev_Start_Index( i, hbonds ); end = Dev_End_Index( i, hbonds ); @@ -710,25 +730,33 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_HNbrs_BL( reax_atom *atoms, { #if defined(__SM_35__) rvec __f; + int s; #else extern __shared__ rvec __f[]; #endif int i, pj,j; int start, end; - storage *workspace = &( p_workspace ); + storage *workspace; hbond_data *nbr_pj, *sym_index_nbr; - reax_list *hbonds = &p_hbonds; - int __THREADS_PER_ATOM__ = HB_POST_PROC_KER_THREADS_PER_ATOM; - int thread_id = blockIdx.x * blockDim.x + threadIdx.x; - int warp_id = thread_id / __THREADS_PER_ATOM__; - int lane_id = thread_id & (__THREADS_PER_ATOM__ -1); + reax_list *hbonds; + int __THREADS_PER_ATOM__; + int thread_id; + int group_id; + int lane_id; - if ( warp_id >= N ) + __THREADS_PER_ATOM__ = HB_POST_PROC_KER_THREADS_PER_ATOM; + thread_id = blockIdx.x * blockDim.x + threadIdx.x; + group_id = thread_id / __THREADS_PER_ATOM__; + lane_id = thread_id & (__THREADS_PER_ATOM__ - 1); + + if ( group_id >= N ) { return; } - i = warp_id; + workspace = &( p_workspace ); + hbonds = &p_hbonds; + i = group_id; start = Dev_Start_Index( i, hbonds ); end = Dev_End_Index( i, hbonds ); pj = start + lane_id; @@ -745,16 +773,16 @@ CUDA_GLOBAL void Cuda_Hydrogen_Bonds_HNbrs_BL( reax_atom *atoms, sym_index_nbr = &(hbonds->select.hbond_list[ nbr_pj->sym_index ]); #if defined(__SM_35__) - rvec_Add ( __f, sym_index_nbr->hb_f ); + rvec_Add( __f, sym_index_nbr->hb_f ); #else - rvec_Add ( __f[threadIdx.x], sym_index_nbr->hb_f ); + rvec_Add( __f[threadIdx.x], sym_index_nbr->hb_f ); #endif pj += __THREADS_PER_ATOM__; } #if defined(__SM_35__) - for ( int s = __THREADS_PER_ATOM__ >> 1; s >= 1; s/=2 ) + for ( s = __THREADS_PER_ATOM__ >> 1; s >= 1; s /= 2 ) { __f[0] += shfl( __f[0], s ); __f[1] += shfl( __f[1], s ); diff --git a/PG-PuReMD/src/cuda_neighbors.cu b/PG-PuReMD/src/cuda_neighbors.cu index 18af354b..54f6d021 100644 --- a/PG-PuReMD/src/cuda_neighbors.cu +++ b/PG-PuReMD/src/cuda_neighbors.cu @@ -344,7 +344,8 @@ CUDA_GLOBAL void k_mt_generate_neighbor_lists( reax_atom *my_atoms, { //do leader selection here leader = -1; - for ( ll = my_bucket *__THREADS_PER_ATOM__; ll < (my_bucket)*__THREADS_PER_ATOM__ + __THREADS_PER_ATOM__; ll++ ) + for ( ll = my_bucket *__THREADS_PER_ATOM__; + ll < (my_bucket) * __THREADS_PER_ATOM__ + __THREADS_PER_ATOM__; ll++ ) { if ( tnbr[ll] ) { @@ -541,7 +542,7 @@ CUDA_GLOBAL void k_estimate_neighbors( reax_atom *my_atoms, ivec_Copy( nbrs_x, g.nbrs_x[index_grid_nbrs(i, j, k, itr, &g)] ); if ( //(g.str[index_grid_3d(i, j, k, &g)] <= g.str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)]) && - Dev_DistSqr_to_Special_Point(g.nbrs_cp[index_grid_nbrs(i, j, k, itr, &g)],atom1->x) <= cutoff ) + Dev_DistSqr_to_Special_Point(g.nbrs_cp[index_grid_nbrs(i, j, k, itr, &g)], atom1->x) <= cutoff ) { /* pick up another atom from the neighbor cell */ for ( m = g.str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)]; @@ -637,8 +638,8 @@ int Cuda_Estimate_Neighbors( reax_system *system, int step ) k_estimate_neighbors <<< blocks, DEF_BLOCK_SIZE >>> ( system->d_my_atoms, system->my_ext_box, system->d_my_grid, - system->n, system->N, system->total_cap, system->d_far_nbrs, - system->d_max_far_nbrs, system->d_realloc_far_nbrs ); + system->n, system->N, system->total_cap, + system->d_far_nbrs, system->d_max_far_nbrs, system->d_realloc_far_nbrs ); cudaThreadSynchronize( ); cudaCheckError( ); @@ -680,6 +681,103 @@ CUDA_GLOBAL void k_init_end_index( int * intr_cnt, int *indices, int *end_indice } +CUDA_GLOBAL void k_setup_hindex( reax_atom *my_atoms, int N ) +{ + int i; + + i = blockIdx.x * blockDim.x + threadIdx.x; + + if ( i >= N ) + { + return; + } + + my_atoms[i].Hindex = i; +} + + +CUDA_GLOBAL void k_setup_hindex_part1( reax_atom *my_atoms, int * hindex, int n ) +{ + int i; + + i = blockIdx.x * blockDim.x + threadIdx.x; + + if ( i >= n ) + { + return; + } + + hindex[i] = my_atoms[i].Hindex; +} + + +CUDA_GLOBAL void k_setup_hindex_part2( reax_atom *my_atoms, int * hindex, int n ) +{ + int i; + + i = blockIdx.x * blockDim.x + threadIdx.x; + + if ( i >= n ) + { + return; + } + + if ( hindex[i + 1] - hindex[i] > 0 ) + { + my_atoms[i].Hindex = hindex[i]; + } + else + { + my_atoms[i].Hindex = -1; + } +} + + +CUDA_GLOBAL void k_init_hbond_indices( reax_atom * atoms, single_body_parameters *sbp, + int *hbonds, int *max_hbonds, int *indices, int *end_indices, int N ) +{ + int i, hindex, my_hbonds; + + i = blockIdx.x * blockDim.x + threadIdx.x; + + if ( i >= N ) + { + return; + } + + hindex = atoms[i].Hindex; + + if ( sbp[ atoms[i].type ].p_hbond == H_ATOM || + sbp[ atoms[i].type ].p_hbond == H_BONDING_ATOM ) + { + my_hbonds = hbonds[i]; + indices[hindex] = max_hbonds[i]; + end_indices[hindex] = indices[hindex] + hbonds[i]; + } + else + { + my_hbonds = 0; + indices[hindex] = 0; + end_indices[hindex] = 0; + } + atoms[i].num_hbonds = my_hbonds; + +// hindex = atoms[i].Hindex; +// +// if ( hindex >= 0 ) +// { +// my_hbonds = hbonds[i]; +// indices[hindex] = max_hbonds[i]; +// end_indices[hindex] = indices[hindex] + hbonds[i]; +// } +// else +// { +// my_hbonds = 0; +// } +// atoms[i].num_hbonds = my_hbonds; +} + + /* Initialize indices for far neighbors list post reallocation * * system: atomic system info. */ @@ -688,10 +786,10 @@ void Cuda_Init_Neighbor_Indices( reax_system *system ) int blocks; reax_list *far_nbrs = *dev_lists + FAR_NBRS; - /* init indinces */ + /* init indices */ Cuda_Scan_Excl_Sum( system->d_max_far_nbrs, far_nbrs->index, system->total_cap ); - /* init end_indinces */ + /* init end_indices */ blocks = system->N / DEF_BLOCK_SIZE + ((system->N % DEF_BLOCK_SIZE == 0) ? 0 : 1); k_init_end_index <<< blocks, DEF_BLOCK_SIZE >>> @@ -701,23 +799,48 @@ void Cuda_Init_Neighbor_Indices( reax_system *system ) } -void Cuda_Init_HBond_Indices( int *indices, int entries ) +void Cuda_Init_HBond_Indices( reax_system *system ) { - int i; + int blocks; + int *temp; reax_list *hbonds = *dev_lists + HBONDS; - for ( i = 1 ; i < entries; i++ ) - { - indices[i] += indices[i - 1]; - } + temp = (int *) scratch; +// cuda_memset( temp, 0, 2 * (system->N + 1) * sizeof(int), +// "Cuda_Init_HBond_Indices::temp" ); + + /* init Hindices */ + blocks = system->N / DEF_BLOCK_SIZE + + ((system->N % DEF_BLOCK_SIZE == 0) ? 0 : 1); + + k_setup_hindex <<< blocks, DEF_BLOCK_SIZE >>> + ( system->d_my_atoms, system->N ); + cudaThreadSynchronize( ); + cudaCheckError( ); -// Cuda_Scan_Excl_Sum( indices, hbonds->index, entries ); -// //TODO: kernel to compute end_index[i] += index[i] +// blocks = system->n / DEF_BLOCK_SIZE + +// ((system->n % DEF_BLOCK_SIZE == 0) ? 0 : 1); +// +// k_setup_hindex_part1 <<< blocks, DEF_BLOCK_SIZE >>> +// ( system->d_my_atoms, temp, system->n ); +// cudaThreadSynchronize( ); +// cudaCheckError( ); +// +// Cuda_Scan_Excl_Sum( temp, temp + system->n + 1, system->n + 1 ); +// +// k_setup_hindex_part2 <<< blocks, DEF_BLOCK_SIZE >>> +// ( system->d_my_atoms, temp + system->n + 1, system->n ); +// cudaThreadSynchronize( ); +// cudaCheckError( ); + + /* init indices and end_indices */ + Cuda_Scan_Excl_Sum( system->d_max_hbonds, temp, system->total_cap ); - copy_host_device( indices, hbonds->index + 1, (entries - 1) * sizeof(int), - cudaMemcpyHostToDevice, "dev_hbonds:index" ); - copy_host_device( indices, hbonds->end_index, entries * sizeof(int), - cudaMemcpyHostToDevice, "dev_hbonds:end_index" ); + k_init_hbond_indices <<< blocks, DEF_BLOCK_SIZE >>> + ( system->d_my_atoms, system->reax_param.d_sbp, system->d_hbonds, temp, + hbonds->index, hbonds->end_index, system->N ); + cudaThreadSynchronize( ); + cudaCheckError( ); } @@ -726,10 +849,10 @@ void Cuda_Init_Bond_Indices( reax_system *system ) int blocks; reax_list *bonds = *dev_lists + BONDS; - /* init indinces */ + /* init indices */ Cuda_Scan_Excl_Sum( system->d_max_bonds, bonds->index, system->total_cap ); - /* init end_indinces */ + /* init end_indices */ blocks = system->N / DEF_BLOCK_SIZE + ((system->N % DEF_BLOCK_SIZE == 0) ? 0 : 1); k_init_end_index <<< blocks, DEF_BLOCK_SIZE >>> diff --git a/PG-PuReMD/src/cuda_neighbors.h b/PG-PuReMD/src/cuda_neighbors.h index 1a1cd72b..d6b0e1d7 100644 --- a/PG-PuReMD/src/cuda_neighbors.h +++ b/PG-PuReMD/src/cuda_neighbors.h @@ -16,7 +16,7 @@ int Cuda_Estimate_Neighbors( reax_system *, int ); void Cuda_Init_Neighbor_Indices( reax_system * ); -void Cuda_Init_HBond_Indices( int *, int ); +void Cuda_Init_HBond_Indices( reax_system * ); void Cuda_Init_Bond_Indices( reax_system * ); diff --git a/PG-PuReMD/src/cuda_reset_tools.cu b/PG-PuReMD/src/cuda_reset_tools.cu index 47d03674..27cb4580 100644 --- a/PG-PuReMD/src/cuda_reset_tools.cu +++ b/PG-PuReMD/src/cuda_reset_tools.cu @@ -1,34 +1,13 @@ #include "cuda_reset_tools.h" -#include "cuda_utils.h" #include "cuda_list.h" +#include "cuda_utils.h" +#include "cuda_reduction.h" #include "reset_tools.h" -CUDA_GLOBAL void k_reset_hbond_list( reax_atom *my_atoms, - reax_list hbonds, int N ) -{ - int Hindex; - int i; - - i = blockIdx.x * blockDim.x + threadIdx.x; - - if ( i >= N ) - { - return; - } - - Hindex = my_atoms[i].Hindex; - - if ( Hindex > 1 ) - { - Dev_Set_End_Index( Hindex, Dev_Start_Index (Hindex, &hbonds), &hbonds ); - } -} - - extern "C" { @@ -45,7 +24,8 @@ void Cuda_Reset_Workspace( reax_system *system, storage *workspace ) } -CUDA_GLOBAL void k_reset_hindex( reax_atom *my_atoms, int N ) +CUDA_GLOBAL void k_reset_hindex( reax_atom *my_atoms, single_body_parameters *sbp, + int * hindex, int N ) { int i; @@ -56,95 +36,44 @@ CUDA_GLOBAL void k_reset_hindex( reax_atom *my_atoms, int N ) return; } + if ( sbp[ my_atoms[i].type ].p_hbond == H_ATOM || + sbp[ my_atoms[i].type ].p_hbond == H_BONDING_ATOM ) + { + hindex[i] = 1; + } + else + { + hindex[i] = 0; + } + +// my_atoms[i].Hindex = hindex[i]; my_atoms[i].Hindex = i; } void Cuda_Reset_Atoms( reax_system* system, control_params *control ) { - int i; - reax_atom *atom; int blocks; + int *hindex; - /* - if( control->hbond_cut > 0 ) - //TODO - for( i = 0; i < system->N; ++i ) { - atom = &(system->my_atoms[i]); - //if( system->reax_param.sbp[ atom->type ].p_hbond == 1 ) - atom->Hindex = system->numH++; - //else atom->Hindex = -1; - } - //TODO - */ - //////////////////////////////// - //////////////////////////////// - //////////////////////////////// - //////////////////////////////// - // FIX - 3 - Commented out this line for Hydrogen Bond fix - // FIX - HBOND ISSUE - // FIX - HBOND ISSUE - // FIX - HBOND ISSUE - // COMMENTED OUT THIS LINE BELOW - //system->numH = system->N; - // FIX - HBOND ISSUE - // FIX - HBOND ISSUE - // FIX - HBOND ISSUE - //////////////////////////////// - //////////////////////////////// - //////////////////////////////// - //////////////////////////////// - //////////////////////////////// + hindex = (int *) scratch; + cuda_memset( scratch, 0, system->N * sizeof(int), + "Cuda_Reset_Atoms::scratch" ); blocks = system->N / DEF_BLOCK_SIZE + ((system->N % DEF_BLOCK_SIZE == 0 ) ? 0 : 1); + k_reset_hindex <<< blocks, DEF_BLOCK_SIZE >>> - ( system->d_my_atoms, system->N ); + ( system->d_my_atoms, system->reax_param.d_sbp, hindex + 1, system->N ); cudaThreadSynchronize( ); cudaCheckError( ); -} + Cuda_Reduction_Sum( hindex, system->d_numH, system->N ); -int Cuda_Reset_Neighbor_Lists( reax_system *system, control_params *control, - storage *workspace, reax_list **lists ) -{ - int total_hbonds; - reax_list *hbonds; - int blocks; + copy_host_device( &(system->numH), system->d_numH, sizeof(int), + cudaMemcpyDeviceToHost, "Cuda_Reset_Atoms::d_numH" ); - //HBonds processing - //FIX - 4 - Added additional check - if ( control->hbond_cut > 0.0 && system->numH > 0 ) - { - hbonds = (*dev_lists) + HBONDS; - total_hbonds = 0; - - /* reset start-end indexes */ - //TODO - blocks = system->N / DEF_BLOCK_SIZE + - ((system->N % DEF_BLOCK_SIZE == 0 ) ? 0 : 1); - k_reset_hbond_list <<< blocks, DEF_BLOCK_SIZE >>> - ( system->d_my_atoms, *(*dev_lists + HBONDS), system->N ); - cudaThreadSynchronize( ); - cudaCheckError( ); - - //TODO compute the total hbonds here - total_hbonds = 0; - - /* is reallocation needed? */ - if ( total_hbonds >= hbonds->num_intrs * DANGER_ZONE ) - { - workspace->realloc.hbonds = 1; - if ( total_hbonds >= hbonds->num_intrs ) - { - fprintf( stderr, "p%d: not enough space for hbonds! total=%d allocated=%d\n", - system->my_rank, total_hbonds, hbonds->num_intrs ); - return FAILURE; - } - } - } - - return SUCCESS; + system->Hcap = MAX( system->numH * SAFER_ZONE, MIN_CAP ); } @@ -162,8 +91,6 @@ void Cuda_Reset( reax_system *system, control_params *control, Cuda_Reset_Workspace( system, workspace ); - Cuda_Reset_Neighbor_Lists( system, control, workspace, lists ); - #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d @ step%d: reset done\n", system->my_rank, data->step ); MPI_Barrier( MPI_COMM_WORLD ); diff --git a/PG-PuReMD/src/cuda_reset_tools.h b/PG-PuReMD/src/cuda_reset_tools.h index 4477a188..f158afec 100644 --- a/PG-PuReMD/src/cuda_reset_tools.h +++ b/PG-PuReMD/src/cuda_reset_tools.h @@ -10,9 +10,12 @@ extern "C" { void Cuda_Reset_Workspace( reax_system *, storage * ); + void Cuda_Reset_Atoms( reax_system *, control_params * ); + int Cuda_Reset_Neighbor_Lists( reax_system *, control_params *, storage *, reax_list ** ); + void Cuda_Reset( reax_system*, control_params*, simulation_data*, storage*, reax_list** ); diff --git a/PG-PuReMD/src/cuda_validation.cu b/PG-PuReMD/src/cuda_validation.cu index 89bb698d..830b889c 100644 --- a/PG-PuReMD/src/cuda_validation.cu +++ b/PG-PuReMD/src/cuda_validation.cu @@ -461,7 +461,7 @@ int validate_hbonds( reax_system *system, storage *workspace, sym_count = 0; for (int i = 0; i < system->n; i++) { - if ( system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond == 1 ) + if ( system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond == H_ATOM ) { count += End_Index (i, hbonds) - Start_Index (i, hbonds); dev_count += d_end [i] - d_start[i]; @@ -485,7 +485,7 @@ int validate_hbonds( reax_system *system, storage *workspace, sym_count = 0; for (int i = system->n; i < system->N; i++) { - //if (system->reax_param.sbp[ system->my_atoms[i].type].p_hbond == 2) + //if (system->reax_param.sbp[ system->my_atoms[i].type].p_hbond == H_BONDING_ATOM ) { sym_count += d_end[i] - d_start[i]; } @@ -504,7 +504,7 @@ int validate_hbonds( reax_system *system, storage *workspace, d_end[d_index] - d_start[d_index]); */ - if ( system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond != 1 ) + if ( system->reax_param.sbp[ system->my_atoms[i].type ].p_hbond != H_ATOM ) { /* int x; diff --git a/PG-PuReMD/src/ffield.c b/PG-PuReMD/src/ffield.c index 019d5cd5..a00e7511 100644 --- a/PG-PuReMD/src/ffield.c +++ b/PG-PuReMD/src/ffield.c @@ -64,7 +64,7 @@ char Read_Force_Field( char *ffield_file, reax_interaction *reax, /* reading the number of global parameters */ n = atoi(tmp[0]); - if (n < 1) + if ( n < 1 ) { fprintf( stderr, "WARNING: number of globals in ffield file is 0!\n" ); return 1; @@ -98,80 +98,41 @@ char Read_Force_Field( char *ffield_file, reax_interaction *reax, fgets(s, MAX_LINE, fp); /* Allocating structures in reax_interaction */ - /* - reax->sbp = (single_body_parameters*) - scalloc( reax->num_atom_types, sizeof(single_body_parameters), "sbp" ); - reax->tbp = (two_body_parameters**) - scalloc( reax->num_atom_types, sizeof(two_body_parameters*), "tbp" ); - reax->thbp= (three_body_header***) - scalloc( reax->num_atom_types, sizeof(three_body_header**), "thbp" ); - reax->hbp = (hbond_parameters***) - scalloc( reax->num_atom_types, sizeof(hbond_parameters**), "hbp" ); - reax->fbp = (four_body_header****) - scalloc( reax->num_atom_types, sizeof(four_body_header***), "fbp" ); - tor_flag = (char****) - scalloc( reax->num_atom_types, sizeof(char***), "tor_flag" ); - - for( i = 0; i < reax->num_atom_types; i++ ) { - reax->tbp[i] = (two_body_parameters*) - scalloc( reax->num_atom_types, sizeof(two_body_parameters), "tbp[i]" ); - reax->thbp[i]= (three_body_header**) - scalloc( reax->num_atom_types, sizeof(three_body_header*), "thbp[i]" ); - reax->hbp[i] = (hbond_parameters**) - scalloc( reax->num_atom_types, sizeof(hbond_parameters*), "hbp[i]" ); - reax->fbp[i] = (four_body_header***) - scalloc( reax->num_atom_types, sizeof(four_body_header**), "fbp[i]" ); - tor_flag[i] = (char***) - scalloc( reax->num_atom_types, sizeof(char**), "tor_flag[i]" ); - - for( j = 0; j < reax->num_atom_types; j++ ) { - reax->thbp[i][j]= (three_body_header*) - scalloc( reax->num_atom_types, sizeof(three_body_header), "thbp[i,j]" ); - reax->hbp[i][j] = (hbond_parameters*) - scalloc( reax->num_atom_types, sizeof(hbond_parameters), "hbp[i,j]" ); - reax->fbp[i][j] = (four_body_header**) - scalloc( reax->num_atom_types, sizeof(four_body_header*), "fbp[i,j]" ); - tor_flag[i][j] = (char**) - scalloc( reax->num_atom_types, sizeof(char*), "tor_flag[i,j]" ); - - for (k=0; k < reax->num_atom_types; k++) { - reax->fbp[i][j][k] = (four_body_header*) - scalloc(reax->num_atom_types, sizeof(four_body_header), "fbp[i,j,k]"); - tor_flag[i][j][k] = (char*) - scalloc( reax->num_atom_types, sizeof(char), "tor_flag[i,j,k]" ); - } - } - } - */ - __N = reax->num_atom_types; reax->sbp = (single_body_parameters*) - calloc( reax->num_atom_types, sizeof(single_body_parameters) ); + scalloc( reax->num_atom_types, sizeof(single_body_parameters), + "Read_Force_Field::reax->sbp" ); reax->tbp = (two_body_parameters*) - calloc( pow (reax->num_atom_types, 2), sizeof(two_body_parameters) ); + scalloc( POW(reax->num_atom_types, 2), sizeof(two_body_parameters), + "Read_Force_Field::reax->tbp" ); reax->thbp = (three_body_header*) - calloc( pow (reax->num_atom_types, 3), sizeof(three_body_header) ); + scalloc( POW(reax->num_atom_types, 3), sizeof(three_body_header), + "Read_Force_Field::reax->thbp" ); + reax->hbp = (hbond_parameters*) - calloc( pow (reax->num_atom_types, 3), sizeof(hbond_parameters) ); + scalloc( POW(reax->num_atom_types, 3), sizeof(hbond_parameters), + "Read_Force_Field::reax->hbp" ); reax->fbp = (four_body_header*) - calloc( pow (reax->num_atom_types, 4), sizeof(four_body_header) ); + scalloc( POW(reax->num_atom_types, 4), sizeof(four_body_header), + "Read_Force_Field::reax->fbp" ); - tor_flag = (char*) - calloc( pow (reax->num_atom_types, 4), sizeof(char) ); + tor_flag = (char*) scalloc( POW(reax->num_atom_types, 4), sizeof(char), + "Read_Force_Field::tor_flag" ); - // vdWaals type: 1: Shielded Morse, no inner-wall - // 2: inner wall, no shielding - // 3: inner wall+shielding + /* vdWaals type: + * 1: Shielded Morse, no inner-wall + * 2: inner wall, no shielding + * 3: inner wall+shielding */ reax->gp.vdw_type = 0; /* reading single atom parameters */ /* there are 4 lines of each single atom parameters in ff files. these - parameters later determine some of the pair and triplet parameters using - combination rules. */ + * parameters later determine some of the pair and triplet parameters using + * combination rules. */ for ( i = 0; i < reax->num_atom_types; i++ ) { /* line one */ @@ -183,7 +144,9 @@ char Read_Force_Field( char *ffield_file, reax_interaction *reax, reax->sbp[i].name[j] = toupper( tmp[0][j] ); } - //fprintf( stderr, "Atom Name in the force field : %s \n", reax->sbp[i].name); +#if defined(DEBUG_FOCUS) + fprintf( stderr, "Atom Name in the force field : %s \n", reax->sbp[i].name ); +#endif val = atof(tmp[1]); reax->sbp[i].r_s = val; @@ -221,7 +184,7 @@ char Read_Force_Field( char *ffield_file, reax_interaction *reax, val = atof(tmp[6]); reax->sbp[i].eta = 2.0 * val; val = atof(tmp[7]); - reax->sbp[i].p_hbond = (int) val; + reax->sbp[i].p_hbond = (int)val; /* line 3 */ fgets( s, MAX_LINE, fp ); @@ -645,12 +608,12 @@ char Read_Force_Field( char *ffield_file, reax_interaction *reax, } /* 4-body parameters are entered in compact form. i.e. 0-X-Y-0 - correspond to any type of pair of atoms in 1 and 4 - position. However, explicit X-Y-Z-W takes precedence over the - default description. - supports multi-well potentials (upto MAX_4BODY_PARAM in mytypes.h) - IMPORTANT: for now, directions on how to read multi-entries from ffield - is not clear */ + * correspond to any type of pair of atoms in 1 and 4 + * position. However, explicit X-Y-Z-W takes precedence over the + * default description. + * supports multi-well potentials (upto MAX_4BODY_PARAM in mytypes.h) + * IMPORTANT: for now, directions on how to read multi-entries from ffield + * is not clear */ /* clear all entries first */ for ( i = 0; i < reax->num_atom_types; ++i ) diff --git a/PG-PuReMD/src/forces.c b/PG-PuReMD/src/forces.c index 0647c737..ae60867e 100644 --- a/PG-PuReMD/src/forces.c +++ b/PG-PuReMD/src/forces.c @@ -65,8 +65,8 @@ #ifdef HAVE_CUDA -void Cuda_Total_Forces (reax_system *, control_params *, simulation_data *, storage *); -void Cuda_Total_Forces_PURE (reax_system *, storage *); +void Cuda_Total_Forces( reax_system *, control_params *, simulation_data *, storage * ); +void Cuda_Total_Forces_PURE( reax_system *, storage * ); #endif @@ -225,16 +225,18 @@ void Cuda_Compute_Total_Force( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, mpi_datatypes *mpi_data ) { - rvec *f = (rvec *) host_scratch; + rvec *f; + + f = (rvec *) host_scratch; memset( f, 0, sizeof(rvec) * system->N ); Cuda_Total_Forces( system, control, data, workspace ); #if defined(PURE_REAX) /* now all forces are computed to their partially-final values - based on the neighbors information each processor has had. - final values of force on each atom needs to be computed by adding up - all partially-final pieces */ + * based on the neighbors information each processor has had. + * final values of force on each atom needs to be computed by adding up + * all partially-final pieces */ //MVAPICH2 copy_host_device( f, dev_workspace->f, sizeof(rvec) * system->N , @@ -876,7 +878,7 @@ int Init_Forces( reax_system *system, control_params *control, cutoff = control->bond_cut; } - ihb = -1; + ihb = NON_H_BONDING_ATOM; ihb_top = -1; if ( local ) { @@ -888,7 +890,7 @@ int Init_Forces( reax_system *system, control_params *control, if ( control->hbond_cut > 0.0 ) { ihb = sbp_i->p_hbond; - if ( ihb == 1 ) + if ( ihb == H_ATOM ) { ihb_top = End_Index( atom_i->Hindex, hbonds ); } @@ -972,13 +974,13 @@ int Init_Forces( reax_system *system, control_params *control, } /* hydrogen bond lists */ - if ( control->hbond_cut > 0 && (ihb == 1 || ihb == 2) && + if ( control->hbond_cut > 0 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) && nbr_pj->d <= control->hbond_cut ) { // fprintf( stderr, "%d %d\n", atom1, atom2 ); jhb = sbp_j->p_hbond; - if ( ihb == 1 && jhb == 2 ) + if ( ihb == H_ATOM && jhb == H_BONDING_ATOM ) { hbonds->select.hbond_list[ihb_top].nbr = j; hbonds->select.hbond_list[ihb_top].scl = 1; @@ -986,7 +988,7 @@ int Init_Forces( reax_system *system, control_params *control, ++ihb_top; ++num_hbonds; } - else if ( j < system->n && ihb == 2 && jhb == 1 ) + else if ( j < system->n && ihb == H_BONDING_ATOM && jhb == H_ATOM ) { jhb_top = End_Index( atom_j->Hindex, hbonds ); hbonds->select.hbond_list[jhb_top].nbr = i; @@ -1032,7 +1034,7 @@ int Init_Forces( reax_system *system, control_params *control, //printf("Htop: %d \n", Htop); H->end[i] = Htop; - if ( ihb == 1 ) + if ( ihb == H_ATOM ) { Set_End_Index( atom_i->Hindex, ihb_top, hbonds ); } @@ -1161,12 +1163,12 @@ int Init_Forces_No_Charges( reax_system *system, control_params *control, cutoff = control->bond_cut; } - ihb = -1; + ihb = NON_H_BONDING_ATOM; ihb_top = -1; if ( local && control->hbond_cut > 0 ) { ihb = sbp_i->p_hbond; - if ( ihb == 1 ) + if ( ihb == H_ATOM ) { ihb_top = End_Index( atom_i->Hindex, hbonds ); } @@ -1223,12 +1225,12 @@ int Init_Forces_No_Charges( reax_system *system, control_params *control, if ( local ) { /* hydrogen bond lists */ - if ( control->hbond_cut > 0 && (ihb == 1 || ihb == 2) && + if ( control->hbond_cut > 0 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) && nbr_pj->d <= control->hbond_cut ) { // fprintf( stderr, "%d %d\n", atom1, atom2 ); jhb = sbp_j->p_hbond; - if ( ihb == 1 && jhb == 2 ) + if ( ihb == H_ATOM && jhb == H_BONDING_ATOM ) { hbonds->select.hbond_list[ihb_top].nbr = j; hbonds->select.hbond_list[ihb_top].scl = 1; @@ -1236,7 +1238,7 @@ int Init_Forces_No_Charges( reax_system *system, control_params *control, ++ihb_top; ++num_hbonds; } - else if ( j < system->n && ihb == 2 && jhb == 1 ) + else if ( j < system->n && ihb == H_BONDING_ATOM && jhb == H_ATOM ) { jhb_top = End_Index( atom_j->Hindex, hbonds ); hbonds->select.hbond_list[jhb_top].nbr = i; @@ -1273,7 +1275,7 @@ int Init_Forces_No_Charges( reax_system *system, control_params *control, } Set_End_Index( i, btop_i, bonds ); - if ( local && ihb == 1 ) + if ( local && ihb == H_ATOM ) { Set_End_Index( atom_i->Hindex, ihb_top, hbonds ); } @@ -1455,16 +1457,16 @@ void Estimate_Storages( reax_system *system, control_params *control, if ( i < system->n ) { - local = 1; + local = TRUE; cutoff = control->nonb_cut; ++(*Htop); ihb = sbp_i->p_hbond; } else { - local = 0; + local = FALSE; cutoff = control->bond_cut; - ihb = -1; + ihb = NON_H_BONDING_ATOM; } for ( pj = start_i; pj < end_i; ++pj ) @@ -1482,19 +1484,21 @@ void Estimate_Storages( reax_system *system, control_params *control, //twbp = &(system->reax_param.tbp[type_i][type_j]); twbp = &(system->reax_param.tbp[index_tbp (type_i, type_j, system->reax_param.num_atom_types)]); - if ( local ) + if ( local == TRUE ) { if ( j < system->n || atom_i->orig_id < atom_j->orig_id ) //tryQEq ||1 ++(*Htop); - if ( control->hbond_cut > 0.1 && (ihb == 1 || ihb == 2) && + if ( control->hbond_cut > 0.1 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) && nbr_pj->d <= control->hbond_cut ) { jhb = sbp_j->p_hbond; - if ( ihb == 1 && jhb == 2 ) + if ( ihb == H_ATOM && jhb == H_BONDING_ATOM ) + { ++hb_top[i]; - else if ( j < system->n && ihb == 2 && jhb == 1 ) + } + else if ( j < system->n && ihb == H_BONDING_ATOM && jhb == H_ATOM ) { ++hb_top[j]; @@ -1573,8 +1577,8 @@ void Estimate_Storages( reax_system *system, control_params *control, #else void Estimate_Storages( reax_system *system, control_params *control, - reax_list **lists, int *Htop, int *hb_top, - int *bond_top, int *num_3body) + reax_list **lists, int *Htop, int *hb_top, int *bond_top, + int *num_3body ) { int i, j, pj; @@ -1608,16 +1612,16 @@ void Estimate_Storages( reax_system *system, control_params *control, if ( i < system->n ) { - local = 1; + local = TRUE; cutoff = control->nonb_cut; ++(*Htop); ihb = sbp_i->p_hbond; } else { - local = 0; + local = FALSE; cutoff = control->bond_cut; - ihb = -1; + ihb = NON_H_BONDING_ATOM; } for ( pj = start_i; pj < end_i; ++pj ) @@ -1633,22 +1637,26 @@ void Estimate_Storages( reax_system *system, control_params *control, r_ij = nbr_pj->d; sbp_j = &(system->reax_param.sbp[type_j]); //twbp = &(system->reax_param.tbp[type_i][type_j]); - twbp = &(system->reax_param.tbp[index_tbp (type_i, type_j, system->reax_param.num_atom_types)]); + twbp = &(system->reax_param.tbp[index_tbp(type_i, type_j, system->reax_param.num_atom_types)]); - if ( local ) + if ( local == TRUE ) { if ( j < system->n || atom_i->orig_id < atom_j->orig_id ) //tryQEq ||1 ++(*Htop); /* hydrogen bond lists */ - if ( control->hbond_cut > 0.1 && (ihb == 1 || ihb == 2) && + if ( control->hbond_cut > 0.1 && (ihb == H_ATOM || ihb == H_BONDING_ATOM) && nbr_pj->d <= control->hbond_cut ) { jhb = sbp_j->p_hbond; - if ( ihb == 1 && jhb == 2 ) + if ( ihb == H_ATOM && jhb == H_BONDING_ATOM ) + { ++hb_top[i]; - else if ( j < system->n && ihb == 2 && jhb == 1 ) + } + else if ( j < system->n && ihb == H_BONDING_ATOM && jhb == H_ATOM ) + { ++hb_top[j]; + } } } @@ -1875,6 +1883,39 @@ int Cuda_Compute_Forces( reax_system *system, control_params *control, { retVal = Cuda_Init_Forces( system, control, data, workspace, lists, out_control ); fprintf( stderr, " [CUDA_INIT_FORCES: %d] STEP %d\n", retVal, data->step ); + +// int i; +// static reax_list **temp_lists; +// +// if ( data->step == 0 ) +// { +// temp_lists = (reax_list **) smalloc( LIST_N * sizeof (reax_list *), "temp_lists" ); +// for ( i = 0; i < LIST_N; ++i ) +// { +// temp_lists[i] = (reax_list *) smalloc( sizeof(reax_list), "lists[i]" ); +// temp_lists[i]->allocated = FALSE; +// } +// Make_List( (*dev_lists + BONDS)->n, (*dev_lists + BONDS)->num_intrs, +// TYP_BOND, *temp_lists + BONDS ); +// Make_List( (*dev_lists + HBONDS)->n, (*dev_lists + HBONDS)->num_intrs, +// TYP_HBOND, *temp_lists + HBONDS ); +// } +// else +// { +// Delete_List( *temp_lists + BONDS ); +// Make_List( (*dev_lists + BONDS)->n, (*dev_lists + BONDS)->num_intrs, +// TYP_BOND, *temp_lists + BONDS ); +// Delete_List( *temp_lists + HBONDS ); +// Make_List( (*dev_lists + HBONDS)->n, (*dev_lists + HBONDS)->num_intrs, +// TYP_HBOND, *temp_lists + HBONDS ); +// +// } +// Output_Sync_Lists( *temp_lists + BONDS, *dev_lists + BONDS, TYP_BOND ); +// Print_Bonds( system, temp_lists, control ); +// Output_Sync_Lists( *temp_lists + HBONDS, *dev_lists + HBONDS, TYP_HBOND ); +// Print_HBonds( system, temp_lists, control, data->step ); +// Print_HBond_Indices( system, temp_lists, control, data->step ); +// exit( 0 ); } else { diff --git a/PG-PuReMD/src/hydrogen_bonds.c b/PG-PuReMD/src/hydrogen_bonds.c index 493379ce..6021f392 100644 --- a/PG-PuReMD/src/hydrogen_bonds.c +++ b/PG-PuReMD/src/hydrogen_bonds.c @@ -20,41 +20,41 @@ ----------------------------------------------------------------------*/ #include "reax_types.h" + #include "index_utils.h" + #if defined(PURE_REAX) -#include "hydrogen_bonds.h" -#include "bond_orders.h" -#include "list.h" -#include "valence_angles.h" -#include "vector.h" + #include "hydrogen_bonds.h" + #include "bond_orders.h" + #include "list.h" + #include "valence_angles.h" + #include "vector.h" #elif defined(LAMMPS_REAX) -#include "reax_hydrogen_bonds.h" -#include "reax_bond_orders.h" -#include "reax_list.h" -#include "reax_valence_angles.h" -#include "reax_vector.h" + #include "reax_hydrogen_bonds.h" + #include "reax_bond_orders.h" + #include "reax_list.h" + #include "reax_valence_angles.h" + #include "reax_vector.h" #endif + // DANIEL // This function is taken straight from PuReMD, with minimal changes to accomodate the new datastructures // Attempting to fix ehb being way off in MPI_Not_GPU - void Hydrogen_Bonds( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { - int i, j, k, pi, pk; - int type_i, type_j, type_k; - int start_j, end_j, hb_start_j, hb_end_j; - int hblist[MAX_BONDS]; - int itr, top; - int num_hb_intrs = 0; + int i, j, k, pi, pk; + int type_i, type_j, type_k; + int start_j, end_j, hb_start_j, hb_end_j; + int hblist[MAX_BONDS]; + int itr, top; ivec rel_jk; real r_ij, r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2; real e_hb, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3; rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk; rvec dvec_jk, force, ext_press; - // rtensor temp_rtensor, total_rtensor; hbond_parameters *hbp; bond_order_data *bo_ij; bond_data *pbond_ij; @@ -62,6 +62,9 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, reax_list *bonds, *hbonds; bond_data *bond_list; hbond_data *hbond_list; +#if defined(DEBUG) + int num_hb_intrs = 0; +#endif bonds = (*lists) + BONDS; bond_list = bonds->select.bond_list; @@ -69,22 +72,24 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, hbond_list = hbonds->select.hbond_list; /* loops below discover the Hydrogen bonds between i-j-k triplets. - here j is H atom and there has to be some bond between i and j. - Hydrogen bond is between j and k. - so in this function i->X, j->H, k->Z when we map - variables onto the ones in the handout.*/ + * here j is H atom and there has to be some bond between i and j. + * Hydrogen bond is between j and k. + * so in this function i->X, j->H, k->Z when we map + * variables onto the ones in the handout.*/ for ( j = 0; j < system->n; ++j ) + { /* j has to be of type H */ - if ( system->reax_param.sbp[system->my_atoms[j].type].p_hbond == 1 ) + if ( system->reax_param.sbp[system->my_atoms[j].type].p_hbond == H_ATOM ) { - /*set j's variables */ - type_j = system->my_atoms[j].type; - start_j = Start_Index(j, bonds); - end_j = End_Index(j, bonds); + /* set j's variables */ + type_j = system->my_atoms[j].type; + start_j = Start_Index( j, bonds ); + end_j = End_Index( j, bonds ); hb_start_j = Start_Index( system->my_atoms[j].Hindex, hbonds ); - hb_end_j = End_Index( system->my_atoms[j].Hindex, hbonds ); + hb_end_j = End_Index( system->my_atoms[j].Hindex, hbonds ); top = 0; + /* search bonded atoms to atom j (i.e., hydrogen atom) for potential hydrogen bonding */ for ( pi = start_j; pi < end_j; ++pi ) { pbond_ij = &( bond_list[pi] ); @@ -92,14 +97,17 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, bo_ij = &(pbond_ij->bo_data); type_i = system->my_atoms[i].type; - if ( system->reax_param.sbp[type_i].p_hbond == 2 && + if ( system->reax_param.sbp[type_i].p_hbond == H_BONDING_ATOM && bo_ij->BO >= HB_THRESHOLD ) + { hblist[top++] = pi; + } } // fprintf( stderr, "j: %d, top: %d, hb_start_j: %d, hb_end_j:%d\n", // j, top, hb_start_j, hb_end_j ); + /* for each hbond of atom j */ for ( pk = hb_start_j; pk < hb_end_j; ++pk ) { /* set k's varibles */ @@ -109,11 +117,12 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, r_jk = nbr_jk->d; rvec_Scale( dvec_jk, hbond_list[pk].scl, nbr_jk->dvec ); + /* find matching hbond to atom k */ for ( itr = 0; itr < top; ++itr ) { pi = hblist[itr]; - // DANIEL - // pbond_ij = &( bonds->bond_list[pi] ); + //DANIEL + //pbond_ij = &( bonds->bond_list[pi] ); pbond_ij = &( bonds->select.bond_list[pi] ); i = pbond_ij->nbr; @@ -122,35 +131,34 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, bo_ij = &(pbond_ij->bo_data); type_i = system->my_atoms[i].type; r_ij = pbond_ij->d; - //hbp = &(system->reax_param.hbp[ type_i ][ type_j ][ type_k ]); - // SUDHIR - hbp = &(system->reax_param.hbp[ index_hbp (type_i, type_j, type_k, system->reax_param.num_atom_types) ]); + hbp = &(system->reax_param.hbp[ index_hbp(type_i, type_j, type_k, system->reax_param.num_atom_types) ]); +#if defined(DEBUG) ++num_hb_intrs; +#endif Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, - &theta, &cos_theta ); + &theta, &cos_theta ); /* the derivative of cos(theta) */ Calculate_dCos_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, - &dcos_theta_di, &dcos_theta_dj, - &dcos_theta_dk ); + &dcos_theta_di, &dcos_theta_dj, &dcos_theta_dk ); - /* hyrogen bond energy*/ + /* hyrogen bond energy */ sin_theta2 = sin( theta / 2.0 ); sin_xhz4 = SQR(sin_theta2); sin_xhz4 *= sin_xhz4; cos_xhz1 = ( 1.0 - cos_theta ); exp_hb2 = exp( -hbp->p_hb2 * bo_ij->BO ); exp_hb3 = exp( -hbp->p_hb3 * ( hbp->r0_hb / r_jk + - r_jk / hbp->r0_hb - 2.0 ) ); + r_jk / hbp->r0_hb - 2.0 ) ); - data->my_en.e_hb += e_hb = - hbp->p_hb1 * (1.0 - exp_hb2) * exp_hb3 * sin_xhz4; + e_hb = hbp->p_hb1 * (1.0 - exp_hb2) * exp_hb3 * sin_xhz4; + data->my_en.e_hb += e_hb; CEhb1 = hbp->p_hb1 * hbp->p_hb2 * exp_hb2 * exp_hb3 * sin_xhz4; CEhb2 = -hbp->p_hb1 / 2.0 * (1.0 - exp_hb2) * exp_hb3 * cos_xhz1; CEhb3 = -hbp->p_hb3 * - (-hbp->r0_hb / SQR(r_jk) + 1.0 / hbp->r0_hb) * e_hb; + (-hbp->r0_hb / SQR(r_jk) + 1.0 / hbp->r0_hb) * e_hb; /*fprintf( stdout, "%6d%6d%6d%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f\n", @@ -175,7 +183,7 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, else { /* for pressure coupling, terms that are not related to bond order - derivatives are added directly into pressure vector/tensor */ + * derivatives are added directly into pressure vector/tensor */ rvec_Scale( force, +CEhb2, dcos_theta_di ); // dcos terms rvec_Add( workspace->f[i], force ); rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); @@ -212,6 +220,7 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, system->my_atoms[k].orig_id, r_jk, theta, bo_ij->BO, e_hb, data->my_en.e_hb ); #endif + #ifdef TEST_FORCES Add_dBO( system, lists, j, pi, +CEhb1, workspace->f_hb ); //dbo term // dcos terms @@ -226,31 +235,31 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, } } } + } #if defined(DEBUG) fprintf( stderr, "Number of hydrogen bonds: %d\n", num_hb_intrs ); fprintf( stderr, "Hydrogen Bond Energy: %g\n", data->my_en.e_hb ); fprintf( stderr, "hydbonds: ext_press (%24.15e %24.15e %24.15e)\n", - data->ext_press[0], data->ext_press[1], data->ext_press[2] ); + data->ext_press[0], data->ext_press[1], data->ext_press[2] ); #endif } + void Old_Hydrogen_Bonds( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control ) { int i, j, k, pi, pk; int type_i, type_j, type_k; int start_j, end_j, hb_start_j, hb_end_j; int hblist[MAX_BONDS]; int itr, top; - int num_hb_intrs = 0; ivec rel_jk; real r_ij, r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2; real e_hb, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3; rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk; rvec dvec_jk, force, ext_press; - // rtensor temp_rtensor, total_rtensor; hbond_parameters *hbp; bond_order_data *bo_ij; bond_data *pbond_ij; @@ -258,6 +267,9 @@ void Old_Hydrogen_Bonds( reax_system *system, control_params *control, reax_list *bonds, *hbonds; bond_data *bond_list; hbond_data *hbond_list; +#if defined(DEBUG) + int num_hb_intrs = 0; +#endif bonds = (*lists) + BONDS; bond_list = bonds->select.bond_list; @@ -265,20 +277,21 @@ void Old_Hydrogen_Bonds( reax_system *system, control_params *control, hbond_list = hbonds->select.hbond_list; /* loops below discover the Hydrogen bonds between i-j-k triplets. - here j is H atom and there has to be some bond between i and j. - Hydrogen bond is between j and k. - so in this function i->X, j->H, k->Z when we map - variables onto the ones in the handout.*/ + * here j is H atom and there has to be some bond between i and j. + * Hydrogen bond is between j and k. + * so in this function i->X, j->H, k->Z when we map + * variables onto the ones in the handout.*/ for ( j = 0; j < system->n; ++j ) + { /* j has to be of type H */ - if ( system->reax_param.sbp[system->my_atoms[j].type].p_hbond == 1 ) + if ( system->reax_param.sbp[system->my_atoms[j].type].p_hbond == H_ATOM ) { /*set j's variables */ - type_j = system->my_atoms[j].type; - start_j = Start_Index(j, bonds); - end_j = End_Index(j, bonds); + type_j = system->my_atoms[j].type; + start_j = Start_Index(j, bonds); + end_j = End_Index(j, bonds); hb_start_j = Start_Index( system->my_atoms[j].Hindex, hbonds ); - hb_end_j = End_Index( system->my_atoms[j].Hindex, hbonds ); + hb_end_j = End_Index( system->my_atoms[j].Hindex, hbonds ); top = 0; for ( pi = start_j; pi < end_j; ++pi ) @@ -288,9 +301,11 @@ void Old_Hydrogen_Bonds( reax_system *system, control_params *control, bo_ij = &(pbond_ij->bo_data); type_i = system->my_atoms[i].type; - if ( system->reax_param.sbp[type_i].p_hbond == 2 && + if ( system->reax_param.sbp[type_i].p_hbond == H_BONDING_ATOM && bo_ij->BO >= HB_THRESHOLD ) + { hblist[top++] = pi; + } } // fprintf( stderr, "j: %d, top: %d, hb_start_j: %d, hb_end_j:%d\n", @@ -318,33 +333,34 @@ void Old_Hydrogen_Bonds( reax_system *system, control_params *control, r_ij = pbond_ij->d; //SUDHIR //hbp = &(system->reax_param.hbp[ type_i ][ type_j ][ type_k ]); - hbp = &(system->reax_param.hbp[ index_hbp (type_i, type_j, type_k, system->reax_param.num_atom_types) ]); + hbp = &(system->reax_param.hbp[ index_hbp(type_i, type_j, type_k, system->reax_param.num_atom_types) ]); + +#if defined(DEBUG) ++num_hb_intrs; +#endif Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, - &theta, &cos_theta ); + &theta, &cos_theta ); /* the derivative of cos(theta) */ Calculate_dCos_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, - &dcos_theta_di, &dcos_theta_dj, - &dcos_theta_dk ); + &dcos_theta_di, &dcos_theta_dj, &dcos_theta_dk ); - /* hyrogen bond energy*/ + /* hyrogen bond energy */ sin_theta2 = SIN( theta / 2.0 ); sin_xhz4 = SQR(sin_theta2); sin_xhz4 *= sin_xhz4; cos_xhz1 = ( 1.0 - cos_theta ); exp_hb2 = EXP( -hbp->p_hb2 * bo_ij->BO ); exp_hb3 = EXP( -hbp->p_hb3 * ( hbp->r0_hb / r_jk + - r_jk / hbp->r0_hb - 2.0 ) ); + r_jk / hbp->r0_hb - 2.0 ) ); - //data->my_en.e_hb += e_hb = hbp->p_hb1 * (1.0 - exp_hb2) * exp_hb3 * sin_xhz4; - data->my_en.e_hb += 1; + data->my_en.e_hb += e_hb; CEhb1 = hbp->p_hb1 * hbp->p_hb2 * exp_hb2 * exp_hb3 * sin_xhz4; CEhb2 = -hbp->p_hb1 / 2.0 * (1.0 - exp_hb2) * exp_hb3 * cos_xhz1; CEhb3 = -hbp->p_hb3 * - (-hbp->r0_hb / SQR(r_jk) + 1.0 / hbp->r0_hb) * e_hb; + (-hbp->r0_hb / SQR(r_jk) + 1.0 / hbp->r0_hb) * e_hb; /*fprintf( stdout, "%6d%6d%6d%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f\n", @@ -406,6 +422,7 @@ void Old_Hydrogen_Bonds( reax_system *system, control_params *control, system->my_atoms[k].orig_id, r_jk, theta, bo_ij->BO, e_hb, data->my_en.e_hb ); #endif + #ifdef TEST_FORCES Add_dBO( system, lists, j, pi, +CEhb1, workspace->f_hb ); //dbo term // dcos terms @@ -420,11 +437,12 @@ void Old_Hydrogen_Bonds( reax_system *system, control_params *control, } } } + } #if defined(DEBUG) fprintf( stderr, "Number of hydrogen bonds: %d\n", num_hb_intrs ); fprintf( stderr, "Hydrogen Bond Energy: %g\n", data->my_en.e_hb ); fprintf( stderr, "hydbonds: ext_press (%24.15e %24.15e %24.15e)\n", - data->ext_press[0], data->ext_press[1], data->ext_press[2] ); + data->ext_press[0], data->ext_press[1], data->ext_press[2] ); #endif } diff --git a/PG-PuReMD/src/index_utils.h b/PG-PuReMD/src/index_utils.h index 02677132..337e1844 100644 --- a/PG-PuReMD/src/index_utils.h +++ b/PG-PuReMD/src/index_utils.h @@ -45,20 +45,20 @@ static inline CUDA_HOST_DEVICE int index_tbp( int i, int j, int num_atom_types ) /* Indexing routine for three body parameters */ static inline CUDA_HOST_DEVICE int index_thbp( int i, int j, int k, int num_atom_types ) { - return (i * num_atom_types * num_atom_types ) + (j * num_atom_types ) + k; + return (i * num_atom_types * num_atom_types) + (j * num_atom_types) + k; } /* Indexing routine for hydrogen bonding parameters */ static inline CUDA_HOST_DEVICE int index_hbp( int i, int j, int k, int num_atom_types ) { - return (i * num_atom_types * num_atom_types ) + (j * num_atom_types ) + k; + return (i * num_atom_types * num_atom_types) + (j * num_atom_types) + k; } /* Indexing routine for four body parameters */ static inline CUDA_HOST_DEVICE int index_fbp( int i, int j, int k, int l, int num_atom_types ) { - return (i * num_atom_types * num_atom_types * num_atom_types ) + - (j * num_atom_types * num_atom_types ) + (k * num_atom_types ) + l; + return (i * num_atom_types * num_atom_types * num_atom_types) + + (j * num_atom_types * num_atom_types) + (k * num_atom_types) + l; } /* Indexing routine for LR table (force tabulation) */ diff --git a/PG-PuReMD/src/init_md.c b/PG-PuReMD/src/init_md.c index 4eb65bbd..2d09ec4c 100644 --- a/PG-PuReMD/src/init_md.c +++ b/PG-PuReMD/src/init_md.c @@ -179,7 +179,7 @@ int Init_System( reax_system *system, control_params *control, { atom = &(system->my_atoms[i]); - if ( system->reax_param.sbp[ atom->type ].p_hbond == 1 ) + if ( system->reax_param.sbp[ atom->type ].p_hbond == H_ATOM ) { atom->Hindex = system->numH++; } @@ -200,7 +200,7 @@ int Init_System( reax_system *system, control_params *control, for ( i = 0; i < system->n; ++i ) { atom = &(system->my_atoms[i]); - if ( system->reax_param.sbp[ atom->type ].p_hbond == 1 ) + if ( system->reax_param.sbp[ atom->type ].p_hbond == H_ATOM ) atom->Hindex = system->numH++; else atom->Hindex = -1; } @@ -275,32 +275,15 @@ int Cuda_Init_System( reax_system *system, control_params *control, Bin_Boundary_Atoms( system ); fprintf( stderr, " [BIN_BOUNDARY_ATOMS]\n" ); - /* estimate numH and Hcap */ - system->numH = 0; - if ( control->hbond_cut > 0.0 ) - { - //TODO - //for( i = 0; i < system->n; ++i ) { - for ( i = 0; i < system->N; ++i ) - { - atom = &(system->my_atoms[i]); - atom->Hindex = i; - //FIX - 4 - Added fix for HBond Issue - if ( system->reax_param.sbp[ atom->type ].p_hbond == 1 ) - { - system->numH++; - } - //else atom->Hindex = -1; - } - } - system->Hcap = MAX( system->numH * SAFER_ZONE, MIN_CAP ); - /* Sync atoms here to continue the computation */ dev_alloc_system( system ); fprintf( stderr, " [DEV ALLOC SYSTEM]\n" ); Sync_System( system ); fprintf( stderr, " [SYNC SYSTEM]\n" ); + /* estimate numH and Hcap */ + Cuda_Reset_Atoms( system, control ); + #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: n=%d local_cap=%d\n", system->my_rank, system->n, system->local_cap ); @@ -312,8 +295,10 @@ int Cuda_Init_System( reax_system *system, control_params *control, Cuda_Compute_Total_Mass( system, data, mpi_data->comm_mesh3D ); fprintf( stderr, " [CUDA COMPUTE TOTAL MASS]\n" ); + Cuda_Compute_Center_of_Mass( system, data, mpi_data, mpi_data->comm_mesh3D ); fprintf( stderr, " [CUDA COMPUTE CENTER OF MASS]\n" ); + // if( Reposition_Atoms( system, control, data, mpi_data, msg ) == FAILURE ) // { // return FAILURE; @@ -915,20 +900,12 @@ int Cuda_Init_Lists( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, mpi_datatypes *mpi_data, char *msg ) { - int i, count, ret; - int total_hbonds, total_bonds, Htop; - int *hb_top; + int ret; + int Htop; - hb_top = (int*) scalloc( system->total_cap, sizeof(int), - "Cuda_Init_Lists::hb_top" ); - /* ignore returned error, as system->d_max_far_nbrs was not valid */ ret = Cuda_Estimate_Neighbors( system, data->step ); -#if defined(DEBUG_FOCUS) - fprintf( stderr, "DEVICE total neighbors entries: %d \n", system->total_far_nbrs ); -#endif - Dev_Make_List( system->total_cap, system->total_far_nbrs, TYP_FAR_NEIGHBOR, *dev_lists + FAR_NBRS ); @@ -944,8 +921,7 @@ int Cuda_Init_Lists( reax_system *system, control_params *control, Cuda_Generate_Neighbor_Lists( system, data, workspace, dev_lists ); /* estimate storage for bonds and hbonds */ - Cuda_Estimate_Storages( system, control, dev_lists, &Htop, hb_top, - data->step ); + Cuda_Estimate_Storages( system, control, dev_lists, &Htop, data->step ); /* estimate storage for charge sparse matrix */ Cuda_Estimate_Storage_Sparse_Matrix( system, control, data, dev_lists ); @@ -967,52 +943,23 @@ int Cuda_Init_Lists( reax_system *system, control_params *control, #endif // FIX - 4 - Added addition check here for hydrogen Bonds - if ( control->hbond_cut > 0.0 && system->numH > 0 ) + if ( control->hbond_cut > 0.0 && system->numH > 0 ) { - /* init H indexes */ - total_hbonds = 0; - count = 0; - - for ( i = 0; i < system->N; ++i ) - { - //system->my_atoms[i].num_hbonds = hb_top[i]; - //TODO - hb_top[i] = MAX( hb_top[i] * 4, MIN_HBONDS * 4); - total_hbonds += hb_top[i]; - if ( hb_top[i] > 0 ) - { - ++count; - } - } - total_hbonds = MAX( total_hbonds, MIN_CAP * MIN_HBONDS ); - - Dev_Make_List( system->total_cap, system->total_cap * - system->max_hbonds, TYP_HBOND, *dev_lists + HBONDS ); + Dev_Make_List( system->total_cap, system->total_hbonds, TYP_HBOND, *dev_lists + HBONDS ); +// Make_List( system->total_cap, system->total_hbonds, TYP_HBOND, *lists + HBONDS ); -#if defined(DEBUG_FOCUS) - fprintf( stderr, "**** Total HBonds allocated --> %d total_cap: %d per atom: %d, max_hbonds: %d \n", - total_hbonds, system->total_cap, (total_hbonds / - system->total_cap), system->max_hbonds ); -#endif - - //TODO - //Cuda_Init_HBond_Indices (hb_top, system->n); - /****/ - //THIS IS COMMENTED OUT - CHANGE ORIGINAL - //Cuda_Init_HBond_Indices (hb_top, system->N); - //THIS IS COMMENTED OUT - CHANGE ORIGINAL - /****/ + Cuda_Init_HBond_Indices( system ); #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated hbonds: total_hbonds=%d, space=%dMB\n", - system->my_rank, total_hbonds, - (int)(total_hbonds * sizeof(hbond_data) / (1024 * 1024)) ); + system->my_rank, system->total_hbonds, + (int)(system->total_hbonds * sizeof(hbond_data) / (1024 * 1024)) ); #endif } /* bonds list */ Dev_Make_List( system->total_cap, system->total_bonds, TYP_BOND, *dev_lists + BONDS ); - Make_List( system->total_cap, system->total_bonds, TYP_BOND, *lists + BONDS ); +// Make_List( system->total_cap, system->total_bonds, TYP_BOND, *lists + BONDS ); Cuda_Init_Bond_Indices( system ); @@ -1026,8 +973,6 @@ int Cuda_Init_Lists( reax_system *system, control_params *control, * of three body interactions requires that bond orders have * been computed, delay estimation until for computation */ - sfree( hb_top, "Cuda_Init_Lists::hb_top" ); - return SUCCESS; } #endif diff --git a/PG-PuReMD/src/integrate.c b/PG-PuReMD/src/integrate.c index 8537f50f..bfbbe1f7 100644 --- a/PG-PuReMD/src/integrate.c +++ b/PG-PuReMD/src/integrate.c @@ -345,7 +345,7 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con output_controls *out_control, mpi_datatypes *mpi_data ) { int i, steps, renbr, ret; - static int verlet_part1_done = FALSE; + static int verlet_part1_done = FALSE, estimate_nbrs_done = 0; real inv_m, dt, lambda; rvec dx; reax_atom *atom; @@ -414,13 +414,18 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con t_over_start = Get_Time (); #endif - //TODO: move far_nbrs reallocation checks outside of renbr frequency check - ret = Cuda_Estimate_Neighbors( system, data->step ); - fprintf( stderr, " [CUDA_ESTIMATE_NEIGHBORS: %d] STEP %d\n", ret, data->step ); + if ( estimate_nbrs_done == 0 ) + { + //TODO: move far_nbrs reallocation checks outside of renbr frequency check + ret = Cuda_Estimate_Neighbors( system, data->step ); + estimate_nbrs_done = 1; + fprintf( stderr, " [CUDA_ESTIMATE_NEIGHBORS: %d] STEP %d\n", ret, data->step ); + } - if ( ret == SUCCESS ) + if ( ret == SUCCESS && estimate_nbrs_done == 1 ) { Cuda_Generate_Neighbor_Lists( system, data, workspace, lists ); + estimate_nbrs_done = 2; #if defined(DEBUG) t_over_elapsed = Get_Timing_Info( t_over_start ); @@ -449,7 +454,6 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con #endif /* temperature scaler */ - //Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D ); Cuda_Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D ); fprintf( stderr, " [CUDA_COMPUTE_KINETIC_ENERGY] STEP %d\n", data->step ); @@ -476,6 +480,7 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con #endif verlet_part1_done = FALSE; + estimate_nbrs_done = 0; } return ret; @@ -484,8 +489,8 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con /* uses Berendsen-type coupling for both T and P. - All box dimensions are scaled by the same amount, - there is no change in the angles between axes. */ + * All box dimensions are scaled by the same amount, + * there is no change in the angles between axes. */ int Velocity_Verlet_Berendsen_NPT( reax_system* system, control_params* control, simulation_data *data, storage *workspace, reax_list **lists, output_controls *out_control, mpi_datatypes *mpi_data ) diff --git a/PG-PuReMD/src/io_tools.c b/PG-PuReMD/src/io_tools.c index 9393b642..f3a341db 100644 --- a/PG-PuReMD/src/io_tools.c +++ b/PG-PuReMD/src/io_tools.c @@ -846,7 +846,7 @@ void Print_My_Ext_Atoms( reax_system *system ) void Print_Far_Neighbors( reax_system *system, reax_list **lists, - control_params *control ) + control_params *control ) { char fname[100]; int i, j, id_i, id_j, nbr, natoms; @@ -1037,28 +1037,85 @@ void Print_Charges( reax_system *system ) } -void Print_Bonds( reax_system *system, reax_list *bonds, char *fname ) +void Print_HBonds( reax_system *system, reax_list **lists, + control_params *control, int step ) { - int i, j, pj; + int i, pj; + char fname[MAX_STR]; + hbond_data *phbond; + FILE *fout; + reax_list *hbonds = (*lists + HBONDS); + + sprintf( fname, "%s.hbonds.%d.%d", control->sim_name, step, system->my_rank ); + fout = fopen( fname, "w" ); + + for ( i = 0; i < system->N; ++i ) + { + for ( pj = Start_Index(i, hbonds); pj < End_Index(i, hbonds); ++pj ) + { + phbond = &(hbonds->select.hbond_list[pj]); + +// fprintf( fout, "%8d%8d %24.15e %24.15e %24.15e\n", i, phbond->nbr, +// phbond->ptr->dvec[0], phbond->ptr->dvec[1], phbond->ptr->dvec[2] ); + fprintf( fout, "%8d%8d %8d %8d\n", i, phbond->nbr, + phbond->scl, phbond->sym_index ); + } + } + + fclose( fout ); +} + + +void Print_HBond_Indices( reax_system *system, reax_list **lists, + control_params *control, int step ) +{ + int i; + char fname[MAX_STR]; + FILE *fout; + reax_list *hbonds = (*lists + HBONDS); + + sprintf( fname, "%s.hbonds_indices.%d.%d", control->sim_name, step, system->my_rank ); + fout = fopen( fname, "w" ); + + for ( i = 0; i < system->N; ++i ) + { + fprintf( fout, "%8d: start: %8d, end: %8d\n", + i, Start_Index(i, hbonds), End_Index(i, hbonds) ); + } + + fclose( fout ); +} + + +void Print_Bonds( reax_system *system, reax_list **lists, + control_params *control ) +{ + int i, pj; + char fname[MAX_STR]; bond_data *pbond; bond_order_data *bo_ij; - FILE *f = fopen( fname, "w" ); + FILE *fout; + reax_list *bonds = (*lists + BONDS); + + sprintf( fname, "%s.bonds.%d", control->sim_name, system->my_rank ); + fout = fopen( fname, "w" ); for ( i = 0; i < system->N; ++i ) + { for ( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) { pbond = &(bonds->select.bond_list[pj]); bo_ij = &(pbond->bo_data); - j = pbond->nbr; - //fprintf( f, "%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n", - // system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, - // pbond->d, bo_ij->BO, bo_ij->BO_s, bo_ij->BO_pi, bo_ij->BO_pi2 ); - fprintf( f, "%8d%8d %24.15f %24.15f\n", - i, j,//system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, - pbond->d, bo_ij->BO ); +// fprintf( fout, "%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n", +// system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, +// pbond->d, bo_ij->BO, bo_ij->BO_s, bo_ij->BO_pi, bo_ij->BO_pi2 ); + fprintf( fout, "%8d%8d %24.15f %24.15f\n", + i, pbond->nbr, //system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, + pbond->d, bo_ij->BO ); } + } - fclose(f); + fclose( fout ); } diff --git a/PG-PuReMD/src/io_tools.h b/PG-PuReMD/src/io_tools.h index a0998ec5..789991c3 100644 --- a/PG-PuReMD/src/io_tools.h +++ b/PG-PuReMD/src/io_tools.h @@ -46,7 +46,9 @@ void Print_Sparse_Matrix2( reax_system*, sparse_matrix*, char* ); void Print_Linear_System( reax_system*, control_params*, storage*, int ); void Print_LinSys_Soln( reax_system*, real*, real*, real* ); void Print_Charges( reax_system* ); -void Print_Bonds( reax_system*, reax_list*, char* ); +void Print_HBonds( reax_system*, reax_list**, control_params *, int ); +void Print_HBond_Indices( reax_system*, reax_list**, control_params *, int ); +void Print_Bonds( reax_system*, reax_list**, control_params *); void Print_Bond_List2( reax_system*, reax_list*, char* ); void Print_Total_Force( reax_system*, simulation_data*, storage* ); void Output_Results( reax_system*, control_params*, simulation_data*, diff --git a/PG-PuReMD/src/parallelreax.c b/PG-PuReMD/src/parallelreax.c index 8a6dc67a..a3071712 100644 --- a/PG-PuReMD/src/parallelreax.c +++ b/PG-PuReMD/src/parallelreax.c @@ -337,6 +337,17 @@ int main( int argc, char* argv[] ) Generate_Neighbor_Lists( system, data, workspace, lists ); #endif +// reax_list **temp_lists = (reax_list **) smalloc( LIST_N * sizeof (reax_list *), "temp_lists" ); +// for ( i = 0; i < LIST_N; ++i ) +// { +// temp_lists[i] = (reax_list *) smalloc( sizeof(reax_list), "lists[i]" ); +// temp_lists[i]->allocated = FALSE; +// } +// Make_List( (*dev_lists + FAR_NBRS)->n, (*dev_lists + FAR_NBRS)->num_intrs, +// TYP_FAR_NEIGHBOR, (*temp_lists + FAR_NBRS) ); +// Output_Sync_Lists( (*temp_lists + FAR_NBRS), (*dev_lists + FAR_NBRS), TYP_FAR_NEIGHBOR ); +// Print_Far_Neighbors( system, temp_lists, control ); + #if defined(DEBUG) fprintf( stderr, "p%d: Cuda_Generate_Neighbor_Lists done...\n", system->my_rank ); #endif @@ -395,6 +406,7 @@ int main( int argc, char* argv[] ) if ( control->T_mode && retries == 0 ) { Temperature_Control( control, data ); + fprintf( stderr, " [TEMPERATURE CONTROL] STEP %d\n", data->step ); } #if defined(DEBUG) @@ -512,7 +524,7 @@ int main( int argc, char* argv[] ) //TODO: remove? /* allocate the cuda auxiliary data structures */ dev_workspace = (storage *) smalloc( sizeof(storage), "dev_workspace" ); - dev_lists = (reax_list **) smalloc ( LIST_N * sizeof (reax_list *), "dev_lists" ); + dev_lists = (reax_list **) smalloc( LIST_N * sizeof(reax_list *), "dev_lists" ); for ( i = 0; i < LIST_N; ++i ) { dev_lists[i] = (reax_list *) smalloc( sizeof(reax_list), "lists[i]" ); @@ -642,29 +654,22 @@ int main( int argc, char* argv[] ) fprintf( out_control->out, "Total Simulation Time: %.2f secs\n", t_elapsed ); } - // Write_PDB( &system, &(lists[BOND]), &out_control ); +// Write_PDB( &system, &(lists[BOND]), &out_control ); Close_Output_Files( system, control, out_control, mpi_data ); MPI_Finalize( ); - /* de-allocate data structures */ - //for( i = 0; i < LIST_N; ++i ) { - //if (lists[i]->index) free (lists[i]->index); - //if (lists[i]->end_index) free (lists[i]->end_index); - //if (lists[i]->select.v) free (lists[i]->select.v); - //free (lists[i] ); - //} - - free( system ); - free( control ); - free( data ); - free( workspace ); - free( lists ); - free( out_control ); - free( mpi_data ); + /* deallocate data structures */ + sfree( system, "main::system" ); + sfree( control, "main::control" ); + sfree( data, "main::data" ); + sfree( workspace, "main::workspace" ); + sfree( lists, "main::lists" ); + sfree( out_control, "main::out_control" ); + sfree( mpi_data, "main::mpi_data" ); #if defined(TEST_ENERGY) || defined(TEST_FORCES) -// Integrate_Results(control); +// Integrate_Results(control); #endif #if defined(DEBUG) diff --git a/PG-PuReMD/src/reax_types.h b/PG-PuReMD/src/reax_types.h index 4570d43f..2ca3be7c 100644 --- a/PG-PuReMD/src/reax_types.h +++ b/PG-PuReMD/src/reax_types.h @@ -292,7 +292,8 @@ enum restart_formats }; /* ensemble type */ -enum ensembles { +enum ensembles +{ NVE = 0, bNVT = 1, nhNVT = 2, @@ -385,20 +386,15 @@ enum gcell_types NATIVE = 8, }; -/* ??? */ -enum atoms +/* atom types as pertains to hydrogen bonding */ +enum hydrogen_bonding_atom_types { - C_ATOM = 0, + NON_H_BONDING_ATOM = -1, H_ATOM = 1, - O_ATOM = 2, - N_ATOM = 3, - S_ATOM = 4, - SI_ATOM = 5, - GE_ATOM = 6, - X_ATOM = 7, + H_BONDING_ATOM = 2, }; -/* ??? */ +/* trajectory file formats */ enum traj_methods { REG_TRAJ = 0, @@ -634,13 +630,13 @@ typedef struct { /* num. of global parameters, from the force field file */ int n_global; - /* global parameters */ + /* global parameters, see above mapping */ real* l; /* van der Waals interaction type, values: - * 0: ??? - * 1: ??? - * 2: ??? - * 3: ??? + * 0: none (???) + * 1: shielded Morse, no inner-wall + * 2: inner wall, no shielding + * 3: inner wall + shielding * */ int vdw_type; } global_parameters; @@ -684,8 +680,12 @@ typedef struct real chi; /**/ real eta; - /* 1 for H, 2 for hbonding atoms (O,S,P,N), 0 for others */ - int p_hbond; + /* info related to hydrogen bonding + * (values correspond to hydrogen_bonding_atom_types enum above): + * 0: non-hydrogen bonding atom + * 1: H atom + * 2: hydrogen bonding atom (e.g., O, S, P, N) */ + int p_hbond; /* Line three in field file */ /**/ @@ -1123,6 +1123,7 @@ typedef struct int bigN; /* num. hydrogen atoms */ int numH; + int *d_numH; /**/ int local_cap; /**/ diff --git a/PG-PuReMD/src/reset_tools.c b/PG-PuReMD/src/reset_tools.c index 59dffae5..a605cc79 100644 --- a/PG-PuReMD/src/reset_tools.c +++ b/PG-PuReMD/src/reset_tools.c @@ -42,13 +42,13 @@ void Reset_Atoms( reax_system* system, control_params *control ) reax_atom *atom; system->numH = 0; - if ( control->hbond_cut > 0 ) + if ( control->hbond_cut > 0.0 ) { //TODO for ( i = 0; i < system->N; ++i ) { atom = &(system->my_atoms[i]); - //if( system->reax_param.sbp[ atom->type ].p_hbond == 1 ) + //if( system->reax_param.sbp[ atom->type ].p_hbond == H_ATOM ) atom->Hindex = system->numH++; //else atom->Hindex = -1; } diff --git a/PG-PuReMD/src/traj.c b/PG-PuReMD/src/traj.c index 9761494a..d561a45f 100644 --- a/PG-PuReMD/src/traj.c +++ b/PG-PuReMD/src/traj.c @@ -79,13 +79,16 @@ int Set_My_Trajectory_View( MPI_File trj, int offset, MPI_Datatype etype, int Reallocate_Output_Buffer( output_controls *out_control, int req_space, - MPI_Comm comm ) + MPI_Comm comm ) { if ( out_control->buffer_len > 0 ) - free( out_control->buffer ); + { + sfree( out_control->buffer, "Reallocate_Output_Buffer::out_control->buffer" ); + } out_control->buffer_len = req_space * SAFE_ZONE; - out_control->buffer = (char*) malloc(out_control->buffer_len * sizeof(char)); + out_control->buffer = (char*) smalloc( out_control->buffer_len * sizeof(char), + "Reallocate_Output_Buffer::out_control->buffer" ); if ( out_control->buffer == NULL ) { fprintf( stderr, @@ -99,7 +102,7 @@ int Reallocate_Output_Buffer( output_controls *out_control, int req_space, void Write_Skip_Line( output_controls *out_control, mpi_datatypes *mpi_data, - int my_rank, int skip, int num_section ) + int my_rank, int skip, int num_section ) { MPI_Status status; @@ -131,33 +134,41 @@ int Write_Header( reax_system *system, control_params *control, { int num_hdr_lines, my_hdr_lines, buffer_req; MPI_Status status; - char ensembles[ens_N][25] = { "NVE", "NVT", "fully flexible NPT", - "semi isotropic NPT", "isotropic NPT" - }; - char reposition[3][25] = { "fit to periodic box", "CoM to center of box", - "CoM to origin" - }; - char t_regime[3][25] = { "T-coupling only", "step-wise", "constant slope" }; - - char traj_methods[TF_N][10] = { "custom", "xyz" }; - char atom_formats[8][40] = { "none", "invalid", "invalid", "invalid", - "xyz_q", - "xyz_q_fxfyfz", - "xyz_q_vxvyvz", - "detailed_atom_info" - }; - char bond_formats[3][30] = { "none", - "basic_bond_info", - "detailed_bond_info" - }; - char angle_formats[2][30] = { "none", "basic_angle_info" }; + char ensembles[ens_N][25] = { + "NVE", "NVT", "fully flexible NPT", + "semi isotropic NPT", "isotropic NPT", + }; + char reposition[3][25] = { + "fit to periodic box", "CoM to center of box", + "CoM to origin", + }; + char t_regime[3][25] = { + "T-coupling only", "step-wise", "constant slope", + }; + + char traj_methods[TF_N][10] = { + "custom", "xyz", + }; + char atom_formats[8][40] = { + "none", "invalid", "invalid", "invalid", + "xyz_q", "xyz_q_fxfyfz", "xyz_q_vxvyvz", + "detailed_atom_info", + }; + char bond_formats[3][30] = { + "none", "basic_bond_info", "detailed_bond_info", + }; + char angle_formats[2][30] = { + "none", "basic_angle_info", + }; /* set header lengths */ num_hdr_lines = NUM_HEADER_LINES; my_hdr_lines = num_hdr_lines * ( system->my_rank == MASTER_NODE ); buffer_req = my_hdr_lines * HEADER_LINE_LEN; if ( buffer_req > out_control->buffer_len * DANGER_ZONE ) + { Reallocate_Output_Buffer( out_control, buffer_req, mpi_data->world ); + } /* only the master node writes into trajectory header */ if ( system->my_rank == MASTER_NODE ) @@ -357,7 +368,7 @@ int Write_Header( reax_system *system, control_params *control, int Write_Init_Desc( reax_system *system, control_params *control, - output_controls *out_control, mpi_datatypes *mpi_data ) + output_controls *out_control, mpi_datatypes *mpi_data ) { int i, me, np, cnt, buffer_len, buffer_req; reax_atom *p_atom; @@ -372,11 +383,18 @@ int Write_Init_Desc( reax_system *system, control_params *control, system->bigN * INIT_DESC_LEN, system->bigN ); if ( out_control->traj_method == REG_TRAJ && me == MASTER_NODE ) + { buffer_req = system->bigN * INIT_DESC_LEN + 1; - else buffer_req = system->n * INIT_DESC_LEN + 1; + } + else + { + buffer_req = system->n * INIT_DESC_LEN + 1; + } if ( buffer_req > out_control->buffer_len * DANGER_ZONE ) + { Reallocate_Output_Buffer( out_control, buffer_req, mpi_data->world ); + } out_control->line[0] = 0; out_control->buffer[0] = 0; @@ -425,8 +443,7 @@ int Write_Init_Desc( reax_system *system, control_params *control, int Init_Traj( reax_system *system, control_params *control, - output_controls *out_control, mpi_datatypes *mpi_data, - char *msg ) + output_controls *out_control, mpi_datatypes *mpi_data, char *msg ) { char fname[MAX_STR]; int atom_line_len[ NR_OPT_ATOM ] = { 0, 0, 0, 0, @@ -450,7 +467,8 @@ int Init_Traj( reax_system *system, control_params *control, out_control->write_angles = ( out_control->angle_line_len ? 1 : 0 ); /* allocate line & buffer space */ - out_control->line = (char*) calloc( MAX_TRJ_LINE_LEN + 1, sizeof(char) ); + out_control->line = (char*) scalloc( MAX_TRJ_LINE_LEN + 1, sizeof(char), + "Init_Traj::out_control->line" ); out_control->buffer_len = 0; out_control->buffer = NULL; @@ -527,8 +545,8 @@ int Init_Traj( reax_system *system, control_params *control, int Write_Frame_Header( reax_system *system, control_params *control, - simulation_data *data, output_controls *out_control, - mpi_datatypes *mpi_data ) + simulation_data *data, output_controls *out_control, + mpi_datatypes *mpi_data ) { int me, num_frm_hdr_lines, my_frm_hdr_lines, buffer_req; MPI_Status status; @@ -539,7 +557,9 @@ int Write_Frame_Header( reax_system *system, control_params *control, my_frm_hdr_lines = num_frm_hdr_lines * ( me == MASTER_NODE ); buffer_req = my_frm_hdr_lines * HEADER_LINE_LEN; if ( buffer_req > out_control->buffer_len * DANGER_ZONE ) + { Reallocate_Output_Buffer( out_control, buffer_req, mpi_data->world ); + } /* only the master node writes into trajectory header */ if ( me == MASTER_NODE ) @@ -666,27 +686,33 @@ int Write_Frame_Header( reax_system *system, control_params *control, } - int Write_Atoms( reax_system *system, control_params *control, - output_controls *out_control, mpi_datatypes *mpi_data ) + output_controls *out_control, mpi_datatypes *mpi_data ) { int i, me, np, line_len, buffer_len, buffer_req, cnt; - MPI_Status status; - reax_atom *p_atom; + MPI_Status status; + reax_atom *p_atom; me = system->my_rank; np = control->nprocs; line_len = out_control->atom_line_len; Write_Skip_Line( out_control, mpi_data, me, - system->bigN * line_len, system->bigN ); + system->bigN * line_len, system->bigN ); if ( out_control->traj_method == REG_TRAJ && me == MASTER_NODE ) + { buffer_req = system->bigN * line_len + 1; - else buffer_req = system->n * line_len + 1; + } + else + { + buffer_req = system->n * line_len + 1; + } if ( buffer_req > out_control->buffer_len * DANGER_ZONE ) + { Reallocate_Output_Buffer( out_control, buffer_req, mpi_data->world ); + } /* fill in buffer */ out_control->line[0] = 0; @@ -761,8 +787,8 @@ int Write_Atoms( reax_system *system, control_params *control, } -int Write_Bonds(reax_system *system, control_params *control, reax_list *bonds, - output_controls *out_control, mpi_datatypes *mpi_data) +int Write_Bonds( reax_system *system, control_params *control, reax_list *bonds, + output_controls *out_control, mpi_datatypes *mpi_data ) { int i, j, pj, me, np; int my_bonds, num_bonds; @@ -790,11 +816,18 @@ int Write_Bonds(reax_system *system, control_params *control, reax_list *bonds, Write_Skip_Line( out_control, mpi_data, me, num_bonds * line_len, num_bonds ); if ( out_control->traj_method == REG_TRAJ && me == MASTER_NODE ) + { buffer_req = num_bonds * line_len + 1; - else buffer_req = my_bonds * line_len + 1; + } + else + { + buffer_req = my_bonds * line_len + 1; + } if ( buffer_req > out_control->buffer_len * DANGER_ZONE ) + { Reallocate_Output_Buffer( out_control, buffer_req, mpi_data->world ); + } /* fill in the buffer */ my_bonds = 0; @@ -868,8 +901,8 @@ int Write_Bonds(reax_system *system, control_params *control, reax_list *bonds, int Write_Angles( reax_system *system, control_params *control, - reax_list *bonds, reax_list *thb_intrs, - output_controls *out_control, mpi_datatypes *mpi_data ) + reax_list *bonds, reax_list *thb_intrs, + output_controls *out_control, mpi_datatypes *mpi_data ) { int i, j, k, pi, pk, me, np; int my_angles, num_angles; @@ -909,11 +942,18 @@ int Write_Angles( reax_system *system, control_params *control, Write_Skip_Line( out_control, mpi_data, me, num_angles * line_len, num_angles ); if ( out_control->traj_method == REG_TRAJ && me == MASTER_NODE ) + { buffer_req = num_angles * line_len + 1; - else buffer_req = my_angles * line_len + 1; + } + else + { + buffer_req = my_angles * line_len + 1; + } if ( buffer_req > out_control->buffer_len * DANGER_ZONE ) + { Reallocate_Output_Buffer( out_control, buffer_req, mpi_data->world ); + } /* fill in the buffer */ my_angles = 0; @@ -982,8 +1022,8 @@ int Write_Angles( reax_system *system, control_params *control, int Append_Frame( reax_system *system, control_params *control, - simulation_data *data, reax_list **lists, - output_controls *out_control, mpi_datatypes *mpi_data ) + simulation_data *data, reax_list **lists, + output_controls *out_control, mpi_datatypes *mpi_data ) { #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: appending frame %d\n", system->my_rank, data->step ); @@ -1028,12 +1068,16 @@ int Append_Frame( reax_system *system, control_params *control, int End_Traj( int my_rank, output_controls *out_control ) { if ( out_control->traj_method == MPI_TRAJ ) + { MPI_File_close( &(out_control->trj) ); + } else if ( my_rank == MASTER_NODE ) + { fclose( out_control->strj ); + } - free( out_control->buffer ); - free( out_control->line ); + sfree( out_control->buffer, "End_Traj::out_control->buffer" ); + sfree( out_control->line, "End_Traj::out_control->line" ); return SUCCESS; } diff --git a/PG-PuReMD/src/traj.h b/PG-PuReMD/src/traj.h index 7ce82e0f..8f09c4a7 100644 --- a/PG-PuReMD/src/traj.h +++ b/PG-PuReMD/src/traj.h @@ -22,8 +22,10 @@ #ifndef __TRAJ_H__ #define __TRAJ_H__ + #include "reax_types.h" + #define MAX_TRJ_LINE_LEN 120 #define MAX_TRJ_BUFFER_SIZE (MAX_TRJ_LINE_LEN * 100) @@ -62,16 +64,28 @@ #define ANGLE_BASIC "%9d%9d%9d%10.3f\n" // Atom1 Atom2 Atom3 Theta #define ANGLE_BASIC_LEN 38 -enum ATOM_LINE_OPTS { OPT_NOATOM = 0, OPT_ATOM_BASIC = 4, OPT_ATOM_wF = 5, OPT_ATOM_wV = 6, OPT_ATOM_FULL = 7, NR_OPT_ATOM = 8 }; -enum BOND_LINE_OPTS { OPT_NOBOND, OPT_BOND_BASIC, OPT_BOND_FULL, NR_OPT_BOND }; -enum ANGLE_LINE_OPTS { OPT_NOANGLE, OPT_ANGLE_BASIC, NR_OPT_ANGLE }; +enum ATOM_LINE_OPTS +{ + OPT_NOATOM = 0, OPT_ATOM_BASIC = 4, OPT_ATOM_wF = 5, OPT_ATOM_wV = 6, + OPT_ATOM_FULL = 7, NR_OPT_ATOM = 8, +}; +enum BOND_LINE_OPTS +{ + OPT_NOBOND = 0, OPT_BOND_BASIC = 1, OPT_BOND_FULL = 2, NR_OPT_BOND = 3, +}; +enum ANGLE_LINE_OPTS +{ + OPT_NOANGLE = 0, OPT_ANGLE_BASIC = 1, NR_OPT_ANGLE = 2, +}; + + +int Init_Traj( reax_system*, control_params*, output_controls*, mpi_datatypes*, char* ); + +int End_Traj( int, output_controls* ); -int Init_Traj( reax_system*, control_params*, output_controls*, - mpi_datatypes*, char* ); -int End_Traj( int, output_controls* ); +int Append_Frame( reax_system*, control_params*, simulation_data*, reax_list**, + output_controls*, mpi_datatypes* ); -int Append_Frame( reax_system*, control_params*, simulation_data*, - reax_list**, output_controls*, mpi_datatypes* ); #endif -- GitLab