diff --git a/.gitignore b/.gitignore index 78b4824986d629e57c707d0a78fb375efb441428..fd8f967c419e269fb5a95b5521100fb17e46ed6a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ +# PuReMD *.log +*.prs *.pot *.trj diff --git a/PG-PuReMD/Makefile.am b/PG-PuReMD/Makefile.am index 3b051035263dfedbade53966fa45ee9d62da7775..149e16de9b1d84f601590884e39926dc708e02de 100644 --- a/PG-PuReMD/Makefile.am +++ b/PG-PuReMD/Makefile.am @@ -35,7 +35,7 @@ include_HEADERS = src/reax_types.h src/index_utils.h \ if USE_CUDA bin_pg_puremd_SOURCES += src/cuda/cuda_utils.cu src/cuda/cuda_allocate.cu src/cuda/cuda_environment.cu \ - src/cuda/cuda_system_props.cu src/cuda/cuda_reduction.cu \ + src/cuda/cuda_system_props.cu src/cuda/cuda_reduction.cu src/cuda/cuda_box.cu \ src/cuda/cuda_copy.cu src/cuda/cuda_reset_tools.cu src/cuda/cuda_list.cu \ src/cuda/cuda_neighbors.cu src/cuda/cuda_bond_orders.cu src/cuda/cuda_bonds.cu \ src/cuda/cuda_multi_body.cu src/cuda/cuda_valence_angles.cu \ @@ -45,7 +45,7 @@ bin_pg_puremd_SOURCES += src/cuda/cuda_utils.cu src/cuda/cuda_allocate.cu src/cu src/cuda/cuda_init_md.cu src/cuda/cuda_validation.cu src/cuda/cuda_lookup.cu include_HEADERS += src/cuda/cuda_helpers.h src/cuda/cuda_shuffle.h \ src/cuda/cuda_utils.h src/cuda/cuda_allocate.h src/cuda/cuda_environment.h \ - src/cuda/cuda_system_props.h src/cuda/cuda_reduction.h \ + src/cuda/cuda_system_props.h src/cuda/cuda_reduction.h src/cuda/cuda_box.h \ src/cuda/cuda_copy.h src/cuda/cuda_reset_tools.h src/cuda/cuda_list.h \ src/cuda/cuda_neighbors.h src/cuda/cuda_bond_orders.h src/cuda/cuda_bonds.h \ src/cuda/cuda_multi_body.h src/cuda/cuda_valence_angles.h \ diff --git a/PG-PuReMD/src/box.h b/PG-PuReMD/src/box.h index 00e51d063a1126c662f717df938787178ff77549..fa92c72275d2acfad2258a09cc288e0203a8f5bf 100644 --- a/PG-PuReMD/src/box.h +++ b/PG-PuReMD/src/box.h @@ -29,14 +29,16 @@ extern "C" { #endif +void Make_Consistent( simulation_box* ); + /* initializes simulation boxes */ void Setup_Big_Box( real, real, real, real, real, real, simulation_box* ); void Init_Box( rtensor, simulation_box* ); -//void Setup_My_Box( reax_system*, control_params* ); +void Setup_My_Box( reax_system*, control_params* ); -//void Setup_My_Ext_Box( reax_system*, control_params* ); +void Setup_My_Ext_Box( reax_system*, control_params* ); void Setup_Environment( reax_system*, control_params*, mpi_datatypes* ); diff --git a/PG-PuReMD/src/cuda/cuda_allocate.cu b/PG-PuReMD/src/cuda/cuda_allocate.cu index 5c722e56d816a420a93f3ac03c1994f58a1d53c5..e85b4eaee10deabf763901c6b67358cde9a8eaf2 100644 --- a/PG-PuReMD/src/cuda/cuda_allocate.cu +++ b/PG-PuReMD/src/cuda/cuda_allocate.cu @@ -80,46 +80,46 @@ void dev_alloc_grid( reax_system *system ) cuda_malloc( (void **) &device->rel_box, sizeof(ivec) * total, TRUE, "dev_alloc_grid::grid->rel_box" ); - /* - int block_size = 512; - int blocks = (host->max_nbrs) / block_size + ((host->max_nbrs) % block_size == 0 ? 
0 : 1); - - Init_Nbrs <<<blocks, block_size>>> - (nbrs_x, host->max_nbrs ); - cudaThreadSynchronize (); - cudaCheckError (); - - cuda_malloc ((void **)& device->cells, - sizeof (grid_cell) * total, - TRUE, "grid:cells"); - fprintf (stderr, " Device cells address --> %ld \n", device->cells ); - cuda_malloc ((void **) &device->order, sizeof (ivec) * (host->total + 1), TRUE, "grid:order"); - - local_cell.top = local_cell.mark = local_cell.str = local_cell.end = 0; - fprintf (stderr, "Total cells to be allocated -- > %d \n", total ); - for (int i = 0; i < total; i++) { - //fprintf (stderr, "Address of the local atom -> %ld \n", &local_cell); - - cuda_malloc ((void **) &local_cell.atoms, sizeof (int) * host->max_atoms, - TRUE, "alloc:grid:cells:atoms"); - //fprintf (stderr, "Allocated address of the atoms --> %ld (%d)\n", local_cell.atoms, host->max_atoms ); - - cuda_malloc ((void **) &local_cell.nbrs_x, sizeof (ivec) * host->max_nbrs, - TRUE, "alloc:grid:cells:nbrs_x" ); - copy_device (local_cell.nbrs_x, nbrs_x, host->max_nbrs * sizeof (ivec), "grid:nbrs_x"); - //fprintf (stderr, "Allocated address of the nbrs_x--> %ld \n", local_cell.nbrs_x); - - cuda_malloc ((void **) &local_cell.nbrs_cp, sizeof (rvec) * host->max_nbrs, - TRUE, "alloc:grid:cells:nbrs_cp" ); - //fprintf (stderr, "Allocated address of the nbrs_cp--> %ld \n", local_cell.nbrs_cp); - - //cuda_malloc ((void **) &local_cell.nbrs, sizeof (grid_cell *) * host->max_nbrs , - // TRUE, "alloc:grid:cells:nbrs" ); - //fprintf (stderr, "Allocated address of the nbrs--> %ld \n", local_cell.nbrs); - - copy_host_device (&local_cell, &device->cells[i], sizeof (grid_cell), cudaMemcpyHostToDevice, "grid:cell-alloc"); - } - */ +// int block_size = 512; +// int blocks = (host->max_nbrs) / block_size + ((host->max_nbrs) % block_size == 0 ? 
0 : 1); +// +// Init_Nbrs <<< blocks, block_size >>> +// ( nbrs_x, host->max_nbrs ); +// cudaThreadSynchronize( ); +// cudaCheckError( ); +// +// cuda_malloc( (void **)& device->cells, sizeof(grid_cell) * total, +// TRUE, "grid:cells"); +// fprintf( stderr, " Device cells address --> %ld \n", device->cells ); +// cuda_malloc( (void **) &device->order, +// sizeof(ivec) * (host->total + 1), TRUE, "grid:order" ); +// +// local_cell.top = local_cell.mark = local_cell.str = local_cell.end = 0; +// fprintf( stderr, "Total cells to be allocated -- > %d \n", total ); +// for (int i = 0; i < total; i++) +// { +// //fprintf( stderr, "Address of the local atom -> %ld \n", &local_cell ); +// +// cuda_malloc( (void **) &local_cell.atoms, sizeof(int) * host->max_atoms, +// TRUE, "alloc:grid:cells:atoms" ); +// //fprintf( stderr, "Allocated address of the atoms --> %ld (%d)\n", local_cell.atoms, host->max_atoms ); +// +// cuda_malloc( (void **) &local_cell.nbrs_x, sizeof(ivec) * host->max_nbrs, +// TRUE, "alloc:grid:cells:nbrs_x" ); +// copy_device( local_cell.nbrs_x, nbrs_x, host->max_nbrs * sizeof(ivec), "grid:nbrs_x" ); +// //fprintf( stderr, "Allocated address of the nbrs_x--> %ld \n", local_cell.nbrs_x ); +// +// cuda_malloc( (void **) &local_cell.nbrs_cp, sizeof(rvec) * host->max_nbrs, +// TRUE, "alloc:grid:cells:nbrs_cp" ); +// //fprintf( stderr, "Allocated address of the nbrs_cp--> %ld \n", local_cell.nbrs_cp ); +// +// //cuda_malloc( (void **) &local_cell.nbrs, sizeof(grid_cell *) * host->max_nbrs, +// // TRUE, "alloc:grid:cells:nbrs" ); +// //fprintf( stderr, "Allocated address of the nbrs--> %ld \n", local_cell.nbrs ); +// +// copy_host_device( &local_cell, &device->cells[i], sizeof(grid_cell), +// cudaMemcpyHostToDevice, "grid:cell-alloc" ); +// } } @@ -757,7 +757,7 @@ void Cuda_ReAllocate( reax_system *system, control_params *control, if ( renbr && realloc->gcell_atoms > -1 ) { #if defined(DEBUG_FOCUS) - fprintf(stderr, "reallocating gcell: g->max_atoms: %d\n", g->max_atoms); + fprintf( stderr, "reallocating gcell: g->max_atoms: %d\n", g->max_atoms ); #endif for ( i = g->native_str[0]; i < g->native_end[0]; i++ ) @@ -769,7 +769,7 @@ void Cuda_ReAllocate( reax_system *system, control_params *control, // reallocate g->atoms sfree( g->cells[ index_grid_3d(i,j,k,g) ].atoms, "g:atoms" ); g->cells[ index_grid_3d(i,j,k,g) ].atoms = (int*) - scalloc(realloc->gcell_atoms, sizeof(int), "g:atoms"); + scalloc( realloc->gcell_atoms, sizeof(int), "g:atoms" ); } } } diff --git a/PG-PuReMD/src/cuda/cuda_box.cu b/PG-PuReMD/src/cuda/cuda_box.cu new file mode 100644 index 0000000000000000000000000000000000000000..2d5f47566f2543a21f6cf4598f30b527913a1bf6 --- /dev/null +++ b/PG-PuReMD/src/cuda/cuda_box.cu @@ -0,0 +1,118 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#include "cuda_box.h" + +#include "cuda_integrate.h" +#include "cuda_system_props.h" +#include "cuda_utils.h" + +#include "../box.h" +#include "../comm_tools.h" + + +void Cuda_Scale_Box( reax_system *system, control_params *control, + simulation_data *data, mpi_datatypes *mpi_data ) +{ + int d; + real dt, lambda; + rvec mu = {0.0, 0.0, 0.0}; + + dt = control->dt; + + /* pressure scaler */ + if ( control->ensemble == iNPT ) + { + mu[0] = POW( 1.0 + (dt / control->Tau_P[0]) * (data->iso_bar.P - control->P[0]), + 1.0 / 3.0 ); + + if ( mu[0] < MIN_dV ) + { + mu[0] = MIN_dV; + } + else if ( mu[0] > MAX_dV ) + { + mu[0] = MAX_dV; + } + + mu[1] = mu[0]; + mu[2] = mu[1]; + } + else if ( control->ensemble == sNPT ) + { + for ( d = 0; d < 3; ++d ) + { + mu[d] = POW(1.0 + (dt / control->Tau_P[d]) * (data->tot_press[d] - control->P[d]), + 1. / 3 ); + + if ( mu[d] < MIN_dV ) + { + mu[d] = MIN_dV; + } + else if ( mu[d] > MAX_dV ) + { + mu[d] = MAX_dV; + } + } + } + + /* temperature scaler */ + lambda = 1.0 + (dt / control->Tau_T) * (control->T / data->therm.T - 1.0); + if ( lambda < MIN_dT ) + { + lambda = MIN_dT; + } + else if (lambda > MAX_dT ) + { + lambda = MAX_dT; + } + lambda = SQRT( lambda ); + + /* Scale velocities and positions at t+dt */ + bNVP_scale_velocities( system, lambda, mu ); + + Cuda_Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D ); + +#if defined(DEBUG) + fprintf( stderr, "damping - " ); +#endif + + /* update box & grid */ + system->big_box.box[0][0] *= mu[0]; + system->big_box.box[1][1] *= mu[1]; + system->big_box.box[2][2] *= mu[2]; + + Make_Consistent( &(system->big_box) ); + Setup_My_Box( system, control ); + Setup_My_Ext_Box( system, control ); + Update_Comm( system ); + + copy_host_device( &system->big_box, &system->d_big_box, + sizeof(simulation_box), cudaMemcpyHostToDevice, "Cuda_Scale_Box::simulation_data->big_box" ); + copy_host_device( &system->my_box, &system->d_my_box, + sizeof(simulation_box), cudaMemcpyHostToDevice, "Cuda_Scale_Box::simulation_data->my_box" ); + copy_host_device( &system->my_ext_box, &system->d_my_ext_box, + sizeof(simulation_box), cudaMemcpyHostToDevice, "Cuda_Scale_Box::simulation_data->my_ext_box" ); + +#if defined(DEBUG) + fprintf( stderr, "box & grid updated - " ); +#endif +} diff --git a/PG-PuReMD/src/cuda/cuda_box.h b/PG-PuReMD/src/cuda/cuda_box.h new file mode 100644 index 0000000000000000000000000000000000000000..6db597385884cbaf4efe81bb83674c5ab2e48786 --- /dev/null +++ b/PG-PuReMD/src/cuda/cuda_box.h @@ -0,0 +1,40 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. 
+ ----------------------------------------------------------------------*/ + +#ifndef __CUDA_BOX_H__ +#define __CUDA_BOX_H__ + +#include "../reax_types.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +void Cuda_Scale_Box( reax_system *, control_params *, + simulation_data *, mpi_datatypes *); + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/PG-PuReMD/src/cuda/cuda_charges.cu b/PG-PuReMD/src/cuda/cuda_charges.cu index ada6bf2f75c7960e1b00e820bf8387cbd98199d2..72018b1d936a472e2c78d78784306283f5c33ffb 100644 --- a/PG-PuReMD/src/cuda/cuda_charges.cu +++ b/PG-PuReMD/src/cuda/cuda_charges.cu @@ -220,7 +220,7 @@ void Cuda_Calculate_Charges( reax_system *system, storage *workspace, my_sum[1] = 0.0; scale = sizeof(real) / sizeof(void); q = (real *) host_scratch; - memset( q, 0, system->N * sizeof (real)); + memset( q, 0, system->N * sizeof(real) ); cuda_charges_x( system, my_sum ); @@ -270,9 +270,9 @@ void Cuda_QEq( reax_system *system, control_params *control, simulation_data #endif //MATRIX CHANGES - s_matvecs = Cuda_dual_CG(system, workspace, &dev_workspace->H, + s_matvecs = Cuda_dual_CG( system, workspace, &dev_workspace->H, dev_workspace->b, control->q_err, dev_workspace->x, mpi_data, - out_control->log, data); + out_control->log, data ); t_matvecs = 0; //fprintf (stderr, "Device: First CG complated with iterations: %d \n", s_matvecs); diff --git a/PG-PuReMD/src/cuda/cuda_copy.cu b/PG-PuReMD/src/cuda/cuda_copy.cu index 42055875624ce003a3f2b4ba6661d3bc1e08eb8e..1098d197ffdce8c39a51fe4ff5033eacd13beeb4 100644 --- a/PG-PuReMD/src/cuda/cuda_copy.cu +++ b/PG-PuReMD/src/cuda/cuda_copy.cu @@ -32,18 +32,18 @@ void Sync_Grid( grid *host, grid *device ) ivec_Copy( device->ghost_bond_span, host->ghost_bond_span ); copy_host_device( host->str, device->str, sizeof(int) * total, - cudaMemcpyHostToDevice, "grid:str"); + cudaMemcpyHostToDevice, "grid:str" ); copy_host_device( host->end, device->end, sizeof(int) * total, - cudaMemcpyHostToDevice, "grid:end"); + cudaMemcpyHostToDevice, "grid:end" ); copy_host_device( host->cutoff, device->cutoff, sizeof(real) * total, - cudaMemcpyHostToDevice, "grid:cutoff"); + cudaMemcpyHostToDevice, "grid:cutoff" ); copy_host_device( host->nbrs_x, device->nbrs_x, sizeof(ivec) * total * - host->max_nbrs, cudaMemcpyHostToDevice, "grid:nbrs_x"); + host->max_nbrs, cudaMemcpyHostToDevice, "grid:nbrs_x" ); copy_host_device( host->nbrs_cp, device->nbrs_cp, sizeof(rvec) * total * - host->max_nbrs, cudaMemcpyHostToDevice, "grid:nbrs_cp"); + host->max_nbrs, cudaMemcpyHostToDevice, "grid:nbrs_cp" ); copy_host_device( host->rel_box, device->rel_box, sizeof(ivec) * total, - cudaMemcpyHostToDevice, "grid:rel_box"); + cudaMemcpyHostToDevice, "grid:rel_box" ); device->max_nbrs = host->max_nbrs; } diff --git a/PG-PuReMD/src/cuda/cuda_forces.cu b/PG-PuReMD/src/cuda/cuda_forces.cu index a790b1a8c7fd570723c27c985c458886f0dcb478..553acbd06d26052379bd6a35817bd51683c4a1b9 100644 --- a/PG-PuReMD/src/cuda/cuda_forces.cu +++ b/PG-PuReMD/src/cuda/cuda_forces.cu @@ -1554,12 +1554,12 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control, /* reduction for ext_pres */ rvec_spad = (rvec *) (spad + 6 * system->N); - k_reduction_rvec <<<BLOCKS_N, BLOCK_SIZE, sizeof(rvec) * BLOCK_SIZE >>> + k_reduction_rvec <<< BLOCKS_N, BLOCK_SIZE, sizeof(rvec) * BLOCK_SIZE >>> ( rvec_spad, rvec_spad + system->N, system->N ); cudaThreadSynchronize( ); cudaCheckError( ); - k_reduction_rvec <<<1, BLOCKS_POW_2_N, sizeof(rvec) * BLOCKS_POW_2_N >>> + k_reduction_rvec <<< 1, BLOCKS_POW_2_N, 
sizeof(rvec) * BLOCKS_POW_2_N >>> ( rvec_spad + system->N, &((simulation_data *)data->d_simulation_data)->my_ext_press, BLOCKS_N ); cudaThreadSynchronize (); cudaCheckError( ); @@ -1692,7 +1692,7 @@ int Cuda_Compute_Bonded_Forces( reax_system *system, control_params *control, ( system->d_my_atoms, *dev_workspace, *(*dev_lists + HBONDS) ); // Cuda_Hydrogen_Bonds_HNbrs_BL <<< hnbrs_blocks, HB_POST_PROC_BLOCK_SIZE, // HB_POST_PROC_BLOCK_SIZE * sizeof(rvec) >>> - ( system->d_my_atoms, *dev_workspace, *(*dev_lists + HBONDS), system->N ); +// ( system->d_my_atoms, *dev_workspace, *(*dev_lists + HBONDS), system->N ); cudaThreadSynchronize( ); cudaCheckError( ); diff --git a/PG-PuReMD/src/cuda/cuda_init_md.cu b/PG-PuReMD/src/cuda/cuda_init_md.cu index fb1ac0df5abdccc5c021ee594caf4c34fe3dcd22..2bb2f89e7b76315462927b659c00efe8102b56c0 100644 --- a/PG-PuReMD/src/cuda/cuda_init_md.cu +++ b/PG-PuReMD/src/cuda/cuda_init_md.cu @@ -17,7 +17,6 @@ #include "../comm_tools.h" #include "../grid.h" #include "../init_md.h" - #include "../integrate.h" #include "../io_tools.h" #ifdef __cplusplus extern "C" { @@ -35,7 +34,6 @@ extern "C" { #include "../reax_comm_tools.h" #include "../reax_grid.h" #include "../reax_init_md.h" - #include "../reax_integrate.h" #include "../reax_io_tools.h" #include "../reax_list.h" #include "../reax_lookup.h" @@ -138,7 +136,7 @@ void Cuda_Init_Simulation_Data( reax_system *system, control_params *control, { case NVE: data->N_f = 3 * system->bigN; - Cuda_Evolve = Velocity_Verlet_NVE; + Cuda_Evolve = Cuda_Velocity_Verlet_NVE; control->virial = 0; break; @@ -151,7 +149,7 @@ void Cuda_Init_Simulation_Data( reax_system *system, control_params *control, case nhNVT: fprintf( stderr, "[WARNING] Nose-Hoover NVT is still under testing.\n" ); data->N_f = 3 * system->bigN + 1; - Cuda_Evolve = Velocity_Verlet_Nose_Hoover_NVT_Klein; + Cuda_Evolve = Cuda_Velocity_Verlet_Nose_Hoover_NVT_Klein; control->virial = 0; if ( !control->restart || (control->restart && control->random_vel) ) { @@ -165,7 +163,7 @@ void Cuda_Init_Simulation_Data( reax_system *system, control_params *control, case sNPT: /* Semi-Isotropic NPT */ data->N_f = 3 * system->bigN + 4; - Cuda_Evolve = Velocity_Verlet_Berendsen_NPT; + Cuda_Evolve = Cuda_Velocity_Verlet_Berendsen_NPT; control->virial = 1; if ( !control->restart ) { @@ -175,7 +173,7 @@ void Cuda_Init_Simulation_Data( reax_system *system, control_params *control, case iNPT: /* Isotropic NPT */ data->N_f = 3 * system->bigN + 2; - Cuda_Evolve = Velocity_Verlet_Berendsen_NPT; + Cuda_Evolve = Cuda_Velocity_Verlet_Berendsen_NPT; control->virial = 1; if ( !control->restart ) { @@ -185,7 +183,7 @@ void Cuda_Init_Simulation_Data( reax_system *system, control_params *control, case NPT: /* Anisotropic NPT */ data->N_f = 3 * system->bigN + 9; - Cuda_Evolve = Velocity_Verlet_Berendsen_NPT; + Cuda_Evolve = Cuda_Velocity_Verlet_Berendsen_NPT; control->virial = 1; fprintf( stderr, "p%d: init_simulation_data: option not yet implemented\n", diff --git a/PG-PuReMD/src/cuda/cuda_integrate.cu b/PG-PuReMD/src/cuda/cuda_integrate.cu index dcb972921f735d419e6528a390ed990699c57d2c..772032632b51031dd0abe8bec187959a1080ad4b 100644 --- a/PG-PuReMD/src/cuda/cuda_integrate.cu +++ b/PG-PuReMD/src/cuda/cuda_integrate.cu @@ -2,10 +2,12 @@ #include "cuda_integrate.h" #include "cuda_allocate.h" +#include "cuda_box.h" #include "cuda_forces.h" #include "cuda_integrate.h" #include "cuda_copy.h" #include "cuda_neighbors.h" +#include "cuda_reduction.h" #include "cuda_reset_tools.h" #include "cuda_system_props.h" 
#include "cuda_utils.h" @@ -21,7 +23,9 @@ CUDA_GLOBAL void k_update_velocity_1( reax_atom *my_atoms, real inv_m; rvec dx; reax_atom *atom; - int i = blockIdx.x * blockDim.x + threadIdx.x; + int i; + + i = blockIdx.x * blockDim.x + threadIdx.x; if ( i >= n ) { @@ -39,14 +43,15 @@ CUDA_GLOBAL void k_update_velocity_1( reax_atom *my_atoms, } -void bNVT_update_velocity_part1( reax_system *system, real dt ) +void update_velocity_part1( reax_system *system, real dt ) { int blocks; blocks = system->n / DEF_BLOCK_SIZE + ((system->n % DEF_BLOCK_SIZE == 0) ? 0 : 1); + k_update_velocity_1 <<< blocks, DEF_BLOCK_SIZE >>> - (system->d_my_atoms, system->reax_param.d_sbp, dt, system->n); + ( system->d_my_atoms, system->reax_param.d_sbp, dt, system->n ); cudaThreadSynchronize( ); cudaCheckError( ); } @@ -57,7 +62,9 @@ CUDA_GLOBAL void k_update_velocity_2( reax_atom *my_atoms, { reax_atom *atom; real inv_m; - int i = blockIdx.x * blockDim.x + threadIdx.x; + int i; + + i = blockIdx.x * blockDim.x + threadIdx.x; if ( i >= n ) { @@ -72,20 +79,140 @@ CUDA_GLOBAL void k_update_velocity_2( reax_atom *my_atoms, } -void bNVT_update_velocity_part2( reax_system *system, real dt ) +void update_velocity_part2( reax_system *system, real dt ) { int blocks; blocks = system->n / DEF_BLOCK_SIZE + ((system->n % DEF_BLOCK_SIZE == 0) ? 0 : 1); + k_update_velocity_2 <<< blocks, DEF_BLOCK_SIZE >>> - (system->d_my_atoms, system->reax_param.d_sbp, dt, system->n); + ( system->d_my_atoms, system->reax_param.d_sbp, dt, system->n ); + cudaThreadSynchronize( ); + cudaCheckError( ); +} + + +CUDA_GLOBAL void k_nhNVT_update_velocity_1( reax_atom *my_atoms, + single_body_parameters *sbp, real dt, int n ) +{ + real inv_m; + rvec dx; + reax_atom *atom; + int i; + + i = blockIdx.x * blockDim.x + threadIdx.x; + + if ( i >= n ) + { + return; + } + + /* velocity verlet, 1st part */ + atom = &(my_atoms[i]); + inv_m = 1.0 / sbp[atom->type].mass; + rvec_ScaledSum( dx, dt, atom->v, 0.5 * -F_CONV * inv_m * SQR(dt), atom->f ); + rvec_Add( atom->x, dx ); + rvec_Copy( atom->f_old, atom->f ); +} + + +void nhNVT_update_velocity_part1( reax_system *system, real dt ) +{ + int blocks; + + blocks = system->n / DEF_BLOCK_SIZE + + ((system->n % DEF_BLOCK_SIZE == 0) ? 0 : 1); + + k_nhNVT_update_velocity_1 <<< blocks, DEF_BLOCK_SIZE >>> + ( system->d_my_atoms, system->reax_param.d_sbp, dt, system->n ); + cudaThreadSynchronize( ); + cudaCheckError( ); +} + + +CUDA_GLOBAL void k_nhNVT_update_velocity_2( reax_atom *my_atoms, rvec * v_const, + single_body_parameters *sbp, real dt, real v_xi, int n ) +{ + reax_atom *atom; + real inv_m; + int i; + + i = blockIdx.x * blockDim.x + threadIdx.x; + + if ( i >= n ) + { + return; + } + + /* velocity verlet, 2nd part */ + atom = &(my_atoms[i]); + inv_m = 1.0 / sbp[atom->type].mass; + /* Compute v(t + dt) */ + rvec_Scale( v_const[i], 1.0 - 0.5 * dt * v_xi, atom->v ); + rvec_ScaledAdd( v_const[i], 0.5 * dt * inv_m * -F_CONV, atom->f_old ); + rvec_ScaledAdd( v_const[i], 0.5 * dt * inv_m * -F_CONV, atom->f ); +} + + +void nhNVT_update_velocity_part2( reax_system *system, storage *workspace, real dt, real v_xi ) +{ + int blocks; + + blocks = system->n / DEF_BLOCK_SIZE + + ((system->n % DEF_BLOCK_SIZE == 0) ? 
0 : 1); + + k_nhNVT_update_velocity_2 <<< blocks, DEF_BLOCK_SIZE >>> + ( system->d_my_atoms, workspace->v_const, system->reax_param.d_sbp, dt, v_xi, system->n ); cudaThreadSynchronize( ); cudaCheckError( ); } -CUDA_GLOBAL void k_scale_velocities( reax_atom *my_atoms, real lambda, int n ) +CUDA_GLOBAL void k_nhNVT_update_velocity_3( reax_atom *my_atoms, rvec *v_const, + single_body_parameters *sbp, real dt, real v_xi_old, real * my_ekin, int n ) +{ + reax_atom *atom; + real coef_v; + int i; + + i = blockIdx.x * blockDim.x + threadIdx.x; + + if ( i >= n ) + { + return; + } + + atom = &(my_atoms[i]); + coef_v = 1.0 / (1.0 + 0.5 * dt * v_xi_old); + rvec_Scale( atom->v, coef_v, v_const[i] ); + my_ekin[i] = (0.5 * sbp[atom->type].mass * rvec_Dot(atom->v, atom->v)); +} + + +int nhNVT_update_velocity_part3( reax_system *system, storage *workspace, + real dt, real v_xi_old, real * d_my_ekin, real * d_total_my_ekin ) +{ + int blocks, my_ekin; + + blocks = system->n / DEF_BLOCK_SIZE + + ((system->n % DEF_BLOCK_SIZE == 0) ? 0 : 1); + + k_nhNVT_update_velocity_3 <<< blocks, DEF_BLOCK_SIZE >>> + ( system->d_my_atoms, workspace->v_const, system->reax_param.d_sbp, dt, v_xi_old, d_my_ekin, system->n ); + cudaThreadSynchronize( ); + cudaCheckError( ); + + Cuda_Reduction_Sum( d_my_ekin, d_total_my_ekin, system->n ); + + copy_host_device( &my_ekin, d_total_my_ekin, sizeof(int), + cudaMemcpyDeviceToHost, "nhNVT_update_velocity_part3::d_total_my_ekin" ); + + return my_ekin; +} + + +CUDA_GLOBAL void k_bNVT_scale_velocities( reax_atom *my_atoms, real lambda, int n ) { reax_atom *atom; int i = blockIdx.x * blockDim.x + threadIdx.x; @@ -107,27 +234,311 @@ void bNVT_scale_velocities( reax_system *system, real lambda ) blocks = system->n / DEF_BLOCK_SIZE + ((system->n % DEF_BLOCK_SIZE == 0) ? 0 : 1); - k_scale_velocities <<< blocks, DEF_BLOCK_SIZE >>> - (system->d_my_atoms, lambda, system->n); + + k_bNVT_scale_velocities <<< blocks, DEF_BLOCK_SIZE >>> + ( system->d_my_atoms, lambda, system->n ); cudaThreadSynchronize( ); cudaCheckError( ); } +CUDA_GLOBAL void k_bNVP_scale_velocities( reax_atom *my_atoms, real lambda, + real mu0, real mu1, real mu2, int n ) +{ + reax_atom *atom; + int i; + + i = blockIdx.x * blockDim.x + threadIdx.x; + + if ( i >= n ) + { + return; + } + + /* Scale velocities and positions at t+dt */ + atom = &(my_atoms[i]); + rvec_Scale( atom->v, lambda, atom->v ); +// rvec_Multiply( atom->x, mu, atom->x ); + atom->x[0] = mu0 * atom->x[0]; + atom->x[1] = mu1 * atom->x[1]; + atom->x[2] = mu2 * atom->x[2]; +} + + +void bNVP_scale_velocities( reax_system *system, real lambda, rvec mu ) +{ + int blocks; + + blocks = system->n / DEF_BLOCK_SIZE + + ((system->n % DEF_BLOCK_SIZE == 0) ? 0 : 1); + + k_bNVP_scale_velocities <<< blocks, DEF_BLOCK_SIZE >>> + ( system->d_my_atoms, lambda, mu[0], mu[1], mu[2], system->n ); + cudaThreadSynchronize( ); + cudaCheckError( ); +} + + +int Cuda_Velocity_Verlet_NVE( reax_system* system, control_params* control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control, mpi_datatypes *mpi_data ) +{ + int steps, renbr, ret; + static int verlet_part1_done = FALSE, estimate_nbrs_done = 0; + real dt; +#if defined(DEBUG) + real t_over_start, t_over_elapsed; +#endif + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d @ step %d\n", system->my_rank, data->step ); + MPI_Barrier( MPI_COMM_WORLD ); +#endif + + dt = control->dt; + steps = data->step - data->prev_steps; + renbr = steps % control->reneighbor == 0 ? 
TRUE : FALSE; + ret = SUCCESS; + + Cuda_ReAllocate( system, control, data, workspace, lists, mpi_data ); + + if ( verlet_part1_done == FALSE ) + { + update_velocity_part1( system, dt ); + verlet_part1_done = TRUE; + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d @ step%d: verlet1 done\n", system->my_rank, data->step ); + MPI_Barrier( MPI_COMM_WORLD ); +#endif + + if ( renbr ) + { + Update_Grid( system, control, mpi_data->world ); + } + + Output_Sync_Atoms( system ); + Comm_Atoms( system, control, data, workspace, lists, mpi_data, renbr ); + Sync_Atoms( system ); + + /* synch the Grid to the Device here */ + Sync_Grid( &system->my_grid, &system->d_my_grid ); + + init_blocks( system ); + +#if defined(__CUDA_DEBUG_LOG__) + fprintf( stderr, "p:%d - Matvec BLocks: %d, blocksize: %d \n", + system->my_rank, MATVEC_BLOCKS, MATVEC_BLOCK_SIZE ); +#endif + } + + Cuda_Reset( system, control, data, workspace, lists ); + + if ( renbr ) + { +#if defined(DEBUG) + t_over_start = Get_Time( ); +#endif + + if ( estimate_nbrs_done == 0 ) + { + //TODO: move far_nbrs reallocation checks outside of renbr frequency check + ret = Cuda_Estimate_Neighbors( system, data->step ); + estimate_nbrs_done = 1; + } + + if ( ret == SUCCESS && estimate_nbrs_done == 1 ) + { + Cuda_Generate_Neighbor_Lists( system, data, workspace, lists ); + estimate_nbrs_done = 2; + +#if defined(DEBUG) + t_over_elapsed = Get_Timing_Info( t_over_start ); + fprintf( stderr, "p%d --> Overhead (Step-%d) %f \n", + system->my_rank, data->step, t_over_elapsed ); +#endif + } + } + + if ( ret == SUCCESS ) + { + ret = Cuda_Compute_Forces( system, control, data, workspace, + lists, out_control, mpi_data ); + } + + if ( ret == SUCCESS ) + { + update_velocity_part2( system, dt ); + + verlet_part1_done = FALSE; + estimate_nbrs_done = 0; + } + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d @ step%d: verlet2 done\n", system->my_rank, data->step ); + MPI_Barrier( MPI_COMM_WORLD ); +#endif + + return ret; +} + + +int Cuda_Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system* system, + control_params* control, simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, mpi_datatypes *mpi_data ) +{ + int itr, steps, renbr, ret; + real *d_my_ekin, *d_total_my_ekin; + static int verlet_part1_done = FALSE, estimate_nbrs_done = 0; + real dt, dt_sqr; + real my_ekin, new_ekin; + real G_xi_new, v_xi_new, v_xi_old; + thermostat *therm; + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d @ step%d\n", system->my_rank, data->step ); + MPI_Barrier( MPI_COMM_WORLD ); +#endif + + dt = control->dt; + dt_sqr = SQR(dt); + therm = &( data->therm ); + steps = data->step - data->prev_steps; + renbr = steps % control->reneighbor == 0 ? 
TRUE : FALSE; + + Cuda_ReAllocate( system, control, data, workspace, lists, mpi_data ); + + if ( verlet_part1_done == FALSE ) + { + nhNVT_update_velocity_part1( system, dt ); + + /* Compute xi(t + dt) */ + therm->xi += ( therm->v_xi * dt + 0.5 * dt_sqr * therm->G_xi ); + + verlet_part1_done = TRUE; + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d @ step%d: verlet1 done\n", system->my_rank, data->step ); + MPI_Barrier( MPI_COMM_WORLD ); +#endif + + if ( renbr ) + { + Update_Grid( system, control, mpi_data->world ); + } + + Output_Sync_Atoms( system ); + Comm_Atoms( system, control, data, workspace, lists, mpi_data, renbr ); + Sync_Atoms( system ); + + /* synch the Grid to the Device here */ + Sync_Grid( &system->my_grid, &system->d_my_grid ); + + init_blocks( system ); + } + + Cuda_Reset( system, control, data, workspace, lists ); + + if ( renbr ) + { +#if defined(DEBUG) + t_over_start = Get_Time( ); +#endif + + if ( estimate_nbrs_done == 0 ) + { + //TODO: move far_nbrs reallocation checks outside of renbr frequency check + ret = Cuda_Estimate_Neighbors( system, data->step ); + fprintf( stderr, "Cuda_Estimate_Neighbors (%d)\n", ret ); + estimate_nbrs_done = 1; + } + + if ( ret == SUCCESS && estimate_nbrs_done == 1 ) + { + Cuda_Generate_Neighbor_Lists( system, data, workspace, lists ); + estimate_nbrs_done = 2; + +#if defined(DEBUG) + t_over_elapsed = Get_Timing_Info( t_over_start ); + fprintf( stderr, "p%d --> Overhead (Step-%d) %f \n", + system->my_rank, data->step, t_over_elapsed ); +#endif + } + } + + if ( ret == SUCCESS ) + { + ret = Cuda_Compute_Forces( system, control, data, workspace, + lists, out_control, mpi_data ); + fprintf( stderr, "Cuda_Compute_Forces (%d)\n", ret ); + } + + if ( ret == SUCCESS ) + { + /* Compute iteration constants for each atom's velocity */ + nhNVT_update_velocity_part2( system, dev_workspace, dt, therm->v_xi ); + + v_xi_new = therm->v_xi_old + 2.0 * dt * therm->G_xi; + my_ekin = G_xi_new = v_xi_old = 0; + itr = 0; + + cuda_malloc( (void **) &d_my_ekin, sizeof(real) * system->n, FALSE, + "Cuda_Velocity_Verlet_Nose_Hoover_NVT_Klein::d_my_ekin" ); + cuda_malloc( (void **) &d_total_my_ekin, sizeof(real), FALSE, + "Cuda_Velocity_Verlet_Nose_Hoover_NVT_Klein::d_total_my_ekin" ); + + do + { + itr++; + + /* new values become old in this iteration */ + v_xi_old = v_xi_new; + + my_ekin = nhNVT_update_velocity_part3( system, dev_workspace, dt, v_xi_old, + d_my_ekin, d_total_my_ekin ); + + MPI_Allreduce( &my_ekin, &new_ekin, 1, MPI_DOUBLE, MPI_SUM, + mpi_data->comm_mesh3D ); + + G_xi_new = control->Tau_T * ( 2.0 * new_ekin - data->N_f * K_B * control->T ); + v_xi_new = therm->v_xi + 0.5 * dt * ( therm->G_xi + G_xi_new ); + } + while ( FABS(v_xi_new - v_xi_old) > 1e-5 ); + therm->v_xi_old = therm->v_xi; + therm->v_xi = v_xi_new; + therm->G_xi = G_xi_new; + + cuda_free( d_total_my_ekin, + "Cuda_Velocity_Verlet_Nose_Hoover_NVT_Klein::d_total_my_ekin" ); + cuda_free( d_my_ekin, + "Cuda_Velocity_Verlet_Nose_Hoover_NVT_Klein::d_my_ekin" ); + + verlet_part1_done = FALSE; + estimate_nbrs_done = 0; + } + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d @ step%d: verlet2 done\n", system->my_rank, data->step ); + MPI_Barrier( MPI_COMM_WORLD ); +#endif + + return ret; +} + + +/* uses Berendsen-type coupling for both T and P. + All box dimensions are scaled by the same amount, + there is no change in the angles between axes. 
*/ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* control, simulation_data *data, storage *workspace, reax_list **lists, output_controls *out_control, mpi_datatypes *mpi_data ) { - int i, steps, renbr, ret; + int steps, renbr, ret; static int verlet_part1_done = FALSE, estimate_nbrs_done = 0; - real inv_m, dt, lambda; - rvec dx; - reax_atom *atom; - int *bond_top, *hb_top; - int Htop, num_3body; - int total_hbonds, count, total_bonds; - int bond_cap, cap_3body; + real dt, lambda; +#if defined(DEBUG) real t_over_start, t_over_elapsed; +#endif #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d @ step%d\n", system->my_rank, data->step ); @@ -144,7 +555,7 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con if ( verlet_part1_done == FALSE ) { /* velocity verlet, 1st part */ - bNVT_update_velocity_part1( system, dt ); + update_velocity_part1( system, dt ); verlet_part1_done = TRUE; #if defined(DEBUG_FOCUS) @@ -161,7 +572,7 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con Comm_Atoms( system, control, data, workspace, lists, mpi_data, renbr ); Sync_Atoms( system ); - /* synch the Grid to the Device here */ + /* sync grid to device */ Sync_Grid( &system->my_grid, &system->d_my_grid ); init_blocks( system ); @@ -209,7 +620,7 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con if ( ret == SUCCESS ) { /* velocity verlet, 2nd part */ - bNVT_update_velocity_part2( system, dt ); + update_velocity_part2( system, dt ); #if defined(DEBUG_FOCUS) fprintf(stderr, "p%d @ step%d: verlet2 done\n", system->my_rank, data->step); @@ -247,3 +658,109 @@ int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* con return ret; } + + +/* uses Berendsen-type coupling for both T and P. + * All box dimensions are scaled by the same amount, + * there is no change in the angles between axes. */ +int Cuda_Velocity_Verlet_Berendsen_NPT( reax_system* system, control_params* control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control, mpi_datatypes *mpi_data ) +{ + int steps, renbr, ret; + static int verlet_part1_done = FALSE, estimate_nbrs_done = 0; + real dt; +#if defined(DEBUG) + real t_over_start, t_over_elapsed; +#endif + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d @ step %d\n", system->my_rank, data->step ); + MPI_Barrier( MPI_COMM_WORLD ); +#endif + + dt = control->dt; + steps = data->step - data->prev_steps; + renbr = steps % control->reneighbor == 0 ? 
TRUE : FALSE; + ret = SUCCESS; + + Cuda_ReAllocate( system, control, data, workspace, lists, mpi_data ); + + if ( verlet_part1_done == FALSE ) + { + update_velocity_part1( system, dt ); + verlet_part1_done = TRUE; + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d @ step%d: verlet1 done\n", system->my_rank, data->step ); + MPI_Barrier( MPI_COMM_WORLD ); +#endif + + if ( renbr ) + { + Update_Grid( system, control, mpi_data->world ); + } + + Output_Sync_Atoms( system ); + Comm_Atoms( system, control, data, workspace, lists, mpi_data, renbr ); + Sync_Atoms( system ); + + /* sync grid to device */ + Sync_Grid( &system->my_grid, &system->d_my_grid ); + + init_blocks( system ); + } + + Cuda_Reset( system, control, data, workspace, lists ); + + if ( renbr ) + { +#if defined(DEBUG) + t_over_start = Get_Time( ); +#endif + + if ( estimate_nbrs_done == 0 ) + { + //TODO: move far_nbrs reallocation checks outside of renbr frequency check + ret = Cuda_Estimate_Neighbors( system, data->step ); + estimate_nbrs_done = 1; + } + + if ( ret == SUCCESS && estimate_nbrs_done == 1 ) + { + Cuda_Generate_Neighbor_Lists( system, data, workspace, lists ); + estimate_nbrs_done = 2; + +#if defined(DEBUG) + t_over_elapsed = Get_Timing_Info( t_over_start ); + fprintf( stderr, "p%d --> Overhead (Step-%d) %f \n", + system->my_rank, data->step, t_over_elapsed ); +#endif + } + } + + if ( ret == SUCCESS ) + { + ret = Cuda_Compute_Forces( system, control, data, workspace, + lists, out_control, mpi_data ); + } + + if ( ret == SUCCESS ) + { + update_velocity_part2( system, dt ); + + verlet_part1_done = FALSE; + estimate_nbrs_done = 0; + + Cuda_Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D ); + Cuda_Compute_Pressure( system, control, data, mpi_data ); + Cuda_Scale_Box( system, control, data, mpi_data ); + } + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d @ step%d: verlet2 done\n", system->my_rank, data->step ); + MPI_Barrier( MPI_COMM_WORLD ); +#endif + + return ret; +} diff --git a/PG-PuReMD/src/cuda/cuda_integrate.h b/PG-PuReMD/src/cuda/cuda_integrate.h index 2797b3e33397685fc1cfdc05dedead43fb98fe74..9d893419abd1c0c4397641cbad9fd0f42f888696 100644 --- a/PG-PuReMD/src/cuda/cuda_integrate.h +++ b/PG-PuReMD/src/cuda/cuda_integrate.h @@ -29,16 +29,24 @@ extern "C" { #endif -void bNVT_update_velocity_part1( reax_system *, real ); +void bNVP_scale_velocities( reax_system *, real, rvec ); -void bNVT_update_velocity_part2( reax_system *, real ); +int Cuda_Velocity_Verlet_NVE( reax_system*, control_params*, + simulation_data*, storage*, reax_list**, output_controls*, + mpi_datatypes* ); -void bNVT_scale_velocities( reax_system *, real ); +int Cuda_Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system*, control_params*, + simulation_data*, storage*, reax_list**, output_controls*, + mpi_datatypes* ); int Cuda_Velocity_Verlet_Berendsen_NVT( reax_system*, control_params*, simulation_data*, storage*, reax_list**, output_controls*, mpi_datatypes* ); +int Cuda_Velocity_Verlet_Berendsen_NPT( reax_system*, control_params*, + simulation_data*, storage*, reax_list**, output_controls*, + mpi_datatypes* ); + #ifdef __cplusplus } #endif diff --git a/PG-PuReMD/src/cuda/cuda_lin_alg.cu b/PG-PuReMD/src/cuda/cuda_lin_alg.cu index dc7a2fc344e8c82d2758f89052b4cad34fb578ba..a0db127b6abefd39019acb874147ecb022a85705 100644 --- a/PG-PuReMD/src/cuda/cuda_lin_alg.cu +++ b/PG-PuReMD/src/cuda/cuda_lin_alg.cu @@ -766,7 +766,7 @@ int Cuda_dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, } #endif - for ( i = 1; i < 300; ++i ) + for 
( i = 1; i < 1000; ++i ) { //MVAPICH2 //#ifdef __CUDA_DEBUG__ @@ -948,7 +948,7 @@ int Cuda_dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, Cuda_RvecCopy_To( dev_workspace->x, dev_workspace->s, 0, system->n ); } - if ( i >= 300 ) + if ( i >= 1000 ) { fprintf( stderr, "[WARNING] p%d: dual CG convergence failed! (%d steps)\n", system->my_rank, i ); diff --git a/PG-PuReMD/src/cuda/cuda_neighbors.cu b/PG-PuReMD/src/cuda/cuda_neighbors.cu index b1f2b85d7530efb12647de27e0fa3d89ca3086b9..f9c127d6e919c8a9177b858b1c49f84866aa145b 100644 --- a/PG-PuReMD/src/cuda/cuda_neighbors.cu +++ b/PG-PuReMD/src/cuda/cuda_neighbors.cu @@ -68,8 +68,7 @@ CUDA_GLOBAL void k_generate_neighbor_lists( reax_atom *my_atoms, atom1 = &(my_atoms[l]); num_far = Dev_Start_Index( l, &far_nbrs ); - //get the coordinates of the atom and - //compute the grid cell + /* get the coordinates of the atom and compute the grid cell */ if ( l < n ) { for ( i = 0; i < 3; i++ ) @@ -115,7 +114,7 @@ CUDA_GLOBAL void k_generate_neighbor_lists( reax_atom *my_atoms, /* if neighboring grid cell is further in the "positive" direction AND within cutoff */ if ( g.str[index_grid_3d(i, j, k, &g)] <= g.str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)] && - Dev_DistSqr_to_Special_Point(g.nbrs_cp[index_grid_nbrs (i, j, k, itr, &g)], atom1->x) <= cutoff ) + Dev_DistSqr_to_Special_Point(g.nbrs_cp[index_grid_nbrs(i, j, k, itr, &g)], atom1->x) <= cutoff ) { /* pick up another atom from the neighbor cell */ for ( m = g.str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], &g)]; diff --git a/PG-PuReMD/src/cuda/cuda_system_props.cu b/PG-PuReMD/src/cuda/cuda_system_props.cu index 54957d00fc8d555a26ac699511ed003865482a19..a7ece36c47cce9b0e07178d8b92fe68844472fc8 100644 --- a/PG-PuReMD/src/cuda/cuda_system_props.cu +++ b/PG-PuReMD/src/cuda/cuda_system_props.cu @@ -1,11 +1,12 @@ #include "cuda_system_props.h" +#include "cuda_copy.h" #include "cuda_utils.h" #include "cuda_reduction.h" -#include "cuda_copy.h" #include "cuda_shuffle.h" +#include "../tool_box.h" #include "../vector.h" @@ -622,18 +623,18 @@ extern "C" void dev_compute_total_mass( reax_system *system, real *local_val ) real *block_mass = (real *) scratch; cuda_memset( block_mass, 0, sizeof(real) * (1 + BLOCKS_POW_2), "total_mass:tmp" ); - k_compute_total_mass <<<BLOCKS, BLOCK_SIZE, sizeof(real) * BLOCK_SIZE >>> - (system->reax_param.d_sbp, system->d_my_atoms, block_mass, system->n); + k_compute_total_mass <<< BLOCKS, BLOCK_SIZE, sizeof(real) * BLOCK_SIZE >>> + ( system->reax_param.d_sbp, system->d_my_atoms, block_mass, system->n ); cudaThreadSynchronize( ); cudaCheckError( ); - k_reduction <<<1, BLOCKS_POW_2, sizeof(real) * BLOCKS_POW_2 >>> - (block_mass, block_mass + BLOCKS_POW_2, BLOCKS_POW_2); + k_reduction <<< 1, BLOCKS_POW_2, sizeof(real) * BLOCKS_POW_2 >>> + ( block_mass, block_mass + BLOCKS_POW_2, BLOCKS_POW_2 ); cudaThreadSynchronize( ); cudaCheckError( ); - copy_host_device (local_val, block_mass + BLOCKS_POW_2, sizeof(real), - cudaMemcpyDeviceToHost, "total_mass:tmp"); + copy_host_device( local_val, block_mass + BLOCKS_POW_2, sizeof(real), + cudaMemcpyDeviceToHost, "total_mass:tmp" ); } @@ -655,9 +656,9 @@ CUDA_GLOBAL void k_compute_kinetic_energy( single_body_parameters *sbp, reax_ato __syncthreads( ); - for(int z = 16; z >=1; z/=2) + for(int z = 16; z >= 1; z /= 2) { - sdata += shfl( sdata, z); + sdata += shfl( sdata, z ); } if (threadIdx.x % 32 == 0) @@ -715,27 +716,28 @@ CUDA_GLOBAL void k_compute_kinetic_energy( single_body_parameters *sbp, reax_ato #endif } + 
extern "C" void dev_compute_kinetic_energy( reax_system *system, simulation_data *data, real *local_val ) { real *block_energy = (real *) scratch; cuda_memset( block_energy, 0, sizeof(real) * (BLOCKS_POW_2 + 1), "kinetic_energy:tmp" ); - k_compute_kinetic_energy <<<BLOCKS, BLOCK_SIZE, sizeof(real) * BLOCK_SIZE >>> - (system->reax_param.d_sbp, system->d_my_atoms, block_energy, system->n); + k_compute_kinetic_energy <<< BLOCKS, BLOCK_SIZE, sizeof(real) * BLOCK_SIZE >>> + ( system->reax_param.d_sbp, system->d_my_atoms, block_energy, system->n ); cudaThreadSynchronize( ); cudaCheckError( ); - k_reduction <<<1, BLOCKS_POW_2, sizeof(real) * BLOCKS_POW_2 >>> - (block_energy, block_energy + BLOCKS_POW_2, BLOCKS_POW_2); + k_reduction <<< 1, BLOCKS_POW_2, sizeof(real) * BLOCKS_POW_2 >>> + ( block_energy, block_energy + BLOCKS_POW_2, BLOCKS_POW_2 ); cudaThreadSynchronize( ); cudaCheckError( ); + //copy_host_device( local_val, &((simulation_data *)data->d_simulation_data)->my_en.e_kin, copy_host_device( local_val, block_energy + BLOCKS_POW_2, - //copy_host_device (local_val, &((simulation_data *)data->d_simulation_data)->my_en.e_kin, sizeof(real), cudaMemcpyDeviceToHost, "kinetic_energy:tmp" ); - //copy_device (block_energy + BLOCKS_POW_2, &((simulation_data *)data->d_simulation_data)->my_en.e_kin, - // sizeof (real), "kinetic_energy"); + //copy_device( block_energy + BLOCKS_POW_2, &((simulation_data *)data->d_simulation_data)->my_en.e_kin, + // sizeof(real), "kinetic_energy" ); } @@ -884,20 +886,16 @@ extern "C" void dev_sync_simulation_data( simulation_data *data ) void Cuda_Compute_Kinetic_Energy( reax_system* system, simulation_data* data, MPI_Comm comm ) { - int i; - rvec p; - real m; - data->my_en.e_kin = 0.0; dev_compute_kinetic_energy( system, data, &data->my_en.e_kin ); - MPI_Allreduce( &data->my_en.e_kin, &data->sys_en.e_kin, + MPI_Allreduce( &data->my_en.e_kin, &data->sys_en.e_kin, 1, MPI_DOUBLE, MPI_SUM, comm ); - data->therm.T = (2. * data->sys_en.e_kin) / (data->N_f * K_B); + data->therm.T = (2.0 * data->sys_en.e_kin) / (data->N_f * K_B); - // avoid T being an absolute zero, might cause F.P.E! + /* avoid T being an absolute zero, might cause F.P.E! 
*/ if ( FABS(data->therm.T) < ALMOST_ZERO ) { data->therm.T = ALMOST_ZERO; @@ -908,10 +906,9 @@ void Cuda_Compute_Kinetic_Energy( reax_system* system, simulation_data* data, void Cuda_Compute_Total_Mass( reax_system *system, simulation_data *data, MPI_Comm comm ) { - int i; real tmp; - //compute local total mass of the system + /* compute local total mass of the system */ dev_compute_total_mass( system, &tmp ); MPI_Allreduce( &tmp, &data->M, 1, MPI_DOUBLE, MPI_SUM, comm ); @@ -924,10 +921,10 @@ void Cuda_Compute_Center_of_Mass( reax_system *system, simulation_data *data, mpi_datatypes *mpi_data, MPI_Comm comm ) { int i; - real m, det; //xx, xy, xz, yy, yz, zz; + real det; //xx, xy, xz, yy, yz, zz; real tmp_mat[6], tot_mat[6]; rvec my_xcm, my_vcm, my_amcm, my_avcm; - rvec tvec, diff; + rvec tvec; rtensor mat, inv; rvec_MakeZero( my_xcm ); // position of CoM @@ -1024,3 +1021,116 @@ void Cuda_Compute_Center_of_Mass( reax_system *system, simulation_data *data, } +CUDA_GLOBAL void k_compute_pressure( reax_atom *my_atoms, simulation_box *big_box, + rvec *int_press, int n ) +{ + reax_atom *p_atom; + rvec tx; + int i; + + i = blockIdx.x * blockDim.x + threadIdx.x; + + if ( i >= n ) + { + return; + } + + p_atom = &( my_atoms[i] ); + rvec_MakeZero( int_press[i] ); + + /* transform x into unit box coordinates, store in tx */ + Transform_to_UnitBox( p_atom->x, big_box, 1, tx ); + + /* this atom's contribution to internal pressure */ + rvec_Multiply( int_press[i], p_atom->f, tx ); +} + + +/* IMPORTANT: This function assumes that current kinetic energy + * the system is already computed + * + * IMPORTANT: In Klein's paper, it is stated that a dU/dV term needs + * to be added when there are long-range interactions or long-range + * corrections to short-range interactions present. + * We may want to add that for more accuracy. + */ +void Cuda_Compute_Pressure( reax_system* system, control_params *control, + simulation_data* data, mpi_datatypes *mpi_data ) +{ + int blocks, block_size, blocks_n, blocks_pow_2_n; + rvec *rvec_spad; + rvec int_press; + simulation_box *big_box; + + rvec_spad = (rvec *) scratch; + big_box = &(system->big_box); + + /* 0: both int and ext, 1: ext only, 2: int only */ + if ( control->press_mode == 0 || control->press_mode == 2 ) + { + blocks = system->n / DEF_BLOCK_SIZE + + ((system->n % DEF_BLOCK_SIZE == 0) ? 
0 : 1); + + compute_blocks( &blocks_n, &block_size, system->n ); + compute_nearest_pow_2( blocks_n, &blocks_pow_2_n ); + + k_compute_pressure <<< blocks, DEF_BLOCK_SIZE >>> + ( system->d_my_atoms, system->d_big_box, rvec_spad, + system->n ); + + k_reduction_rvec <<< blocks_n, block_size, sizeof(rvec) * block_size >>> + ( rvec_spad, rvec_spad + system->n, system->n ); + cudaThreadSynchronize( ); + cudaCheckError( ); + + k_reduction_rvec <<< 1, blocks_pow_2_n, sizeof(rvec) * blocks_pow_2_n >>> + ( rvec_spad + system->n, rvec_spad + system->n + blocks_n, blocks_n ); + cudaThreadSynchronize (); + cudaCheckError( ); + + copy_host_device( &int_press, rvec_spad + system->n + blocks_n, sizeof(rvec), + cudaMemcpyDeviceToHost, "Cuda_Compute_Pressure::d_int_press" ); + } + +#if defined(DEBUG) + fprintf( stderr, "p%d:p_int(%10.5f %10.5f %10.5f)p_ext(%10.5f %10.5f %10.5f)\n", + system->my_rank, int_press[0], int_press[1], int_press[2], + data->my_ext_press[0], data->my_ext_press[1], data->my_ext_press[2] ); +#endif + + /* sum up internal and external pressure */ + MPI_Allreduce( int_press, data->int_press, + 3, MPI_DOUBLE, MPI_SUM, mpi_data->comm_mesh3D ); + MPI_Allreduce( data->my_ext_press, data->ext_press, + 3, MPI_DOUBLE, MPI_SUM, mpi_data->comm_mesh3D ); + +#if defined(DEBUG) + fprintf( stderr, "p%d: %10.5f %10.5f %10.5f\n", + system->my_rank, + data->int_press[0], data->int_press[1], data->int_press[2] ); + fprintf( stderr, "p%d: %10.5f %10.5f %10.5f\n", + system->my_rank, + data->ext_press[0], data->ext_press[1], data->ext_press[2] ); +#endif + + /* kinetic contribution */ + data->kin_press = 2.0 * (E_CONV * data->sys_en.e_kin) + / (3.0 * big_box->V * P_CONV); + + /* Calculate total pressure in each direction */ + data->tot_press[0] = data->kin_press - + (( data->int_press[0] + data->ext_press[0] ) / + ( big_box->box_norms[1] * big_box->box_norms[2] * P_CONV )); + + data->tot_press[1] = data->kin_press - + (( data->int_press[1] + data->ext_press[1] ) / + ( big_box->box_norms[0] * big_box->box_norms[2] * P_CONV )); + + data->tot_press[2] = data->kin_press - + (( data->int_press[2] + data->ext_press[2] ) / + ( big_box->box_norms[0] * big_box->box_norms[1] * P_CONV )); + + /* Average pressure for the whole box */ + data->iso_bar.P = + ( data->tot_press[0] + data->tot_press[1] + data->tot_press[2] ) / 3.0; +} diff --git a/PG-PuReMD/src/cuda/cuda_system_props.h b/PG-PuReMD/src/cuda/cuda_system_props.h index 66f620b301deaf3d5bf0d7acac48e28fa45f6901..ddf8ed4d5c5b99e5cddda960c5b6274cd5622386 100644 --- a/PG-PuReMD/src/cuda/cuda_system_props.h +++ b/PG-PuReMD/src/cuda/cuda_system_props.h @@ -28,6 +28,9 @@ void Cuda_Compute_Kinetic_Energy( reax_system*, simulation_data*, MPI_Comm ); void Cuda_Compute_Center_of_Mass( reax_system*, simulation_data*, mpi_datatypes*, MPI_Comm ); +void Cuda_Compute_Pressure( reax_system *, control_params *, + simulation_data *, mpi_datatypes * ); + #ifdef __cplusplus } #endif diff --git a/PG-PuReMD/src/grid.c b/PG-PuReMD/src/grid.c index d893f6c60e166f8e11092e203817a3f610439c64..7f3ef23144dc9b7dd64929b1270dbd348bef79a5 100644 --- a/PG-PuReMD/src/grid.c +++ b/PG-PuReMD/src/grid.c @@ -215,7 +215,7 @@ void Find_Neighbor_GridCells( grid *g, control_params *control ) //gc->nbrs[top] = &(g->cells[cj[0]][cj[1]][cj[2]]); //gc->nbrs[top] = &(g->cells[ index_grid_3d(cj[0],cj[1],cj[2],g) ]); ivec_Copy( g->nbrs_x[index_grid_nbrs(ci[0], ci[1], ci[2], top, g)], cj ); - //fprintf (stderr, " index: %d - %d \n", index_grid_nbrs (ci[0], ci[1], ci[2], top, g), g->total * g->max_nbrs); + 
//fprintf( stderr, " index: %d - %d \n", index_grid_nbrs (ci[0], ci[1], ci[2], top, g), g->total * g->max_nbrs ); Find_Closest_Point( /*ext_box,*/ g, ci, cj, g->nbrs_cp[index_grid_nbrs (ci[0], ci[1], ci[2], top, g)] ); //fprintf( stderr, "cp: %f %f %f\n", // gc->nbrs_cp[top][0], gc->nbrs_cp[top][1], @@ -787,13 +787,14 @@ void Bin_Boundary_Atoms( reax_system *system ) Get_Boundary_GCell( g, base, atoms[start].x, &gc, &cur_min, &cur_max, gcell_cood ); g->str[index_grid_3d( gcell_cood[0], gcell_cood[1], gcell_cood[2], g )] = start; gc->top = 1; + /* error check */ if ( is_Within_GCell( atoms[start].x, ext_box->min, ext_box->max ) == FALSE ) { fprintf( stderr, "p%d: (start):ghost atom%d [%f %f %f] is out of my (grid cell) box!\n", system->my_rank, start, atoms[start].x[0], atoms[start].x[1], atoms[start].x[2] ); -// MPI_Abort( MPI_COMM_WORLD, INVALID_INPUT ); + MPI_Abort( MPI_COMM_WORLD, INVALID_INPUT ); } for ( i = start + 1; i < end; i++ ) @@ -804,7 +805,7 @@ void Bin_Boundary_Atoms( reax_system *system ) fprintf( stderr, "p%d: (middle) ghost atom%d [%f %f %f] is out of my (grid cell) box!\n", system->my_rank, i, atoms[i].x[0], atoms[i].x[1], atoms[i].x[2] ); -// MPI_Abort( MPI_COMM_WORLD, INVALID_INPUT ); + MPI_Abort( MPI_COMM_WORLD, INVALID_INPUT ); } if ( is_Within_GCell( atoms[i].x, cur_min, cur_max ) == TRUE ) @@ -815,6 +816,7 @@ void Bin_Boundary_Atoms( reax_system *system ) { g->end[index_grid_3d( gcell_cood[0], gcell_cood[1], gcell_cood[2], g )] = i; Get_Boundary_GCell( g, base, atoms[i].x, &gc, &cur_min, &cur_max, gcell_cood ); + /* sanity check! */ if ( gc->top != 0 ) { @@ -824,8 +826,9 @@ void Bin_Boundary_Atoms( reax_system *system ) atoms[i].x[0], atoms[i].x[1], atoms[i].x[2], gc->min[0], gc->min[1], gc->min[2], gc->max[0], gc->max[1], gc->max[2] ); -// MPI_Abort( MPI_COMM_WORLD, INVALID_INPUT ); + MPI_Abort( MPI_COMM_WORLD, INVALID_INPUT ); } + g->str[index_grid_3d( gcell_cood[0], gcell_cood[1], gcell_cood[2], g )] = i; gc->top = 1; } diff --git a/PG-PuReMD/src/integrate.c b/PG-PuReMD/src/integrate.c index b02008972bf42c235df90fd16ba5b8f84e6097ba..d3e9239f38e5b826b871ade2a6ade78f6b180c66 100644 --- a/PG-PuReMD/src/integrate.c +++ b/PG-PuReMD/src/integrate.c @@ -64,8 +64,10 @@ int Velocity_Verlet_NVE( reax_system* system, control_params* control, { atom = &(system->my_atoms[i]); inv_m = 1.0 / system->reax_param.sbp[atom->type].mass; + /* Compute x(t + dt) */ rvec_ScaledSum( dx, dt, atom->v, 0.5 * dt_sqr * -F_CONV * inv_m, atom->f ); rvec_Add( system->my_atoms[i].x, dx ); + /* Compute v(t + dt/2) */ rvec_ScaledAdd( atom->v, 0.5 * dt * -F_CONV * inv_m, atom->f ); } verlet_part1_done = TRUE; @@ -232,7 +234,7 @@ int Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* control, { int i, steps, renbr, ret; static int verlet_part1_done = FALSE; - real inv_m, dt, lambda; + real inv_m, dt, dt_sqr, lambda; rvec dx; reax_atom *atom; @@ -243,6 +245,7 @@ int Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* control, dt = control->dt; steps = data->step - data->prev_steps; renbr = steps % control->reneighbor == 0 ? 
TRUE : FALSE; + dt_sqr = SQR(dt); ReAllocate( system, control, data, workspace, lists, mpi_data ); @@ -253,10 +256,8 @@ int Velocity_Verlet_Berendsen_NVT( reax_system* system, control_params* control, { atom = &(system->my_atoms[i]); inv_m = 1.0 / system->reax_param.sbp[atom->type].mass; - /* Compute x(t + dt) */ - rvec_ScaledSum( dx, dt, atom->v, 0.5 * -F_CONV * inv_m * SQR(dt), atom->f ); + rvec_ScaledSum( dx, dt, atom->v, 0.5 * -F_CONV * inv_m * dt_sqr, atom->f ); rvec_Add( atom->x, dx ); - /* Compute v(t + dt/2) */ rvec_ScaledAdd( atom->v, 0.5 * -F_CONV * inv_m * dt, atom->f ); } diff --git a/PG-PuReMD/src/neighbors.c b/PG-PuReMD/src/neighbors.c index e938329a6728f57d178d530ee8bb996447fa8af7..732a6e7da4bdbcb422308246db9916d66870385c 100644 --- a/PG-PuReMD/src/neighbors.c +++ b/PG-PuReMD/src/neighbors.c @@ -112,22 +112,23 @@ void Generate_Neighbor_Lists( reax_system *system, simulation_data *data, cutoff = SQR(g->cutoff[index_grid_3d(i, j, k, g)]); /* pick up an atom from the current cell */ - for (l = g->str[index_grid_3d(i, j, k, g)]; l < g->end[index_grid_3d(i, j, k, g)]; ++l ) + for ( l = g->str[index_grid_3d(i, j, k, g)]; l < g->end[index_grid_3d(i, j, k, g)]; ++l ) { atom1 = &(system->my_atoms[l]); Set_Start_Index( l, num_far, far_nbrs ); itr = 0; + /* search through neighbor grid cell candidates of current cell */ while ( (g->nbrs_x[index_grid_nbrs(i, j, k, itr, g)][0]) >= 0 ) { - ivec_Copy (nbrs_x, g->nbrs_x[index_grid_nbrs(i, j, k, itr, g)]); - gcj = &( g->cells [ index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], g) ] ); + ivec_Copy( nbrs_x, g->nbrs_x[index_grid_nbrs(i, j, k, itr, g)] ); + gcj = &( g->cells[ index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], g) ] ); if ( g->str[index_grid_3d(i, j, k, g)] <= g->str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], g)] && (DistSqr_to_Special_Point(g->nbrs_cp[index_grid_nbrs(i, j, k, itr, g)], atom1->x) <= cutoff) ) { - /* pick up another atom from the neighbor cell */ + /* pick up another atom from the neighbor grid cell */ for ( m = g->str[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], g)]; m < g->end[index_grid_3d(nbrs_x[0], nbrs_x[1], nbrs_x[2], g)]; ++m ) { @@ -217,10 +218,7 @@ int Estimate_NumNeighbors( reax_system *system, reax_list **lists ) { for ( k = 0; k < g->ncells[2]; k++ ) { - //SUDHIR - //gci = &(g->cells[i][j][k]); gci = &(g->cells[ index_grid_3d(i, j, k, g) ]); - //cutoff = SQR(gci->cutoff); cutoff = SQR(g->cutoff[ index_grid_3d(i, j, k, g) ]); //fprintf( stderr, "gridcell %d %d %d\n", i, j, k ); @@ -279,15 +277,12 @@ int Estimate_NumNeighbors( reax_system *system, reax_list **lists ) } } -#if defined(DEBUG) - fprintf (stderr, "Total number of host neighbors: %d \n", num_far); -#endif - #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: estimate nbrs done - num_far=%d\n", system->my_rank, num_far ); MPI_Barrier( MPI_COMM_WORLD ); #endif + return MAX( num_far * SAFE_ZONE, MIN_CAP * MIN_NBRS ); } diff --git a/PG-PuReMD/src/system_props.c b/PG-PuReMD/src/system_props.c index ea4465c58f54635963b0da5c383833fc2aa094f7..c983afc1cad8fbc324fb7977a921c96dbfbe23f2 100644 --- a/PG-PuReMD/src/system_props.c +++ b/PG-PuReMD/src/system_props.c @@ -40,7 +40,8 @@ void Temperature_Control( control_params *control, simulation_data *data ) { real tmp; - if ( control->T_mode == 1 ) // step-wise temperature control + /* step-wise temperature control */ + if ( control->T_mode == 1 ) { if ((data->step - data->prev_steps) % ((int)(control->T_freq / control->dt)) == 0) { @@ -54,7 +55,8 @@ void Temperature_Control( control_params *control, 
simulation_data *data ) } } } - else if ( control->T_mode == 2 ) // constant slope control + /* constant slope control */ + else if ( control->T_mode == 2 ) { tmp = control->T_rate * control->dt / control->T_freq; @@ -105,7 +107,6 @@ void Compute_System_Energy( reax_system *system, simulation_data *data, my_en[13] = data->my_en.e_kin; #ifdef HAVE_CUDA - //Cuda Wrapper here dev_sync_simulation_data( data ); #endif @@ -166,7 +167,7 @@ void Compute_System_Energy( reax_system *system, simulation_data *data, void Compute_Total_Mass( reax_system *system, simulation_data *data, MPI_Comm comm ) { - int i; + int i; real tmp; tmp = 0; @@ -268,8 +269,13 @@ void Compute_Center_of_Mass( reax_system *system, simulation_data *data, inv[2][2] = mat[0][0] * mat[1][1] - mat[1][0] * mat[0][1]; if ( det > ALMOST_ZERO ) + { rtensor_Scale( inv, 1. / det, inv ); - else rtensor_MakeZero( inv ); + } + else + { + rtensor_MakeZero( inv ); + } /* Compute the angular velocity about the centre of mass */ rtensor_MatVec( data->avcm, inv, data->amcm ); @@ -345,16 +351,18 @@ void Compute_Pressure( reax_system* system, control_params *control, } } - /* sum up internal and external pressure */ #if defined(DEBUG) fprintf(stderr, "p%d:p_int(%10.5f %10.5f %10.5f)p_ext(%10.5f %10.5f %10.5f)\n", system->my_rank, int_press[0], int_press[1], int_press[2], data->my_ext_press[0], data->my_ext_press[1], data->my_ext_press[2] ); #endif + + /* sum up internal and external pressure */ MPI_Allreduce( int_press, data->int_press, - 3, MPI_DOUBLE, MPI_SUM, mpi_data->comm_mesh3D ); + 3, MPI_DOUBLE, MPI_SUM, mpi_data->comm_mesh3D ); MPI_Allreduce( data->my_ext_press, data->ext_press, - 3, MPI_DOUBLE, MPI_SUM, mpi_data->comm_mesh3D ); + 3, MPI_DOUBLE, MPI_SUM, mpi_data->comm_mesh3D ); + #if defined(DEBUG) fprintf( stderr, "p%d: %10.5f %10.5f %10.5f\n", system->my_rank, @@ -365,20 +373,21 @@ void Compute_Pressure( reax_system* system, control_params *control, #endif /* kinetic contribution */ - data->kin_press = 2.*(E_CONV * data->sys_en.e_kin) / (3.*big_box->V * P_CONV); + data->kin_press = 2. * (E_CONV * data->sys_en.e_kin) + / (3. 
* big_box->V * P_CONV); /* Calculate total pressure in each direction */ data->tot_press[0] = data->kin_press - - (( data->int_press[0] + data->ext_press[0] ) / - ( big_box->box_norms[1] * big_box->box_norms[2] * P_CONV )); + (( data->int_press[0] + data->ext_press[0] ) / + ( big_box->box_norms[1] * big_box->box_norms[2] * P_CONV )); data->tot_press[1] = data->kin_press - - (( data->int_press[1] + data->ext_press[1] ) / - ( big_box->box_norms[0] * big_box->box_norms[2] * P_CONV )); + (( data->int_press[1] + data->ext_press[1] ) / + ( big_box->box_norms[0] * big_box->box_norms[2] * P_CONV )); data->tot_press[2] = data->kin_press - - (( data->int_press[2] + data->ext_press[2] ) / - ( big_box->box_norms[0] * big_box->box_norms[1] * P_CONV )); + (( data->int_press[2] + data->ext_press[2] ) / + ( big_box->box_norms[0] * big_box->box_norms[1] * P_CONV )); /* Average pressure for the whole box */ data->iso_bar.P = diff --git a/PG-PuReMD/src/tool_box.c b/PG-PuReMD/src/tool_box.c index 2c2c0a5040f01547fb7e9bddf91ab4c90615eb94..a6f570c8c2b3fdb2a7d1afd789490f8a1f5fcd57 100644 --- a/PG-PuReMD/src/tool_box.c +++ b/PG-PuReMD/src/tool_box.c @@ -78,57 +78,6 @@ void SumScanB( int n, int me, int wsize, int root, MPI_Comm comm, int *nbuf ) } -/************** taken from box.c **************/ -void Transform( rvec x1, simulation_box *box, char flag, rvec x2 ) -{ - int i, j; - real tmp; - -#if defined(DEBUG) - fprintf( stderr, ">x1: (%lf, %lf, %lf)\n",x1[0],x1[1],x1[2] ); -#endif - - if ( flag > 0 ) - { - for (i = 0; i < 3; i++) - { - tmp = 0.0; - for (j = 0; j < 3; j++) - { - tmp += box->trans[i][j] * x1[j]; - } - x2[i] = tmp; - } - } - else - { - for (i = 0; i < 3; i++) - { - tmp = 0.0; - for (j = 0; j < 3; j++) - { - tmp += box->trans_inv[i][j] * x1[j]; - } - x2[i] = tmp; - } - } - -#if defined(DEBUG) - fprintf( stderr, ">x2: (%lf, %lf, %lf)\n", x2[0], x2[1], x2[2] ); -#endif -} - - -void Transform_to_UnitBox( rvec x1, simulation_box *box, char flag, rvec x2 ) -{ - Transform( x1, box, flag, x2 ); - - x2[0] /= box->box_norms[0]; - x2[1] /= box->box_norms[1]; - x2[2] /= box->box_norms[2]; -} - - /* determine whether point p is inside the box */ void Fit_to_Periodic_Box( simulation_box *box, rvec *p ) { diff --git a/PG-PuReMD/src/tool_box.h b/PG-PuReMD/src/tool_box.h index a1f55910ff34fe08aceb5a84aa7e270a855d9291..717a8a80e490171e8b6aa4249dd432d5ad7e3da7 100644 --- a/PG-PuReMD/src/tool_box.h +++ b/PG-PuReMD/src/tool_box.h @@ -34,9 +34,6 @@ int SumScan( int, int, int, MPI_Comm ); void SumScanB( int, int, int, int, MPI_Comm, int* ); -/* from box.h */ -void Transform_to_UnitBox( rvec, simulation_box*, char, rvec ); - void Fit_to_Periodic_Box( simulation_box*, rvec* ); /* from geo_tools.h */ @@ -223,6 +220,51 @@ static inline real DistSqr_to_Special_Point( rvec cp, rvec x ) return d_sqr; } + + +/************** taken from box.c **************/ +CUDA_HOST_DEVICE static inline void Transform( rvec x1, + simulation_box *box, char flag, rvec x2 ) +{ + int i, j; + real tmp; + + if ( flag > 0 ) + { + for ( i = 0; i < 3; i++ ) + { + tmp = 0.0; + for ( j = 0; j < 3; j++ ) + { + tmp += box->trans[i][j] * x1[j]; + } + x2[i] = tmp; + } + } + else + { + for ( i = 0; i < 3; i++ ) + { + tmp = 0.0; + for ( j = 0; j < 3; j++ ) + { + tmp += box->trans_inv[i][j] * x1[j]; + } + x2[i] = tmp; + } + } +} + + +CUDA_HOST_DEVICE static inline void Transform_to_UnitBox( rvec x1, + simulation_box *box, char flag, rvec x2 ) +{ + Transform( x1, box, flag, x2 ); + + x2[0] /= box->box_norms[0]; + x2[1] /= box->box_norms[1]; + x2[2] /= 
box->box_norms[2]; +} #endif
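

Reviewer note (appended after the patch, not part of it): the core new functionality here is the NPT path, where `Cuda_Velocity_Verlet_Berendsen_NPT()` calls `Cuda_Compute_Pressure()` and then `Cuda_Scale_Box()` to apply Berendsen-style weak coupling. As a reading aid, below is a minimal host-side sketch of the scaling factors that `Cuda_Scale_Box()` computes, assuming the standard Berendsen relations used in the diff; the helper names (`berendsen_mu`, `berendsen_lambda`, `clamp`) are illustrative only, and the clamp bounds stand in for the `MIN_dV`/`MAX_dV` and `MIN_dT`/`MAX_dT` macros defined elsewhere in PuReMD.

```c
/*
 * Illustrative sketch only, NOT part of the patch: the Berendsen
 * weak-coupling factors computed by Cuda_Scale_Box() before it scales
 * velocities, positions, and the box. Helper names are hypothetical.
 */
#include <math.h>

typedef double real;

static real clamp( real x, real lo, real hi )
{
    return (x < lo) ? lo : ((x > hi) ? hi : x);
}

/* Per-dimension box/position scaling factor (sNPT branch); for iNPT the
 * same factor, computed from the isotropic pressure, is reused in all
 * three dimensions. */
static real berendsen_mu( real dt, real tau_P, real P, real P_target,
        real mu_min, real mu_max )
{
    /* mu_d = (1 + (dt / tau_P) * (P_d - P_target))^(1/3), clamped */
    return clamp( pow( 1.0 + (dt / tau_P) * (P - P_target), 1.0 / 3.0 ),
            mu_min, mu_max );
}

/* Velocity scaling factor toward the target temperature. */
static real berendsen_lambda( real dt, real tau_T, real T, real T_target,
        real lambda_min, real lambda_max )
{
    /* lambda = sqrt(1 + (dt / tau_T) * (T_target / T - 1)); the clamp is
     * applied before the square root, as in Cuda_Scale_Box() above. */
    return sqrt( clamp( 1.0 + (dt / tau_T) * (T_target / T - 1.0),
                lambda_min, lambda_max ) );
}
```

With these factors, `Cuda_Scale_Box()` rescales velocities by `lambda` and positions by `mu` on the device via `bNVP_scale_velocities()`, multiplies the diagonal entries of `big_box` by `mu`, re-derives the local and ghost boxes through `Make_Consistent()`, `Setup_My_Box()`, and `Setup_My_Ext_Box()`, and finally copies the updated `simulation_box` structs back to the device.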