diff --git a/PuReMD/src/allocate.c b/PuReMD/src/allocate.c
index ca67bea5e5fca41c44b2248600a5ba454b4b89a9..74a9e161ece3c041527e83f01212b0d4921cb2f3 100644
--- a/PuReMD/src/allocate.c
+++ b/PuReMD/src/allocate.c
@@ -693,7 +693,8 @@ void Deallocate_Grid( grid *g )
    buffers are void*, type cast to the correct pointer type to access
    the allocated buffers */
 int Allocate_MPI_Buffers( mpi_datatypes *mpi_data, int est_recv,
-                          neighbor_proc *my_nbrs, char *msg )
+                          neighbor_proc *my_nbrs,
+                          char *msg )
 {
     int i;
     mpi_out_data *mpi_buf;
@@ -718,6 +719,19 @@ int Allocate_MPI_Buffers( mpi_datatypes *mpi_data, int est_recv,
             scalloc( my_nbrs[i].est_send, sizeof(boundary_atom),
                      "mpibuf:out_atoms", comm );
     }
+#if defined(NEUTRAL_TERRITORY)
+    /* Neutral Territory out buffers */
+    for ( i = 0; i < MAX_NT_NBRS; ++i )
+    {
+        mpi_buf = &( mpi_data->out_nt_buffers[i] );
+        /* allocate storage for the neighbor processor i */
+        mpi_buf->index = (int*)
+            scalloc( my_nbrs[i].est_send, sizeof(int), "mpibuf:nt_index", comm );
+        mpi_buf->out_atoms = (void*)
+            scalloc( my_nbrs[i].est_send, sizeof(boundary_atom), "mpibuf:nt_out_atoms",
+                     comm );
+    }
+#endif

     return SUCCESS;
 }
@@ -737,6 +751,14 @@ void Deallocate_MPI_Buffers( mpi_datatypes *mpi_data )
         sfree( mpi_buf->index, "mpibuf:index" );
         sfree( mpi_buf->out_atoms, "mpibuf:out_atoms" );
     }
+#if defined(NEUTRAL_TERRITORY)
+    for ( i = 0; i < MAX_NT_NBRS; ++i )
+    {
+        mpi_buf = &( mpi_data->out_nt_buffers[i] );
+        sfree( mpi_buf->index, "mpibuf:nt_index" );
+        sfree( mpi_buf->out_atoms, "mpibuf:nt_out_atoms" );
+    }
+#endif
 }
@@ -985,6 +1007,20 @@ void ReAllocate( reax_system *system, control_params *control,
                 break;
             }
         }
+#if defined(NEUTRAL_TERRITORY)
+        // otherwise check the individual outgoing Neutral Territory buffers,
+        // without discarding a flag already raised by the regular buffers
+        if ( mpi_flag == 0 )
+        {
+            for ( p = 0; p < MAX_NT_NBRS; ++p )
+            {
+                nbr_pr = &( system->my_nt_nbrs[p] );
+                nbr_data = &( mpi_data->out_nt_buffers[p] );
+                if ( nbr_data->cnt >= nbr_pr->est_send * 0.90 )
+                {
+                    mpi_flag = 1;
+                    break;
+                }
+            }
+        }
+#endif
     }

     if ( mpi_flag )
@@ -1001,6 +1037,7 @@ void ReAllocate( reax_system *system, control_params *control,
         system->est_trans = (system->est_recv * sizeof(boundary_atom)) / sizeof(mpi_atom);
         total_send = 0;
+
         for ( p = 0; p < MAX_NBRS; ++p )
         {
             nbr_pr = &( system->my_nbrs[p] );
@@ -1008,6 +1045,15 @@
             nbr_data = &( mpi_data->out_buffers[p] );
             nbr_pr->est_send = MAX( nbr_data->cnt * SAFER_ZONE, MIN_SEND );
             total_send += nbr_pr->est_send;
         }
+#if defined(NEUTRAL_TERRITORY)
+        for ( p = 0; p < MAX_NT_NBRS; ++p )
+        {
+            nbr_pr = &( system->my_nt_nbrs[p] );
+            nbr_data = &( mpi_data->out_nt_buffers[p] );
+            nbr_pr->est_send = MAX( nbr_data->cnt * SAFER_ZONE, MIN_SEND );
+        }
+#endif
+
 #if defined(DEBUG_FOCUS)
         fprintf( stderr, "p%d: reallocating mpi_buf: recv=%d send=%d total=%dMB\n",
                  system->my_rank, system->est_recv, total_send,
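The NEUTRAL_TERRITORY reallocation added above reuses the policy of the regular outgoing buffers: a buffer that reaches roughly 90% of its estimated capacity raises the reallocation flag, and the estimate is then regrown to MAX( cnt * SAFER_ZONE, MIN_SEND ). A minimal standalone sketch of that policy follows; the struct fields are stand-ins for mpi_out_data.cnt and neighbor_proc.est_send, and the SAFER_ZONE / MIN_SEND values are placeholders rather than the project's actual constants.

#include <stdio.h>

#define SAFER_ZONE 1.2      /* placeholder growth factor */
#define MIN_SEND   100      /* placeholder lower bound */
#define MAX(x,y)   (((x) > (y)) ? (x) : (y))

typedef struct { int cnt; } out_data_t;     /* stand-in for mpi_out_data */
typedef struct { int est_send; } nbr_t;     /* stand-in for neighbor_proc */

/* same 90%-full trigger as the NEUTRAL_TERRITORY block in ReAllocate() */
static int needs_realloc( const out_data_t *out, const nbr_t *nbr )
{
    return out->cnt >= nbr->est_send * 0.90;
}

int main( void )
{
    out_data_t out = { 95 };
    nbr_t nbr = { 100 };

    if ( needs_realloc( &out, &nbr ) )
    {
        /* regrow the estimate with headroom, as ReAllocate() does */
        nbr.est_send = MAX( out.cnt * SAFER_ZONE, MIN_SEND );
    }
    printf( "new est_send = %d\n", nbr.est_send );
    return 0;
}

Growing by a safety factor instead of to the exact observed count keeps the buffers from being rebuilt every time the outgoing atom count fluctuates slightly.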
diff --git a/PuReMD/src/basic_comm.c b/PuReMD/src/basic_comm.c
index 3014299478da41545f9bdd0da2efc5f4e379d7db..135abb6e49404c8305938ea510e5c0bb2aafd9c6 100644
--- a/PuReMD/src/basic_comm.c
+++ b/PuReMD/src/basic_comm.c
@@ -129,6 +129,60 @@ void Dist( reax_system* system, mpi_datatypes *mpi_data,
 }


+void Dist_NT( reax_system* system, mpi_datatypes *mpi_data,
+              void *buf, MPI_Datatype type, int scale, dist_packer pack )
+{
+    int d;
+    mpi_out_data *out_bufs;
+    MPI_Comm comm;
+    MPI_Request req[6];
+    MPI_Status stat[6];
+    neighbor_proc *nbr;
+
+#if defined(DEBUG)
+    fprintf( stderr, "p%d dist: entered\n", system->my_rank );
+#endif
+    comm = mpi_data->comm_mesh3D;
+    out_bufs = mpi_data->out_nt_buffers;
+
+    /* initiate recvs */
+    for ( d = 0; d < 6; ++d )
+    {
+        nbr = &(system->my_nt_nbrs[d]);
+        if ( nbr->atoms_cnt )
+        {
+            MPI_Irecv( buf + nbr->atoms_str * scale, nbr->atoms_cnt, type,
+                       nbr->receive_rank, d, comm, &(req[d]) );
+        }
+    }
+
+    for ( d = 0; d < 6; ++d )
+    {
+        /* send the message for direction d */
+        nbr = &(system->my_nt_nbrs[d]);
+        if ( out_bufs[d].cnt )
+        {
+            pack( buf, out_bufs + d );
+            MPI_Send( out_bufs[d].out_atoms, out_bufs[d].cnt, type,
+                      nbr->rank, d, comm );
+        }
+    }
+
+    for ( d = 0; d < 6; ++d )
+    {
+        nbr = &(system->my_nt_nbrs[d]);
+        if ( nbr->atoms_cnt )
+        {
+            // TODO: Wait(MPI_WAITANY)
+            MPI_Wait( &(req[d]), &(stat[d]) );
+        }
+    }
+
+#if defined(DEBUG)
+    fprintf( stderr, "p%d dist: done\n", system->my_rank );
+#endif
+}
+
+
 void real_unpacker( void *dummy_in, void *dummy_buf, mpi_out_data *out_buf )
 {
     int i;
@@ -236,6 +290,63 @@ void Coll( reax_system* system, mpi_datatypes *mpi_data,
     fprintf( stderr, "p%d coll: done\n", system->my_rank );
 #endif
 }
+
+
+void Coll_NT( reax_system* system, mpi_datatypes *mpi_data,
+              void *buf, MPI_Datatype type, int scale, coll_unpacker unpack )
+{
+    int d;
+    void *in[6];
+    mpi_out_data *out_bufs;
+    MPI_Comm comm;
+    MPI_Request req[6];
+    MPI_Status stat[6];
+    neighbor_proc *nbr;
+
+#if defined(DEBUG)
+    fprintf( stderr, "p%d coll: entered\n", system->my_rank );
+#endif
+    comm = mpi_data->comm_mesh3D;
+    out_bufs = mpi_data->out_nt_buffers;
+
+    for ( d = 0; d < 6; ++d )
+    {
+        /* initiate recvs: the collected data comes back from the rank
+         * that the outgoing buffer was sent to in Dist_NT */
+        nbr = &(system->my_nt_nbrs[d]);
+        in[d] = mpi_data->in_nt_buffer[d];
+        if ( out_bufs[d].cnt )
+        {
+            MPI_Irecv( in[d], out_bufs[d].cnt, type, nbr->rank, d, comm, &(req[d]) );
+        }
+    }
+
+    for ( d = 0; d < 6; ++d )
+    {
+        /* send the imported block for direction d back to its owner */
+        nbr = &(system->my_nt_nbrs[d]);
+        if ( nbr->atoms_cnt )
+        {
+            MPI_Send( buf + nbr->atoms_str * scale, nbr->atoms_cnt, type,
+                      nbr->receive_rank, d, comm );
+        }
+    }
+
+    for ( d = 0; d < 6; ++d )
+    {
+        if ( out_bufs[d].cnt )
+        {
+            //TODO: WAITANY
+            MPI_Wait( &(req[d]), &(stat[d]) );
+            unpack( in[d], buf, out_bufs + d );
+        }
+    }
+
+#if defined(DEBUG)
+    fprintf( stderr, "p%d coll: done\n", system->my_rank );
+#endif
+}
+
+
 #endif /*PURE_REAX*/
 /*****************************************************************************/
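Dist_NT and Coll_NT pair their messages purely by direction index: the tag is d, the distribute phase receives from receive_rank and sends to rank, and the collect phase runs the same channel in the opposite direction, so its receive source is the processor the distribute phase sent to. Below is a standalone MPI sketch of that handshake on a periodic 1D Cartesian communicator; it is illustrative only and not part of the patch.

#include <mpi.h>
#include <stdio.h>

int main( int argc, char **argv )
{
    int wsize, me, send_rank, receive_rank, in = -1, back = -1;
    int dims[1], periods[1] = { 1 }, d = 0;   /* one direction, tag = d */
    MPI_Comm cart;
    MPI_Request req;

    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &wsize );
    dims[0] = wsize;
    MPI_Cart_create( MPI_COMM_WORLD, 1, dims, periods, 1, &cart );
    MPI_Comm_rank( cart, &me );
    /* send_rank plays the role of my_nt_nbrs[d].rank,
     * receive_rank the role of my_nt_nbrs[d].receive_rank */
    MPI_Cart_shift( cart, 0, 1, &receive_rank, &send_rank );

    /* distribute phase, as in Dist_NT: post the recv, then send */
    MPI_Irecv( &in, 1, MPI_INT, receive_rank, d, cart, &req );
    MPI_Send( &me, 1, MPI_INT, send_rank, d, cart );
    MPI_Wait( &req, MPI_STATUS_IGNORE );

    /* collect phase, as in Coll_NT: the data flows back the way it came,
     * so the roles of send_rank and receive_rank are swapped */
    MPI_Irecv( &back, 1, MPI_INT, send_rank, d, cart, &req );
    MPI_Send( &in, 1, MPI_INT, receive_rank, d, cart );
    MPI_Wait( &req, MPI_STATUS_IGNORE );

    printf( "rank %d: imported %d, collected %d back\n", me, in, back );
    MPI_Comm_free( &cart );
    MPI_Finalize( );
    return 0;
}

Posting every receive before issuing any send, as both functions above do, guarantees each blocking MPI_Send already has a matching receive posted, so the exchange does not deadlock even when a processor ends up being its own neighbor on small process grids.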
diff --git a/PuReMD/src/comm_tools.c b/PuReMD/src/comm_tools.c
index 8419e3efe44377e7d1680fe09b2bc06ef5756ca6..0b399825a58054fdfa6aa07f5351d4f221f45c55 100644
--- a/PuReMD/src/comm_tools.c
+++ b/PuReMD/src/comm_tools.c
@@ -26,6 +26,143 @@
 #include "vector.h"


+void Setup_NT_Comm( reax_system* system, control_params* control,
+                    mpi_datatypes *mpi_data )
+{
+    int i, d;
+    real bndry_cut;
+    neighbor_proc *nbr_pr;
+    simulation_box *my_box;
+    ivec nbr_coords, nbr_recv_coords;
+    ivec r[12] = {
+        {0, 0, -1},  // -z
+        {0, 0, +1},  // +z
+        {0, -1, 0},  // -y
+        {-1, -1, 0}, // -x-y
+        {-1, 0, 0},  // -x
+        {-1, +1, 0}, // -x+y
+
+        {0, 0, +1},  // +z
+        {0, 0, -1},  // -z
+        {0, +1, 0},  // +y
+        {+1, +1, 0}, // +x+y
+        {+1, 0, 0},  // +x
+        {+1, -1, 0}  // +x-y
+    };
+    my_box = &(system->my_box);
+    bndry_cut = system->bndry_cuts.ghost_cutoff;
+
+    /* identify my neighbors */
+    system->num_nt_nbrs = MAX_NT_NBRS;
+    for ( i = 0; i < system->num_nt_nbrs; ++i )
+    {
+        ivec_Sum( nbr_coords, system->my_coords, r[i] ); /* actual nbr coords */
+        nbr_pr = &(system->my_nt_nbrs[i]);
+        MPI_Cart_rank( mpi_data->comm_mesh3D, nbr_coords, &(nbr_pr->rank) );
+
+        /* set the rank of the neighbor processor in the receiving direction */
+        ivec_Sum( nbr_recv_coords, system->my_coords, r[i + 6] );
+        MPI_Cart_rank( mpi_data->comm_mesh3D, nbr_recv_coords, &(nbr_pr->receive_rank) );
+
+        for ( d = 0; d < 3; ++d )
+        {
+            /* determine the boundary area with this nbr */
+            if ( r[i][d] < 0 )
+            {
+                nbr_pr->bndry_min[d] = my_box->min[d];
+                nbr_pr->bndry_max[d] = my_box->min[d] + bndry_cut;
+            }
+            else if ( r[i][d] > 0 )
+            {
+                nbr_pr->bndry_min[d] = my_box->max[d] - bndry_cut;
+                nbr_pr->bndry_max[d] = my_box->max[d];
+            }
+            else
+            {
+                nbr_pr->bndry_min[d] = my_box->min[d];
+                nbr_pr->bndry_max[d] = my_box->max[d];
+            }
+
+            /* determine if it is a periodic neighbor */
+            if ( nbr_coords[d] < 0 )
+                nbr_pr->prdc[d] = -1;
+            else if ( nbr_coords[d] >= control->procs_by_dim[d] )
+                nbr_pr->prdc[d] = +1;
+            else
+                nbr_pr->prdc[d] = 0;
+        }
+    }
+}
+
+
+void Sort_Neutral_Territory( reax_system *system, int dir, mpi_out_data *out_bufs )
+{
+    int i, d, p, out_cnt;
+    reax_atom *atoms;
+    simulation_box *my_box;
+    boundary_atom *out_buf;
+    neighbor_proc *nbr_pr;
+
+    atoms = system->my_atoms;
+    my_box = &( system->my_box );
+
+    /* place each atom into the appropriate outgoing list */
+    nbr_pr = &( system->my_nt_nbrs[dir] );
+    for ( i = 0; i < system->n; ++i )
+    {
+        if ( nbr_pr->bndry_min[0] <= atoms[i].x[0] &&
+             atoms[i].x[0] < nbr_pr->bndry_max[0] &&
+             nbr_pr->bndry_min[1] <= atoms[i].x[1] &&
+             atoms[i].x[1] < nbr_pr->bndry_max[1] &&
+             nbr_pr->bndry_min[2] <= atoms[i].x[2] &&
+             atoms[i].x[2] < nbr_pr->bndry_max[2] )
+        {
+            out_bufs[dir].index[ out_bufs[dir].cnt ] = i;
+            out_bufs[dir].cnt++;
+        }
+    }
+}
+
+
+void Init_Neutral_Territory( reax_system* system, mpi_datatypes *mpi_data )
+{
+    int d, start, end, cnt;
+    mpi_out_data *out_bufs;
+    void *in;
+    MPI_Comm comm;
+    MPI_Request req;
+    MPI_Status stat;
+    neighbor_proc *nbr;
+
+    Reset_Out_Buffers( mpi_data->out_nt_buffers, system->num_nt_nbrs );
+    comm = mpi_data->comm_mesh3D;
+    out_bufs = mpi_data->out_nt_buffers;
+    cnt = 0;
+    end = system->n;
+
+    for ( d = 0; d < 6; ++d )
+    {
+        Sort_Neutral_Territory( system, d, out_bufs );
+        start = end;
+        nbr = &(system->my_nt_nbrs[d]);
+
+        MPI_Irecv( &cnt, 1, MPI_INT, nbr->receive_rank, d, comm, &req );
+        MPI_Send( &(out_bufs[d].cnt), 1, MPI_INT, nbr->rank, d, comm );
+        MPI_Wait( &req, &stat );
+
+        if ( mpi_data->in_nt_buffer[d] == NULL )
+        {
+            mpi_data->in_nt_buffer[d] = (void *)
+                smalloc( SAFER_ZONE * out_bufs[d].cnt * sizeof(real), "in", comm );
+        }
+
+        nbr->atoms_str = end;
+        nbr->atoms_cnt = cnt;
+        end += cnt;
+    }
+}
+
+
 void Setup_Comm( reax_system* system, control_params* control,
                  mpi_datatypes *mpi_data )
 {
@@ -653,6 +790,10 @@ void Comm_Atoms( reax_system *system, control_params *control,
 #endif

         Bin_Boundary_Atoms( system );
+
+#if defined(NEUTRAL_TERRITORY)
+        Init_Neutral_Territory( system, mpi_data );
+#endif
     }
     else
     {
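The rank/receive_rank pairing above works because the second half of the r[] table in Setup_NT_Comm is the element-wise negation of the first half: the processor reached through r[i + 6] is exactly the processor that reaches this one through r[i], so its send in direction i matches the receive posted here under the same tag. A small standalone self-check of that property (illustrative only):

#include <assert.h>
#include <stdio.h>

typedef int ivec[3];

int main( void )
{
    /* same 12 offsets as Setup_NT_Comm: 6 send directions, then their opposites */
    const ivec r[12] = {
        {0, 0, -1}, {0, 0, +1}, {0, -1, 0}, {-1, -1, 0}, {-1, 0, 0}, {-1, +1, 0},
        {0, 0, +1}, {0, 0, -1}, {0, +1, 0}, {+1, +1, 0}, {+1, 0, 0}, {+1, -1, 0}
    };
    int i, d;

    for ( i = 0; i < 6; ++i )
    {
        for ( d = 0; d < 3; ++d )
        {
            assert( r[i + 6][d] == -r[i][d] );
        }
    }
    printf( "every receive offset is the negated send offset\n" );
    return 0;
}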
diff --git a/PuReMD/src/forces.c b/PuReMD/src/forces.c
index 49e747a4c92042a1b2eba168a9c05417450b9332..d05110c13cfbf0e2d6d446df5ad8c484c3ece9bf 100644
--- a/PuReMD/src/forces.c
+++ b/PuReMD/src/forces.c
@@ -378,6 +378,7 @@ void Init_Forces( reax_system *system, control_params *control,
 #endif
         sbp_i = &(system->reax_param.sbp[type_i]);

+        //TODO: edit this part to include NT atoms
         if ( i < system->n )
         {
             local = 1;
diff --git a/PuReMD/src/linear_solvers.c b/PuReMD/src/linear_solvers.c
index 2d09f7daf4f570b4d52e7641cabc558dc3361d92..7c881c1b4497531dd7e96c39a82904f0309a552d 100644
--- a/PuReMD/src/linear_solvers.c
+++ b/PuReMD/src/linear_solvers.c
@@ -1266,7 +1266,6 @@ int CG( reax_system *system, control_params *control, simulation_data *data,
         t_start = Get_Time( );
         tmp = Parallel_Dot(workspace->d, workspace->q, system->n, mpi_data->world);
-        //TODO: all_Reduce time
         t_allreduce += Get_Timing_Info ( t_start );

         t_start = Get_Time( );
@@ -1300,7 +1299,6 @@ int CG( reax_system *system, control_params *control, simulation_data *data,
         t_start = Get_Time( );
         sig_old = sig_new;
         sig_new = Parallel_Dot(workspace->r, workspace->p, system->n, mpi_data->world);
-        //TODO all_reduce time
         t_allreduce += Get_Timing_Info( t_start );

         t_start = Get_Time( );
diff --git a/PuReMD/src/reax_defs.h b/PuReMD/src/reax_defs.h
index a61ce245a5fc29c580158f4f425805dd16506c5a..ecee152c938db9b5e33969a07906b7906b781058 100644
--- a/PuReMD/src/reax_defs.h
+++ b/PuReMD/src/reax_defs.h
@@ -94,6 +94,7 @@
 #define MASTER_NODE 0

 #define MAX_NBRS 6 //27
+#define MAX_NT_NBRS 6
 #define MYSELF 13  // encoding of relative coordinate (0,0,0)

 #define MAX_ITR 10
diff --git a/PuReMD/src/reax_types.h b/PuReMD/src/reax_types.h
index 56a8c1c9841d0f6ea67f74d364277ff9a768f5d3..7e3de437a470f54221824bb27a948401ed546fef 100644
--- a/PuReMD/src/reax_types.h
+++ b/PuReMD/src/reax_types.h
@@ -55,6 +55,7 @@
 #define ZERO (0.000000000000000e+00)
 #define REAX_MAX_STR 1024
 #define REAX_MAX_NBRS 6
+#define REAX_MAX_NT_NBRS 6
 #define REAX_MAX_3BODY_PARAM 5
 #define REAX_MAX_4BODY_PARAM 5
 #define REAX_MAX_ATOM_TYPES 25
@@ -205,8 +206,10 @@ typedef struct
     //MPI_Status recv_stat2[REAX_MAX_NBRS];

     mpi_out_data out_buffers[REAX_MAX_NBRS];
+    mpi_out_data out_nt_buffers[REAX_MAX_NT_NBRS];
     void *in1_buffer;
     void *in2_buffer;
+    void *in_nt_buffer[REAX_MAX_NT_NBRS];
 } mpi_datatypes;


@@ -484,6 +487,7 @@ typedef struct
 typedef struct
 {
     int rank;
+    int receive_rank;
     int est_send, est_recv;
     int atoms_str, atoms_cnt;
     ivec rltv, prdc;
@@ -525,9 +529,10 @@ typedef struct
     int n, N, bigN, numH;
     int local_cap, total_cap, gcell_cap, Hcap;
     int est_recv, est_trans, max_recved;
-    int wsize, my_rank, num_nbrs;
+    int wsize, my_rank, num_nbrs, num_nt_nbrs;
     ivec my_coords;
     neighbor_proc my_nbrs[REAX_MAX_NBRS];
+    neighbor_proc my_nt_nbrs[REAX_MAX_NT_NBRS];
     int *global_offset;
     simulation_box big_box, my_box, my_ext_box;
     grid my_grid;
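One detail the patch does not show is where mpi_data->in_nt_buffer[] is first zeroed: Init_Neutral_Territory only allocates an entry when it is still NULL, so the array has to be NULL-initialized before the first reneighboring step. A minimal sketch of that initialization, assumed to sit wherever the other mpi_data fields (in1_buffer, in2_buffer) are first set up; the placement is an assumption, not something the diff confirms.

#if defined(NEUTRAL_TERRITORY)
    int i;

    /* hypothetical placement: alongside the existing in1_buffer/in2_buffer setup */
    for ( i = 0; i < REAX_MAX_NT_NBRS; ++i )
    {
        mpi_data->in_nt_buffer[i] = NULL;
    }
#endif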