From 1257b0429b3b3ec2e8e5aa89c32e510b46dd9345 Mon Sep 17 00:00:00 2001 From: Abdullah Alperen <alperena@msu.edu> Date: Thu, 22 Nov 2018 14:25:37 -0500 Subject: [PATCH] PuReMD: continue NT for Jacobi debugging --- PuReMD/src/allocate.c | 6 +- PuReMD/src/allocate.h | 2 +- PuReMD/src/basic_comm.c | 57 ++++++++++++---- PuReMD/src/comm_tools.c | 17 +++-- PuReMD/src/forces.c | 132 +++++++++++++++++++++++++----------- PuReMD/src/init_md.c | 2 +- PuReMD/src/linear_solvers.c | 36 ++++++++-- PuReMD/src/reax_defs.h | 1 + PuReMD/src/reax_types.h | 5 +- 9 files changed, 186 insertions(+), 72 deletions(-) diff --git a/PuReMD/src/allocate.c b/PuReMD/src/allocate.c index 862aa0e1..ffdd965b 100644 --- a/PuReMD/src/allocate.c +++ b/PuReMD/src/allocate.c @@ -708,7 +708,7 @@ void Deallocate_Grid( grid *g ) buffers are void*, type cast to the correct pointer type to access the allocated buffers */ int Allocate_MPI_Buffers( mpi_datatypes *mpi_data, int est_recv, - neighbor_proc *my_nbrs, + neighbor_proc *my_nbrs, neighbor_proc *my_nt_nbrs, char *msg ) { int i; @@ -744,6 +744,7 @@ int Allocate_MPI_Buffers( mpi_datatypes *mpi_data, int est_recv, mpi_data->in_nt_buffer[i] = (void*) scalloc( my_nbrs[i].est_recv, sizeof(real), "mpibuf:in_nt_buffer", comm ); + //TODO mpi_buf = &( mpi_data->out_nt_buffers[i] ); /* allocate storage for the neighbor processor i */ mpi_buf->index = (int*) @@ -1096,7 +1097,8 @@ void ReAllocate( reax_system *system, control_params *control, /* reallocate mpi buffers */ Deallocate_MPI_Buffers( mpi_data ); ret = Allocate_MPI_Buffers( mpi_data, system->est_recv, - system->my_nbrs, msg ); + system->my_nbrs, system->my_nt_nbrs, + msg ); if ( ret != SUCCESS ) { fprintf( stderr, "%s", msg ); diff --git a/PuReMD/src/allocate.h b/PuReMD/src/allocate.h index 5458f29c..669861a2 100644 --- a/PuReMD/src/allocate.h +++ b/PuReMD/src/allocate.h @@ -36,7 +36,7 @@ void Allocate_Grid( reax_system*, MPI_Comm ); void Deallocate_Grid( grid* ); -int Allocate_MPI_Buffers( mpi_datatypes*, int, neighbor_proc*, char* ); +int Allocate_MPI_Buffers( mpi_datatypes*, int, neighbor_proc*, neighbor_proc*, char* ); int Allocate_Matrix( sparse_matrix**, int, int, int, MPI_Comm ); diff --git a/PuReMD/src/basic_comm.c b/PuReMD/src/basic_comm.c index 834034ed..83f01e52 100644 --- a/PuReMD/src/basic_comm.c +++ b/PuReMD/src/basic_comm.c @@ -141,7 +141,6 @@ void Dist_NT( reax_system* system, mpi_datatypes *mpi_data, neighbor_proc *nbr; #if defined(DEBUG) - fprintf( stderr, "p%d dist: entered\n", system->my_rank ); #endif comm = mpi_data->comm_mesh3D; out_bufs = mpi_data->out_nt_buffers; @@ -157,10 +156,22 @@ void Dist_NT( reax_system* system, mpi_datatypes *mpi_data, MPI_Irecv( buf + nbr->atoms_str * scale, nbr->atoms_cnt, type, nbr->receive_rank, d, comm, &(req[d]) ); } + + /*if(d==0) + { + fprintf( stdout, "p%d dist: entered and need to receive %d numbers from %d\n", system->my_rank, nbr->atoms_cnt, nbr->receive_rank ); + fflush( stdout ); + }*/ } for( d = 0; d < 6; ++d) { + nbr = &(system->my_nt_nbrs[d]); + if(d==0) + { + //fprintf( stdout, "p%d dist: entered and need to send %d numbers to %d\n", system->my_rank, out_bufs[d].cnt, nbr->rank ); + //fflush( stdout ); + } /* send both messages in dimension d */ if ( out_bufs[d].cnt ) { @@ -171,19 +182,34 @@ void Dist_NT( reax_system* system, mpi_datatypes *mpi_data, } /* + MPI_Barrier( MPI_COMM_WORLD ); + fprintf( stdout, "p%d dist: MISSION COMPLETED\n", system->my_rank ); + fflush( stdout ); + */ + for( d = 0; d < 6; ++d ) { + //fprintf( stdout, "p%d dist: direction %d\n", system->my_rank, d ); + //fflush( stdout ); nbr = &(system->my_nbrs[d]); if ( nbr->atoms_cnt ) { MPI_Wait( &(req[d]), &(stat[d]) ); } } - */ - for( d = 0; d < count; ++d ) + + + /*for( d = 0; d < count; ++d ) { MPI_Waitany( REAX_MAX_NT_NBRS, req, &index, stat); - } + }*/ + + + /* + MPI_Barrier( MPI_COMM_WORLD ); + fprintf( stdout, "p%d dist: MPI_Waitany is done\n", system->my_rank ); + fflush( stdout ); + */ #if defined(DEBUG) @@ -317,47 +343,54 @@ void Coll_NT( reax_system* system, mpi_datatypes *mpi_data, fprintf( stderr, "p%d coll: entered\n", system->my_rank ); #endif comm = mpi_data->comm_mesh3D; - out_bufs = mpi_data->out_buffers; + out_bufs = mpi_data->out_nt_buffers; count = 0; for ( d = 0; d < 6; ++d ) { /* initiate recvs */ - nbr = &(system->my_nbrs[d]); + nbr = &(system->my_nt_nbrs[d]); in[d] = mpi_data->in_nt_buffer[d]; if ( out_bufs[d].cnt ) { count++; MPI_Irecv(in[d], out_bufs[d].cnt, type, nbr->receive_rank, d, comm, &(req[d])); + //fprintf( stdout, "p%d coll: d = %d, receive_rank = %d, cnt = %d\n", system->my_rank, d, nbr->receive_rank, out_bufs[d].cnt ); } } for( d = 0; d < 6; ++d ) { /* send both messages in direction d */ - nbr = &(system->my_nbrs[d]); + nbr = &(system->my_nt_nbrs[d]); if ( nbr->atoms_cnt ) { MPI_Send( buf + nbr->atoms_str * scale, nbr->atoms_cnt, type, nbr->rank, d, comm ); + //fprintf( stdout, "p%d coll: d = %d, send_rank = %d, cnt = %d\n", system->my_rank, d, nbr->rank, nbr->atoms_cnt ); } } - - /* + for( d = 0; d < 6; ++d ) { + fprintf( stderr, "p%d coll: d = %d\n", system->my_rank, d ); + fflush( stdout ); if ( out_bufs[d].cnt ) { MPI_Wait( &(req[d]), &(stat[d])); + //fprintf( stderr, "p%d coll: %d status complete\n", system->my_rank, d ); + //fflush( stdout ); unpack( in[d], buf, out_bufs + d ); } } - */ - for( d = 0; d < count; ++d ) + //fprintf( stderr, "p%d coll: entered\n", system->my_rank ); + fflush( stdout ); + + /*for( d = 0; d < count; ++d ) { MPI_Waitany( REAX_MAX_NT_NBRS, req, &index, stat); unpack( in[index], buf, out_bufs + index ); - } + }*/ #if defined(DEBUG) fprintf( stderr, "p%d coll: done\n", system->my_rank ); diff --git a/PuReMD/src/comm_tools.c b/PuReMD/src/comm_tools.c index 5646b9cf..2162456c 100644 --- a/PuReMD/src/comm_tools.c +++ b/PuReMD/src/comm_tools.c @@ -33,7 +33,7 @@ void Setup_NT_Comm( reax_system* system, control_params* control, real bndry_cut; neighbor_proc *nbr_pr; simulation_box *my_box; - ivec nbr_coords; + ivec nbr_coords, nbr_recv_coords; ivec r[12] = { {0, 0, -1}, // -z {0, 0, +1}, // +z @@ -50,20 +50,18 @@ void Setup_NT_Comm( reax_system* system, control_params* control, {+1, -1, 0} // +x-y }; my_box = &(system->my_box); - // TODO: verify this line - //bndry_cut = system->bndry_cuts.ghost_cutoff; - bndry_cut = control->vlist_cut; + bndry_cut = system->bndry_cuts.ghost_cutoff; /* identify my neighbors */ system->num_nt_nbrs = REAX_MAX_NT_NBRS; for ( i = 0; i < system->num_nt_nbrs; ++i ) { - ivec_Sum( nbr_coords, system->my_coords, r[i] ); /* actual nbr coords */ nbr_pr = &(system->my_nt_nbrs[i]); + ivec_Sum( nbr_coords, system->my_coords, r[i] ); /* actual nbr coords */ MPI_Cart_rank( mpi_data->comm_mesh3D, nbr_coords, &(nbr_pr->rank) ); /* set the rank of the neighbor processor in the receiving direction */ - ivec_Sum( nbr_coords, system->my_coords, r[i + 6] ); /* actual nbr coords */ - MPI_Cart_rank( mpi_data->comm_mesh3D, nbr_coords, &(nbr_pr->receive_rank) ); + ivec_Sum( nbr_recv_coords, system->my_coords, r[i + 6] ); /* actual nbr coords */ + MPI_Cart_rank( mpi_data->comm_mesh3D, nbr_recv_coords, &(nbr_pr->receive_rank) ); for ( d = 0; d < 3; ++d ) { @@ -164,7 +162,8 @@ void Init_Neutral_Territory( reax_system* system, mpi_datatypes *mpi_data ) if( mpi_data->in_nt_buffer[d] == NULL ) { - mpi_data->in_nt_buffer[d] = (void *) smalloc( SAFER_ZONE * cnt * sizeof(real), "in", comm ); + //TODO + mpi_data->in_nt_buffer[d] = (void *) smalloc( 100 * cnt * sizeof(real), "in", comm ); } nbr = &(system->my_nt_nbrs[d]); @@ -199,7 +198,7 @@ void Estimate_NT_Atoms( reax_system *system, mpi_datatypes *mpi_data ) out_bufs[d].out_atoms = (void*) calloc( nbr->est_send, sizeof(real) ); /* sort the atoms to their outgoing buffers */ - // TODO: ??? + // TODO: to call or not to call? //Sort_Neutral_Territory( system, d, out_bufs, 1 ); } } diff --git a/PuReMD/src/forces.c b/PuReMD/src/forces.c index 7b66fa32..8178856d 100644 --- a/PuReMD/src/forces.c +++ b/PuReMD/src/forces.c @@ -412,6 +412,22 @@ void Init_Forces( reax_system *system, control_params *control, } #endif + //TODO: remove those variables + int local_to_local = 0; + //FILE *fp; +#if defined(NEUTRAL_TERRITORY) + //TODO: remove those variables + int local_to_nt = 0; + int nt_to_local = 0; + int nt_to_nt = 0; + //fp = fopen( "NT_Pairs.txt", "w" ); + int mark[6]; + mark[0] = mark[1] = 1; + mark[2] = mark[3] = mark[4] = mark[5] = 2; +#else + //fp = fopen( "Jacobi_Pairs.txt", "w" ); +#endif + for ( i = 0; i < system->N; ++i ) { atom_i = &(system->my_atoms[i]); @@ -451,12 +467,16 @@ void Init_Forces( reax_system *system, control_params *control, ihb = -1; ihb_top = -1; + //TODO: undo if ( local == 1 ) { H->start[i] = Htop; H->entries[Htop].j = i; H->entries[Htop].val = sbp_i->eta; ++Htop; + //TODO: + //fprintf( fp, "%d %d\n", atom_i->orig_id, atom_i->orig_id ); + ++local_to_local; if ( control->hbond_cut > 0 ) { @@ -480,14 +500,6 @@ void Init_Forces( reax_system *system, control_params *control, } } } - // TODO: Diag entries for NT atoms? - /*else if( local == 2 ) - { - H->start[atom_i->pos] = Htop; - H->entries[Htop].j = atom_i->pos; - H->entries[Htop].val = sbp_i->eta; - ++Htop; - }*/ /* update i-j distance - check if j is within cutoff */ for ( pj = start_i; pj < end_i; ++pj ) @@ -533,14 +545,18 @@ void Init_Forces( reax_system *system, control_params *control, || far_nbrs->format == FULL_LIST ) { #if defined(NEUTRAL_TERRITORY) - if( atom_j->nt_dir != -1 || j < system->n ) + if( atom_j->nt_dir > 0 || j < system->n ) { + //TODO + //fprintf( fp, "%d %d\n", atom_i->orig_id, atom_j->orig_id ); if( j < system->n ) { + ++local_to_local; H->entries[Htop].j = j; } else { + ++local_to_nt; H->entries[Htop].j = atom_j->pos; } @@ -555,6 +571,10 @@ void Init_Forces( reax_system *system, control_params *control, ++Htop; } #else + //TODO + //fprintf( fp, "%d %d\n", atom_i->orig_id, atom_j->orig_id ); + if( j < system->n ) + ++local_to_local; H->entries[Htop].j = j; if ( control->tabulate == 0 ) { @@ -599,38 +619,36 @@ void Init_Forces( reax_system *system, control_params *control, else if ( local == 2 ) { /* H matrix entry */ -#if defined(HALF_LIST) - if ( j < system->n || atom_i->orig_id < atom_j->orig_id ) -#endif + //TODO + if( ( atom_j->nt_dir != -1 && mark[atom_i->nt_dir] != mark[atom_j->nt_dir] ) || ( j < system->n && atom_i->nt_dir != 0 )) { - if( atom_j->nt_dir != -1 || j < system->n ) + //fprintf( fp, "%d %d\n", atom_i->orig_id, atom_j->orig_id ); + if( !nt_flag ) { - if( !nt_flag ) - { - nt_flag = 1; - H->start[atom_i->pos] = Htop; - } - if( j < system->n ) - { - H->entries[Htop].j = j; - } - else - { - H->entries[Htop].j = atom_j->pos; - } + nt_flag = 1; + H->start[atom_i->pos] = Htop; + } + if( j < system->n ) + { + ++nt_to_local; + H->entries[Htop].j = j; + } + else + { + ++nt_to_nt; + H->entries[Htop].j = atom_j->pos; + } - if ( control->tabulate == 0 ) - { - H->entries[Htop].val = Compute_H(r_ij, twbp->gamma, workspace->Tap); - } - else - { - H->entries[Htop].val = Compute_tabH(r_ij, type_i, type_j); - } - ++Htop; + if ( control->tabulate == 0 ) + { + H->entries[Htop].val = Compute_H(r_ij, twbp->gamma, workspace->Tap); + } + else + { + H->entries[Htop].val = Compute_tabH(r_ij, type_i, type_j); } + ++Htop; } - } #endif @@ -664,7 +682,15 @@ void Init_Forces( reax_system *system, control_params *control, #if defined(NEUTRAL_TERRITORY) else if ( local == 2 ) { - H->end[atom_i->pos] = Htop; + if( nt_flag ) + { + H->end[atom_i->pos] = Htop; + } + else + { + H->start[atom_i->pos] = 0; + H->end[atom_i->pos] = 0; + } } #endif } @@ -700,6 +726,30 @@ void Init_Forces( reax_system *system, control_params *control, } } + int Htot, localtot; + + MPI_Allreduce(&Htop, &Htot, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&local_to_local, &localtot, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + +#if defined(NEUTRAL_TERRITORY) + int tot2, tot3, tot4; + MPI_Allreduce(&local_to_nt, &tot2, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&nt_to_local, &tot3, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&nt_to_nt, &tot4, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); +#endif + + if(system->my_rank == MASTER_NODE ) + { + fprintf( stdout, "total number of entries across all matrices: %d\n", Htot ); + fprintf( stdout, "total number of local to local entries across all matrices: %d\n", localtot ); +#if defined(NEUTRAL_TERRITORY) + fprintf( stdout, "total number of local to nt entries across all matrices: %d\n", tot2 ); + fprintf( stdout, "total number of nt to local entries across all matrices: %d\n", tot3 ); + fprintf( stdout, "total number of nt to nt entries across all matrices: %d\n", tot4 ); +#endif + fflush( stdout ); + } + workspace->realloc.Htop = Htop; workspace->realloc.num_bonds = num_bonds; workspace->realloc.num_hbonds = num_hbonds; @@ -725,6 +775,8 @@ void Init_Forces( reax_system *system, control_params *control, Validate_Lists( system, workspace, lists, data->step, system->n, system->N, system->numH, comm ); + + //fclose( fp ); } @@ -1109,10 +1161,6 @@ void Estimate_Storages( reax_system *system, control_params *control, BO_pi2 = exp( C56 ); } else BO_pi2 = C56 = 0.0; - - /* Initially BO values are the uncorrected ones, page 1 */ - BO = BO_s + BO_pi + BO_pi2; - if ( BO >= control->bo_cut ) { ++bond_top[i]; @@ -1230,6 +1278,8 @@ void Compute_Forces( reax_system *system, control_params *control, //MPI_Barrier( mpi_data->world ); if ( system->my_rank == MASTER_NODE ) Update_Timing_Info( &t_start, &(data->timing.nonb) ); +#endif + Update_Timing_Info( &t_start, &(data->timing.nonb) ); #endif #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d @ step%d: nonbonded forces completed\n", diff --git a/PuReMD/src/init_md.c b/PuReMD/src/init_md.c index 4e3000b5..9ea85cca 100644 --- a/PuReMD/src/init_md.c +++ b/PuReMD/src/init_md.c @@ -589,7 +589,7 @@ int Init_Lists( reax_system *system, control_params *control, comm = mpi_data->world; - if ( control->cm_solver_pre_comp_type == SAI_PC ) + if ( control->cm_solver_pre_comp_type == SAI_PC || 1 ) { far_nbr_list_format = FULL_LIST; cm_format = SYM_FULL_MATRIX; diff --git a/PuReMD/src/linear_solvers.c b/PuReMD/src/linear_solvers.c index 36a6b15e..3606d90d 100644 --- a/PuReMD/src/linear_solvers.c +++ b/PuReMD/src/linear_solvers.c @@ -1137,7 +1137,7 @@ void dual_Sparse_MatVec( sparse_matrix *A, rvec2 *x, rvec2 *b, int N ) void Sparse_MatVec( sparse_matrix *A, real *x, real *b, int N ) { - int i, j, k, si; + int i, j, k, si, dim; real val; for ( i = 0; i < N; ++i ) @@ -1145,9 +1145,14 @@ void Sparse_MatVec( sparse_matrix *A, real *x, real *b, int N ) b[i] = 0; } +#if defined(NEUTRAL_TERRITORY) + dim = A->NT; +#else + dim = A->n; +#endif if ( A->format == SYM_HALF_MATRIX ) { - for ( i = 0; i < A->n; ++i ) + for ( i = 0; i < dim; ++i ) { si = A->start[i]; @@ -1167,7 +1172,7 @@ void Sparse_MatVec( sparse_matrix *A, real *x, real *b, int N ) } else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX ) { - for ( i = 0; i < A->n; ++i ) + for ( i = 0; i < dim; ++i ) { si = A->start[i]; @@ -1187,6 +1192,9 @@ int CG( reax_system *system, control_params *control, simulation_data *data, storage *workspace, sparse_matrix *H, real *b, real tol, real *x, mpi_datatypes* mpi_data, FILE *fout, int nprocs ) { + fprintf(stdout, "%d %d\n", H->n, H->NT); + fflush(stdout); + int i, j, scale; real tmp, alpha, beta, b_norm; real sig_old, sig_new; @@ -1227,6 +1235,18 @@ int CG( reax_system *system, control_params *control, simulation_data *data, t_comm += Get_Timing_Info( t_start ); } + else + { +#if defined(NEUTRAL_TERRITORY) + fprintf(stdout,"BEFORE COLL\n"); + fflush( stdout ); + + Coll_NT( system, mpi_data, workspace->q, MPI_DOUBLE, scale, real_unpacker ); + fprintf(stdout,"AFTER COLL\n"); + fflush( stdout ); +#endif + } + t_start = Get_Time( ); Vector_Sum( workspace->r , 1., b, -1., workspace->q, system->n ); t_vops += Get_Timing_Info( t_start ); @@ -1268,6 +1288,8 @@ int CG( reax_system *system, control_params *control, simulation_data *data, for ( i = 0; i < control->cm_solver_max_iters && sqrt(sig_new) / b_norm > tol; ++i ) { + fprintf( stdout, "p%d, i = %d, res = %.6f\n", system->my_rank, i, sqrt(sig_new) / b_norm ); + fflush( stdout ); t_start = Get_Time( ); #if defined(NEUTRAL_TERRITORY) Dist_NT( system, mpi_data, workspace->d, MPI_DOUBLE, scale, real_packer ); @@ -1294,6 +1316,12 @@ int CG( reax_system *system, control_params *control, simulation_data *data, #endif t_comm += Get_Timing_Info( t_start ); } + else + { +#if defined(NEUTRAL_TERRITORY) + Coll_NT( system, mpi_data, workspace->q, MPI_DOUBLE, scale, real_unpacker ); +#endif + } t_start = Get_Time( ); tmp = Parallel_Dot(workspace->d, workspace->q, system->n, mpi_data->world); @@ -1336,7 +1364,7 @@ int CG( reax_system *system, control_params *control, simulation_data *data, } t_start = Get_Time( ); - sig_old = sig_new; + //sig_new = Parallel_Dot(workspace->r, workspace->p, H->NT, mpi_data->world); sig_new = Parallel_Dot(workspace->r, workspace->p, system->n, mpi_data->world); t_allreduce += Get_Timing_Info( t_start ); diff --git a/PuReMD/src/reax_defs.h b/PuReMD/src/reax_defs.h index 0f3b9089..daa3f283 100644 --- a/PuReMD/src/reax_defs.h +++ b/PuReMD/src/reax_defs.h @@ -133,6 +133,7 @@ enum gcell_types { NO_NBRS = 0, NEAR_ONLY = 1, HBOND_ONLY = 2, FAR_ONLY = 4, NEAR_HBOND = 3, NEAR_FAR = 5, HBOND_FAR = 6, FULL_NBRS = 7, NATIVE = 8, NT_NBRS = 9 // 9 through 14 }; +enum nt_atom_type { TOWER = 1, PLATE = 2 }; enum atoms { C_ATOM = 0, H_ATOM = 1, O_ATOM = 2, N_ATOM = 3, S_ATOM = 4, SI_ATOM = 5, GE_ATOM = 6, X_ATOM = 7 diff --git a/PuReMD/src/reax_types.h b/PuReMD/src/reax_types.h index 464bfb3a..8a08c5ad 100644 --- a/PuReMD/src/reax_types.h +++ b/PuReMD/src/reax_types.h @@ -978,9 +978,10 @@ typedef struct /* matrix storage format */ int format; int cap, n, m; -#if defined(NEUTRAL_TERRITORY) + //TODO: uncomment +//#if defined(NEUTRAL_TERRITORY) int NT; -#endif +//#endif int *start, *end; sparse_matrix_entry *entries; } sparse_matrix; -- GitLab