diff --git a/PuReMD/src/forces.c b/PuReMD/src/forces.c index cb30f01d67a567c4145b14bf618f0588f1eb7bbc..21823d49de3dd295bad85bfa20dbc7f73c540bbd 100644 --- a/PuReMD/src/forces.c +++ b/PuReMD/src/forces.c @@ -328,7 +328,7 @@ real Compute_tabH( real r_ij, int ti, int tj ) void Init_Forces( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, - output_controls *out_control, MPI_Comm comm ) + output_controls *out_control, MPI_Comm comm, mpi_datatypes *mpi_data ) { int i, j, pj; int start_i, end_i; @@ -353,6 +353,9 @@ void Init_Forces( reax_system *system, control_params *control, int total_sum[6]; int nt_flag; #endif + real t_start, t_cm_init, total_cm_init; + + t_cm_init = 0; far_nbrs = lists[FAR_NBRS]; bonds = lists[BONDS]; @@ -376,6 +379,7 @@ void Init_Forces( reax_system *system, control_params *control, renbr = (data->step - data->prev_steps) % control->reneighbor == 0; #if defined(NEUTRAL_TERRITORY) + t_start = Get_Time( ); nt_flag = 1; if( renbr ) { @@ -417,6 +421,7 @@ void Init_Forces( reax_system *system, control_params *control, mark[0] = mark[1] = 1; mark[2] = mark[3] = mark[4] = mark[5] = 2; + t_cm_init += Get_Timing_Info( t_start ); #endif for ( i = 0; i < system->N; ++i ) @@ -461,10 +466,12 @@ void Init_Forces( reax_system *system, control_params *control, ihb_top = -1; if ( local == 1 ) { + t_start = Get_Time( ); H->start[i] = Htop; H->entries[Htop].j = i; H->entries[Htop].val = sbp_i->eta; ++Htop; + t_cm_init += Get_Timing_Info( t_start ); if ( control->hbond_cut > 0 ) { @@ -527,6 +534,7 @@ void Init_Forces( reax_system *system, control_params *control, if ( local == 1 ) { + t_start = Get_Time( ); /* H matrix entry */ #if defined(NEUTRAL_TERRITORY) if ( atom_j->nt_dir > 0 || (j < system->n @@ -572,6 +580,7 @@ void Init_Forces( reax_system *system, control_params *control, ++Htop; } #endif + t_cm_init += Get_Timing_Info( t_start ); /* hydrogen bond lists */ if ( control->hbond_cut > 0 && (ihb == 1 || ihb == 2) && @@ -603,6 +612,7 @@ void Init_Forces( reax_system *system, control_params *control, #if defined(NEUTRAL_TERRITORY) else if ( local == 2 ) { + t_start = Get_Time( ); /* H matrix entry */ if( ( atom_j->nt_dir != -1 && mark[atom_i->nt_dir] != mark[atom_j->nt_dir] && ( H->format == SYM_FULL_MATRIX @@ -635,6 +645,7 @@ void Init_Forces( reax_system *system, control_params *control, ++Htop; } + t_cm_init += Get_Timing_Info( t_start ); } #endif @@ -659,6 +670,7 @@ void Init_Forces( reax_system *system, control_params *control, } Set_End_Index( i, btop_i, bonds ); + t_start = Get_Time( ); if ( local == 1 ) { H->end[i] = Htop; @@ -679,6 +691,7 @@ void Init_Forces( reax_system *system, control_params *control, } } #endif + t_cm_init += Get_Timing_Info( t_start ); } if ( far_nbrs->format == FULL_LIST ) @@ -712,6 +725,12 @@ void Init_Forces( reax_system *system, control_params *control, } } + MPI_Reduce(&t_cm_init, &total_cm_init, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world); + if( system->my_rank == MASTER_NODE ) + { + data->timing.init_qeq += total_cm_init / control->nprocs; + } + #if defined(DEBUG) Print_Sparse_Matrix2( system, H, NULL ); #endif @@ -1178,16 +1197,16 @@ void Estimate_Storages( reax_system *system, control_params *control, #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d @ estimate storages: Htop = %d, num_3body = %d\n", - system->my_rank, *Htop, *num_3body ); + system->my_rank, *Htop, *num_3body ); MPI_Barrier( comm ); #endif } void Compute_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control, - mpi_datatypes *mpi_data ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + mpi_datatypes *mpi_data ) { MPI_Comm comm; int qeq_flag; @@ -1209,7 +1228,7 @@ void Compute_Forces( reax_system *system, control_params *control, qeq_flag = 0; #endif if ( qeq_flag ) - Init_Forces( system, control, data, workspace, lists, out_control, comm ); + Init_Forces( system, control, data, workspace, lists, out_control, comm, mpi_data ); else Init_Forces_noQEq( system, control, data, workspace, lists, out_control, comm ); diff --git a/PuReMD/src/init_md.c b/PuReMD/src/init_md.c index 2df6f5e92ba7849047264cb39ae4a9d211007a20..f3efe9d1ac25acde1d4ea1bb0d58bf64fb834449 100644 --- a/PuReMD/src/init_md.c +++ b/PuReMD/src/init_md.c @@ -291,7 +291,7 @@ int Init_Simulation_Data( reax_system *system, control_params *control, data->timing.bonded = 0; data->timing.nonb = 0; data->timing.cm = ZERO; - data->timing.cm_sort_mat_rows = ZERO; + data->timing.init_qeq = ZERO; data->timing.cm_solver_comm = ZERO; data->timing.cm_solver_allreduce = ZERO; data->timing.cm_solver_pre_comp = ZERO; @@ -365,7 +365,7 @@ int Init_Simulation_Data( reax_system *system, control_params *control, data->timing.bonded = 0; data->timing.nonb = 0; data->timing.cm = ZERO; - data->timing.cm_sort_mat_rows = ZERO; + data->timing.init_qeq = ZERO; data->timing.cm_solver_comm = ZERO; data->timing.cm_solver_allreduce = ZERO; data->timing.cm_solver_pre_comp = ZERO; diff --git a/PuReMD/src/io_tools.c b/PuReMD/src/io_tools.c index eaa5503cb32c2bd88c82ed1dab1ba8dfd359585b..cf62b27b124b072b1a8a5358fa5e074545fe7ee1 100644 --- a/PuReMD/src/io_tools.c +++ b/PuReMD/src/io_tools.c @@ -100,7 +100,7 @@ int Init_Output_Files( reax_system *system, control_params *control, out_control->log = sfopen( temp, "w", "Init_Output_Files" ); fprintf( out_control->log, "%-6s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n", "step", "total", "comm", "neighbors", "init", "bonded", "nonbonded", - "CM", "CM Sort", "S iters", "Pre Comp", "Pre App", "S comm", "S allr", + "CM", "QEq Init", "S iters", "Pre Comp", "Pre App", "S comm", "S allr", "S spmv", "S vec ops", "S orthog", "S tsolve" ); fflush( out_control->log ); #endif @@ -1107,7 +1107,7 @@ void Output_Results( reax_system *system, control_params *control, data->timing.bonded * denom, data->timing.nonb * denom, data->timing.cm * denom, - data->timing.cm_sort_mat_rows * denom, + data->timing.init_qeq * denom, (double)( data->timing.cm_solver_iters * denom), data->timing.cm_solver_pre_comp * denom, data->timing.cm_solver_pre_app * denom, @@ -1126,7 +1126,7 @@ void Output_Results( reax_system *system, control_params *control, data->timing.bonded = 0; data->timing.nonb = 0; data->timing.cm = ZERO; - data->timing.cm_sort_mat_rows = ZERO; + data->timing.init_qeq = ZERO; data->timing.cm_solver_pre_comp = ZERO; data->timing.cm_solver_pre_app = ZERO; data->timing.cm_solver_comm = ZERO; diff --git a/PuReMD/src/linear_solvers.c b/PuReMD/src/linear_solvers.c index f22abd889073db65fa51f42e112c2c1e7aa83bf9..2ff188c20b6a9d7431cbb8c9327d7c477b70b845 100644 --- a/PuReMD/src/linear_solvers.c +++ b/PuReMD/src/linear_solvers.c @@ -1915,7 +1915,7 @@ int CG( reax_system *system, control_params *control, simulation_data *data, MPI_Barrier(mpi_data->world); //TODO only master node should report it - if ( i >= control->cm_solver_max_iters ) + if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE ) { fprintf( stderr, "CG convergence failed!\n" ); return i; diff --git a/PuReMD/src/qEq.c b/PuReMD/src/qEq.c index 066993d700b08dbb1c6ac0ce67d8f107cc1467f9..29cccf43ca375d07a91f01b28625911b09026a50 100644 --- a/PuReMD/src/qEq.c +++ b/PuReMD/src/qEq.c @@ -238,39 +238,42 @@ void Init_MatVec( reax_system *system, simulation_data *data, reax_atom *atom; /*if( (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 || - workspace->L == NULL ) { - //Print_Linear_System( system, control, workspace, data->step ); - Sort_Matrix_Rows( workspace->H ); - fprintf( stderr, "H matrix sorted\n" ); - Calculate_Droptol( workspace->H, workspace->droptol, control->cm_solver_pre_comp_droptol ); - fprintf( stderr, "drop tolerances calculated\n" ); - if( workspace->L == NULL ) { - fillin = Estimate_LU_Fill( workspace->H, workspace->droptol ); - - if( Allocate_Matrix( &(workspace->L), workspace->H->cap, fillin, FULL_MATRIX, comm ) == 0 || - Allocate_Matrix( &(workspace->U), workspace->H->cap, fillin, FULL_MATRIX, comm ) == 0 ) { - fprintf( stderr, "not enough memory for LU matrices. terminating.\n" ); - MPI_Abort( mpi_data->world, INSUFFICIENT_MEMORY ); - } + workspace->L == NULL ) + { + //Print_Linear_System( system, control, workspace, data->step ); + Sort_Matrix_Rows( workspace->H ); + fprintf( stderr, "H matrix sorted\n" ); + Calculate_Droptol( workspace->H, workspace->droptol, control->cm_solver_pre_comp_droptol ); + fprintf( stderr, "drop tolerances calculated\n" ); + if( workspace->L == NULL ) + { + fillin = Estimate_LU_Fill( workspace->H, workspace->droptol ); - workspace->L->n = workspace->H->n; - workspace->U->n = workspace->H->n; + if( Allocate_Matrix( &(workspace->L), workspace->H->cap, fillin, FULL_MATRIX, comm ) == 0 || + Allocate_Matrix( &(workspace->U), workspace->H->cap, fillin, FULL_MATRIX, comm ) == 0 ) + { + fprintf( stderr, "not enough memory for LU matrices. terminating.\n" ); + MPI_Abort( mpi_data->world, INSUFFICIENT_MEMORY ); + } + + workspace->L->n = workspace->H->n; + workspace->U->n = workspace->H->n; #if defined(DEBUG_FOCUS) -fprintf( stderr, "p%d: n=%d, fillin = %d\n", -system->my_rank, workspace->L->n, fillin ); -fprintf( stderr, "p%d: allocated memory: L = U = %ldMB\n", -system->my_rank,fillin*sizeof(sparse_matrix_entry)/(1024*1024) ); + fprintf( stderr, "p%d: n=%d, fillin = %d\n", + system->my_rank, workspace->L->n, fillin ); + fprintf( stderr, "p%d: allocated memory: L = U = %ldMB\n", + system->my_rank,fillin*sizeof(sparse_matrix_entry)/(1024*1024) ); #endif -} + } -ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U ); + ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U ); #if defined(DEBUG_FOCUS) -fprintf( stderr, "p%d: icholt finished\n", system->my_rank ); - //sprintf( fname, "%s.L%d.out", control->sim_name, data->step ); - //Print_Sparse_Matrix2( workspace->L, fname ); - //Print_Sparse_Matrix( U ); + fprintf( stderr, "p%d: icholt finished\n", system->my_rank ); + //sprintf( fname, "%s.L%d.out", control->sim_name, data->step ); + //Print_Sparse_Matrix2( workspace->L, fname ); + //Print_Sparse_Matrix( U ); #endif -}*/ + }*/ //TODO: fill in code for setting up and computing SAI, see sPuReMD code, // and remove diagonal preconditioner computation below (workspace->Hdia_inv) @@ -279,35 +282,35 @@ fprintf( stderr, "p%d: icholt finished\n", system->my_rank ); // control->cm_solver_pre_comp_sai_thres ); for ( i = 0; i < system->n; ++i ) -{ - atom = &( system->my_atoms[i] ); - - /* init pre-conditioner for H and init solution vectors */ - workspace->Hdia_inv[i] = 1. / system->reax_param.sbp[ atom->type ].eta; - workspace->b_s[i] = -system->reax_param.sbp[ atom->type ].chi; - workspace->b_t[i] = -1.0; - workspace->b[i][0] = -system->reax_param.sbp[ atom->type ].chi; - workspace->b[i][1] = -1.0; - - /* linear extrapolation for s and for t */ - // newQEq: no extrapolation! - //workspace->s[i] = 2 * atom->s[0] - atom->s[1]; //0; - //workspace->t[i] = 2 * atom->t[0] - atom->t[1]; //0; - //workspace->x[i][0] = 2 * atom->s[0] - atom->s[1]; //0; - //workspace->x[i][1] = 2 * atom->t[0] - atom->t[1]; //0; - - /* quadratic extrapolation for s and t */ - // workspace->s[i] = atom->s[2] + 3 * ( atom->s[0] - atom->s[1] ); - // workspace->t[i] = atom->t[2] + 3 * ( atom->t[0] - atom->t[1] ); - //workspace->x[i][0] = atom->s[2] + 3 * ( atom->s[0] - atom->s[1] ); - workspace->x[i][1] = atom->t[2] + 3 * ( atom->t[0] - atom->t[1] ); - - /* cubic extrapolation for s and t */ - workspace->x[i][0] = 4 * (atom->s[0] + atom->s[2]) - (6 * atom->s[1] + atom->s[3]); - //workspace->x[i][1] = 4*(atom->t[0]+atom->t[2])-(6*atom->t[1]+atom->t[3]); - - // fprintf(stderr, "i=%d s=%f t=%f\n", i, workspace->s[i], workspace->t[i]); -} + { + atom = &( system->my_atoms[i] ); + + /* init pre-conditioner for H and init solution vectors */ + workspace->Hdia_inv[i] = 1. / system->reax_param.sbp[ atom->type ].eta; + workspace->b_s[i] = -system->reax_param.sbp[ atom->type ].chi; + workspace->b_t[i] = -1.0; + workspace->b[i][0] = -system->reax_param.sbp[ atom->type ].chi; + workspace->b[i][1] = -1.0; + + /* linear extrapolation for s and for t */ + // newQEq: no extrapolation! + //workspace->s[i] = 2 * atom->s[0] - atom->s[1]; //0; + //workspace->t[i] = 2 * atom->t[0] - atom->t[1]; //0; + //workspace->x[i][0] = 2 * atom->s[0] - atom->s[1]; //0; + //workspace->x[i][1] = 2 * atom->t[0] - atom->t[1]; //0; + + /* quadratic extrapolation for s and t */ + // workspace->s[i] = atom->s[2] + 3 * ( atom->s[0] - atom->s[1] ); + // workspace->t[i] = atom->t[2] + 3 * ( atom->t[0] - atom->t[1] ); + //workspace->x[i][0] = atom->s[2] + 3 * ( atom->s[0] - atom->s[1] ); + workspace->x[i][1] = atom->t[2] + 3 * ( atom->t[0] - atom->t[1] ); + + /* cubic extrapolation for s and t */ + workspace->x[i][0] = 4 * (atom->s[0] + atom->s[2]) - (6 * atom->s[1] + atom->s[3]); + //workspace->x[i][1] = 4*(atom->t[0]+atom->t[2])-(6*atom->t[1]+atom->t[3]); + + // fprintf(stderr, "i=%d s=%f t=%f\n", i, workspace->s[i], workspace->t[i]); + } } @@ -324,6 +327,9 @@ void Calculate_Charges( reax_system *system, storage *workspace, scale = sizeof(real) / sizeof(void); q = (real*) malloc(system->N * sizeof(real)); + //s_sum = Parallel_Vector_Acc(workspace->s, system->n, mpi_data->world); + q = (real*) malloc(system->N * sizeof(real)); + //s_sum = Parallel_Vector_Acc(workspace->s, system->n, mpi_data->world); //t_sum = Parallel_Vector_Acc(workspace->t, system->n, mpi_data->world); my_sum[0] = my_sum[1] = 0; @@ -346,8 +352,7 @@ void Calculate_Charges( reax_system *system, storage *workspace, /* backup s & t */ atom->s[3] = atom->s[2]; atom->s[2] = atom->s[1]; - atom->s[1] = atom->s[0]; - //atom->s[0] = workspace->s[i]; + atom->s[0] = workspace->x[i][0]; atom->t[3] = atom->t[2]; @@ -384,7 +389,7 @@ static void Setup_Preconditioner_QEq( reax_system *system, control_params *contr if( system->my_rank == MASTER_NODE ) { - data->timing.cm_sort_mat_rows += total_sort / control->nprocs; + data->timing.init_qeq += total_sort / control->nprocs; data->timing.cm_solver_pre_comp += total_pc / control->nprocs; } } @@ -427,8 +432,7 @@ void QEq( reax_system *system, control_params *control, simulation_data *data, { if( control->cm_solver_pre_comp_refactor > 0 && ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0)) { - Setup_Preconditioner_QEq( system, control, data, workspace, mpi_data ); - + Compute_Preconditioner_QEq( system, control, data, workspace, mpi_data ); Compute_Preconditioner_QEq( system, control, data, workspace, mpi_data ); } } diff --git a/PuReMD/src/reax_types.h b/PuReMD/src/reax_types.h index c711320303b11c50a1713b446eb1ffaaea189683..644fceae9cca7cdf117b4b6292a77800bd710868 100644 --- a/PuReMD/src/reax_types.h +++ b/PuReMD/src/reax_types.h @@ -792,8 +792,8 @@ typedef struct real nonb; /* atomic charge distribution calculation time */ real cm; - /**/ - real cm_sort_mat_rows; + /* matrix initiation and sort time */ + real init_qeq; /**/ real cm_solver_comm; /**/