From 99c6a251d1eb12931525ce7e6177befbca87dd64 Mon Sep 17 00:00:00 2001 From: Abdullah Alperen <alperena@msu.edu> Date: Wed, 19 Sep 2018 22:16:53 -0400 Subject: [PATCH] PuReMD: cleand code and add timing performance --- PuReMD/src/control.c | 11 + PuReMD/src/forces.c | 11 +- PuReMD/src/init_md.c | 338 +++++++------- PuReMD/src/io_tools.c | 877 ++++++++++++++++++------------------ PuReMD/src/linear_solvers.c | 353 +++++++++------ PuReMD/src/linear_solvers.h | 8 +- PuReMD/src/parallelreax.c | 4 +- PuReMD/src/qEq.c | 63 +-- PuReMD/src/reax_types.h | 53 ++- PuReMD/src/reset_tools.c | 5 +- 10 files changed, 963 insertions(+), 760 deletions(-) diff --git a/PuReMD/src/control.c b/PuReMD/src/control.c index cbdd4147..f0f6ea0c 100644 --- a/PuReMD/src/control.c +++ b/PuReMD/src/control.c @@ -74,6 +74,7 @@ char Read_Control_File( char *control_file, control_params* control, control->tabulate = 0; control->qeq_freq = 1; + control->cm_solver_max_iters = 100; control->q_err = 1e-6; control->sai_thres = 0.1; control->refactor = 100; @@ -246,11 +247,21 @@ char Read_Control_File( char *control_file, control_params* control, ival = atoi( tmp[1] ); control->qeq_freq = ival; } + else if ( strcmp(tmp[0], "cm_solver_max_iters") == 0 ) + { + ival = atoi( tmp[1] ); + control->cm_solver_max_iters = ival; + } else if ( strcmp(tmp[0], "q_err") == 0 ) { val = atof( tmp[1] ); control->q_err = val; } + else if ( strcmp(tmp[0], "cm_solver_pre_comp_type") == 0 ) + { + val = atoi( tmp[1] ); + control->cm_solver_pre_comp_type = val; + } else if ( strcmp(tmp[0], "sai_thres") == 0 ) { val = atof( tmp[1] ); diff --git a/PuReMD/src/forces.c b/PuReMD/src/forces.c index d157eb6f..bbbf110a 100644 --- a/PuReMD/src/forces.c +++ b/PuReMD/src/forces.c @@ -968,7 +968,7 @@ void Compute_Forces( reax_system *system, control_params *control, MPI_Comm comm; int qeq_flag; #if defined(LOG_PERFORMANCE) - real t_start = 0; + real t_start = 0, t_elapsed; //MPI_Barrier( mpi_data->world ); if ( system->my_rank == MASTER_NODE ) @@ -1005,7 +1005,9 @@ void Compute_Forces( reax_system *system, control_params *control, #if defined(LOG_PERFORMANCE) //MPI_Barrier( mpi_data->world ); if ( system->my_rank == MASTER_NODE ) - Update_Timing_Info( &t_start, &(data->timing.bonded) ); + { + t_start = Get_Time( ); + } #endif #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d @ step%d: completed bonded\n", @@ -1022,7 +1024,10 @@ void Compute_Forces( reax_system *system, control_params *control, #if defined(LOG_PERFORMANCE) //MPI_Barrier( mpi_data->world ); if ( system->my_rank == MASTER_NODE ) - Update_Timing_Info( &t_start, &(data->timing.qEq) ); + { + t_elapsed = Get_Timing_Info( t_start ); + data->timing.cm += t_elapsed; + } #endif #if defined(DEBUG_FOCUS) fprintf(stderr, "p%d @ step%d: qeq completed\n", system->my_rank, data->step); diff --git a/PuReMD/src/init_md.c b/PuReMD/src/init_md.c index 0720b0f7..0d9bcc40 100644 --- a/PuReMD/src/init_md.c +++ b/PuReMD/src/init_md.c @@ -54,8 +54,8 @@ #if defined(PURE_REAX) /************************ initialize system ************************/ int Reposition_Atoms( reax_system *system, control_params *control, - simulation_data *data, mpi_datatypes *mpi_data, - char *msg ) + simulation_data *data, mpi_datatypes *mpi_data, + char *msg ) { int i; rvec dx; @@ -130,8 +130,8 @@ void Generate_Initial_Velocities( reax_system *system, real T ) int Init_System( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - mpi_datatypes *mpi_data, char *msg ) + simulation_data *data, storage *workspace, + mpi_datatypes *mpi_data, char *msg ) { int i; reax_atom *atom; @@ -152,7 +152,7 @@ int Init_System( reax_system *system, control_params *control, for ( i = 0; i < MAX_NBRS; ++i ) nrecv[i] = 0; system->max_recved = 0; system->N = SendRecv( system, mpi_data, mpi_data->boundary_atom_type, nrecv, - Estimate_Boundary_Atoms, Unpack_Estimate_Message, 1 ); + Estimate_Boundary_Atoms, Unpack_Estimate_Message, 1 ); system->total_cap = MAX( (int)(system->N * SAFE_ZONE), MIN_CAP ); Bin_Boundary_Atoms( system ); @@ -177,11 +177,11 @@ int Init_System( reax_system *system, control_params *control, //Allocate_System( system, system->local_cap, system->total_cap, msg ); #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: n=%d local_cap=%d\n", - system->my_rank, system->n, system->local_cap ); + system->my_rank, system->n, system->local_cap ); fprintf( stderr, "p%d: N=%d total_cap=%d\n", - system->my_rank, system->N, system->total_cap ); + system->my_rank, system->N, system->total_cap ); fprintf( stderr, "p%d: numH=%d H_cap=%d\n", - system->my_rank, system->numH, system->Hcap ); + system->my_rank, system->numH, system->Hcap ); MPI_Barrier( mpi_data->world ); #endif @@ -198,8 +198,8 @@ int Init_System( reax_system *system, control_params *control, /************************ initialize simulation data ************************/ int Init_Simulation_Data( reax_system *system, control_params *control, - simulation_data *data, mpi_datatypes *mpi_data, - char *msg ) + simulation_data *data, mpi_datatypes *mpi_data, + char *msg ) { Reset_Simulation_Data( data, control->virial ); @@ -212,65 +212,65 @@ int Init_Simulation_Data( reax_system *system, control_params *control, switch ( control->ensemble ) { - case NVE: - data->N_f = 3 * system->bigN; - Evolve = Velocity_Verlet_NVE; - break; - - case bNVT: - data->N_f = 3 * system->bigN + 1; - Evolve = Velocity_Verlet_Berendsen_NVT; - break; - - case nhNVT: - fprintf( stderr, "WARNING: Nose-Hoover NVT is still under testing.\n" ); - //return FAILURE; - data->N_f = 3 * system->bigN + 1; - Evolve = Velocity_Verlet_Nose_Hoover_NVT_Klein; - if ( !control->restart || (control->restart && control->random_vel) ) - { - data->therm.G_xi = control->Tau_T * - (2.0 * data->sys_en.e_kin - data->N_f * K_B * control->T ); - data->therm.v_xi = data->therm.G_xi * control->dt; - data->therm.v_xi_old = 0; - data->therm.xi = 0; - } - break; - - case sNPT: /* Semi-Isotropic NPT */ - data->N_f = 3 * system->bigN + 4; - Evolve = Velocity_Verlet_Berendsen_NPT; - if ( !control->restart ) - Reset_Pressures( data ); - break; - - case iNPT: /* Isotropic NPT */ - data->N_f = 3 * system->bigN + 2; - Evolve = Velocity_Verlet_Berendsen_NPT; - if ( !control->restart ) - Reset_Pressures( data ); - break; - - case NPT: /* Anisotropic NPT */ - strcpy( msg, "init_simulation_data: option not yet implemented" ); - return FAILURE; - - data->N_f = 3 * system->bigN + 9; - Evolve = Velocity_Verlet_Berendsen_NPT; - /*if( !control->restart ) { - data->therm.G_xi = control->Tau_T * - (2.0 * data->my_en.e_Kin - data->N_f * K_B * control->T ); - data->therm.v_xi = data->therm.G_xi * control->dt; - data->iso_bar.eps = 0.33333 * log(system->box.volume); - data->inv_W = 1.0 / - ( data->N_f * K_B * control->T * SQR(control->Tau_P) ); - Compute_Pressure( system, control, data, out_control ); - }*/ - break; - - default: - strcpy( msg, "init_simulation_data: ensemble not recognized" ); - return FAILURE; + case NVE: + data->N_f = 3 * system->bigN; + Evolve = Velocity_Verlet_NVE; + break; + + case bNVT: + data->N_f = 3 * system->bigN + 1; + Evolve = Velocity_Verlet_Berendsen_NVT; + break; + + case nhNVT: + fprintf( stderr, "WARNING: Nose-Hoover NVT is still under testing.\n" ); + //return FAILURE; + data->N_f = 3 * system->bigN + 1; + Evolve = Velocity_Verlet_Nose_Hoover_NVT_Klein; + if ( !control->restart || (control->restart && control->random_vel) ) + { + data->therm.G_xi = control->Tau_T * + (2.0 * data->sys_en.e_kin - data->N_f * K_B * control->T ); + data->therm.v_xi = data->therm.G_xi * control->dt; + data->therm.v_xi_old = 0; + data->therm.xi = 0; + } + break; + + case sNPT: /* Semi-Isotropic NPT */ + data->N_f = 3 * system->bigN + 4; + Evolve = Velocity_Verlet_Berendsen_NPT; + if ( !control->restart ) + Reset_Pressures( data ); + break; + + case iNPT: /* Isotropic NPT */ + data->N_f = 3 * system->bigN + 2; + Evolve = Velocity_Verlet_Berendsen_NPT; + if ( !control->restart ) + Reset_Pressures( data ); + break; + + case NPT: /* Anisotropic NPT */ + strcpy( msg, "init_simulation_data: option not yet implemented" ); + return FAILURE; + + data->N_f = 3 * system->bigN + 9; + Evolve = Velocity_Verlet_Berendsen_NPT; + /*if( !control->restart ) { + data->therm.G_xi = control->Tau_T * + (2.0 * data->my_en.e_Kin - data->N_f * K_B * control->T ); + data->therm.v_xi = data->therm.G_xi * control->dt; + data->iso_bar.eps = 0.33333 * log(system->box.volume); + data->inv_W = 1.0 / + ( data->N_f * K_B * control->T * SQR(control->Tau_P) ); + Compute_Pressure( system, control, data, out_control ); + }*/ + break; + + default: + strcpy( msg, "init_simulation_data: ensemble not recognized" ); + return FAILURE; } /* initialize the timer(s) */ @@ -279,7 +279,24 @@ int Init_Simulation_Data( reax_system *system, control_params *control, { data->timing.start = Get_Time( ); #if defined(LOG_PERFORMANCE) - Reset_Timing( &data->timing ); + //Reset_Timing( &data->timing ); + /* init timing info */ + data->timing.total = data->timing.start; + data->timing.comm = ZERO; + data->timing.nbrs = 0; + data->timing.init_forces = 0; + data->timing.bonded = 0; + data->timing.nonb = 0; + data->timing.cm = ZERO; + data->timing.cm_sort_mat_rows = ZERO; + data->timing.cm_solver_comm = ZERO; + data->timing.cm_solver_pre_comp = ZERO; + data->timing.cm_solver_pre_app = ZERO; + data->timing.cm_solver_iters = 0; + data->timing.cm_solver_spmv = ZERO; + data->timing.cm_solver_vector_ops = ZERO; + data->timing.cm_solver_orthog = ZERO; + data->timing.cm_solver_tri_solve = ZERO; #endif } @@ -314,11 +331,11 @@ int Init_System( reax_system *system, control_params *control, char *msg ) #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: n=%d local_cap=%d\n", - system->my_rank, system->n, system->local_cap ); + system->my_rank, system->n, system->local_cap ); fprintf( stderr, "p%d: N=%d total_cap=%d\n", - system->my_rank, system->N, system->total_cap ); + system->my_rank, system->N, system->total_cap ); fprintf( stderr, "p%d: numH=%d H_cap=%d\n", - system->my_rank, system->numH, system->Hcap ); + system->my_rank, system->numH, system->Hcap ); #endif return SUCCESS; @@ -326,7 +343,7 @@ int Init_System( reax_system *system, control_params *control, char *msg ) int Init_Simulation_Data( reax_system *system, control_params *control, - simulation_data *data, char *msg ) + simulation_data *data, char *msg ) { Reset_Simulation_Data( data, control->virial ); @@ -335,7 +352,24 @@ int Init_Simulation_Data( reax_system *system, control_params *control, { data->timing.start = Get_Time( ); #if defined(LOG_PERFORMANCE) - Reset_Timing( &data->timing ); + //Reset_Timing( &data->timing ); + /* init timing info */ + data->timing.total = data->timing.start; + data->timing.comm = ZERO; + data->timing.nbrs = 0; + data->timing.init_forces = 0; + data->timing.bonded = 0; + data->timing.nonb = 0; + data->timing.cm = ZERO; + data->timing.cm_sort_mat_rows = ZERO; + data->timing.cm_solver_comm = ZERO; + data->timing.cm_solver_pre_comp = ZERO; + data->timing.cm_solver_pre_app = ZERO; + data->timing.cm_solver_iters = 0; + data->timing.cm_solver_spmv = ZERO; + data->timing.cm_solver_vector_ops = ZERO; + data->timing.cm_solver_orthog = ZERO; + data->timing.cm_solver_tri_solve = ZERO; #endif } @@ -385,17 +419,17 @@ void Init_Taper( control_params *control, storage *workspace, MPI_Comm comm ) workspace->Tap[2] = -210.0 * (swa3 * swb2 + swa2 * swb3) / d7; workspace->Tap[1] = 140.0 * swa3 * swb3 / d7; workspace->Tap[0] = (-35.0 * swa3 * swb2 * swb2 + 21.0 * swa2 * swb3 * swb2 + - 7.0 * swa * swb3 * swb3 + swb3 * swb3 * swb ) / d7; + 7.0 * swa * swb3 * swb3 + swb3 * swb3 * swb ) / d7; } int Init_Workspace( reax_system *system, control_params *control, - storage *workspace, MPI_Comm comm, char *msg ) + storage *workspace, MPI_Comm comm, char *msg ) { int ret; ret = Allocate_Workspace( system, control, workspace, - system->local_cap, system->total_cap, comm, msg ); + system->local_cap, system->total_cap, comm, msg ); if ( ret != SUCCESS ) return ret; @@ -411,7 +445,7 @@ int Init_Workspace( reax_system *system, control_params *control, /************** setup communication data structures **************/ int Init_MPI_Datatypes( reax_system *system, storage *workspace, - mpi_datatypes *mpi_data, MPI_Comm comm, char *msg ) + mpi_datatypes *mpi_data, MPI_Comm comm, char *msg ) { #if defined(PURE_REAX) int i, block[11]; @@ -434,7 +468,7 @@ int Init_MPI_Datatypes( reax_system *system, storage *workspace, mpi_data->in2_buffer = NULL; /* mpi_atom - [orig_id, imprt_id, type, num_bonds, num_hbonds, name, - x, v, f_old, s, t] */ + x, v, f_old, s, t] */ block[0] = block[1] = block[2] = block[3] = block[4] = 1; block[5] = 8; block[6] = block[7] = block[8] = 3; @@ -529,8 +563,8 @@ int Init_MPI_Datatypes( reax_system *system, storage *workspace, /********************** allocate lists *************************/ #if defined(PURE_REAX) int Init_Lists( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, reax_list **lists, - mpi_datatypes *mpi_data, char *msg ) + simulation_data *data, storage *workspace, reax_list **lists, + mpi_datatypes *mpi_data, char *msg ) { int i, num_nbrs; int total_hbonds, total_bonds, bond_cap, num_3body, cap_3body, Htop; @@ -539,7 +573,7 @@ int Init_Lists( reax_system *system, control_params *control, #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: before est_nbrs - local_cap=%d, total_cap=%d\n", - system->my_rank, system->local_cap, system->total_cap ); + system->my_rank, system->local_cap, system->total_cap ); #endif comm = mpi_data->world; @@ -550,7 +584,7 @@ int Init_Lists( reax_system *system, control_params *control, #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: after est_nbrs - local_cap=%d, total_cap=%d\n", - system->my_rank, system->local_cap, system->total_cap ); + system->my_rank, system->local_cap, system->total_cap ); #endif if ( !Make_List( system->total_cap, num_nbrs, TYP_FAR_NEIGHBOR, @@ -561,20 +595,20 @@ int Init_Lists( reax_system *system, control_params *control, } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated far_nbrs: num_far=%d, space=%dMB\n", - system->my_rank, num_nbrs, - (int)(num_nbrs * sizeof(far_neighbor_data) / (1024 * 1024)) ); + system->my_rank, num_nbrs, + (int)(num_nbrs * sizeof(far_neighbor_data) / (1024 * 1024)) ); #endif #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: before gen_nbrs - local_cap=%d, total_cap=%d\n", - system->my_rank, system->local_cap, system->total_cap ); + system->my_rank, system->local_cap, system->total_cap ); #endif Generate_Neighbor_Lists( system, data, workspace, lists ); #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: after gen_nbrs - local_cap=%d, total_cap=%d\n", - system->my_rank, system->local_cap, system->total_cap ); + system->my_rank, system->local_cap, system->total_cap ); #endif bond_top = (int*) calloc( system->total_cap, sizeof(int) ); @@ -582,23 +616,23 @@ int Init_Lists( reax_system *system, control_params *control, //bond_top = (int*) malloc( system->total_cap * sizeof(int) ); //hb_top = (int*) malloc( system->local_cap * sizeof(int) ); Estimate_Storages( system, control, lists, - &Htop, hb_top, bond_top, &num_3body, comm ); + &Htop, hb_top, bond_top, &num_3body, comm ); Allocate_Matrix( &(workspace->H), system->local_cap, Htop, comm ); workspace->L = NULL; workspace->U = NULL; - + //TODO: uncomment for SAI -// Allocate_Matrix2( &(workspace->H_spar_patt), workspace->H->n, system->local_cap, workspace->H->m, comm ); -// Allocate_Matrix( &(workspace->H_spar_patt_full), workspace->H->n, 2 * workspace->H->m - workspace->H->n ); -// Allocate_Matrix2( &(workspace->H_app_inv), workspace->H->n, system->local_cap, workspace->H->m, comm ); + // Allocate_Matrix2( &(workspace->H_spar_patt), workspace->H->n, system->local_cap, workspace->H->m, comm ); + // Allocate_Matrix( &(workspace->H_spar_patt_full), workspace->H->n, 2 * workspace->H->m - workspace->H->n ); + // Allocate_Matrix2( &(workspace->H_app_inv), workspace->H->n, system->local_cap, workspace->H->m, comm ); workspace->H_spar_patt = NULL; workspace->H_app_inv = NULL; #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated H matrix: Htop=%d, space=%dMB\n", - system->my_rank, Htop, - (int)(Htop * sizeof(sparse_matrix_entry) / (1024 * 1024)) ); + system->my_rank, Htop, + (int)(Htop * sizeof(sparse_matrix_entry) / (1024 * 1024)) ); #endif if ( control->hbond_cut > 0 ) @@ -613,15 +647,15 @@ int Init_Lists( reax_system *system, control_params *control, total_hbonds = MAX( total_hbonds * SAFER_ZONE, MIN_CAP * MIN_HBONDS ); if ( !Make_List( system->Hcap, total_hbonds, TYP_HBOND, - lists[HBONDS], comm ) ) + lists[HBONDS], comm ) ) { fprintf( stderr, "not enough space for hbonds list. terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated hbonds: total_hbonds=%d, space=%dMB\n", - system->my_rank, total_hbonds, - (int)(total_hbonds * sizeof(hbond_data) / (1024 * 1024)) ); + system->my_rank, total_hbonds, + (int)(total_hbonds * sizeof(hbond_data) / (1024 * 1024)) ); #endif } @@ -637,41 +671,41 @@ int Init_Lists( reax_system *system, control_params *control, bond_cap = MAX( total_bonds * SAFE_ZONE, MIN_CAP * MIN_BONDS ); if ( !Make_List( system->total_cap, bond_cap, TYP_BOND, - lists[BONDS], comm ) ) + lists[BONDS], comm ) ) { fprintf( stderr, "not enough space for bonds list. terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated bonds: total_bonds=%d, space=%dMB\n", - system->my_rank, bond_cap, - (int)(bond_cap * sizeof(bond_data) / (1024 * 1024)) ); + system->my_rank, bond_cap, + (int)(bond_cap * sizeof(bond_data) / (1024 * 1024)) ); #endif /* 3bodies list */ cap_3body = MAX( num_3body * SAFE_ZONE, MIN_3BODIES ); if ( !Make_List( bond_cap, cap_3body, TYP_THREE_BODY, - lists[THREE_BODIES], comm ) ) + lists[THREE_BODIES], comm ) ) { fprintf( stderr, "Problem in initializing angles list. Terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated 3-body list: num_3body=%d, space=%dMB\n", - system->my_rank, cap_3body, - (int)(cap_3body * sizeof(three_body_interaction_data) / (1024 * 1024)) ); + system->my_rank, cap_3body, + (int)(cap_3body * sizeof(three_body_interaction_data) / (1024 * 1024)) ); #endif #if defined(TEST_FORCES) if ( !Make_List( system->total_cap, bond_cap * 8, TYP_DDELTA, - lists[DDELTAS], comm ) ) + lists[DDELTAS], comm ) ) { fprintf( stderr, "Problem in initializing dDelta list. Terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } fprintf( stderr, "p%d: allocated dDelta list: num_ddelta=%d space=%ldMB\n", - system->my_rank, bond_cap * 30, - bond_cap * 8 * sizeof(dDelta_data) / (1024 * 1024) ); + system->my_rank, bond_cap * 30, + bond_cap * 8 * sizeof(dDelta_data) / (1024 * 1024) ); if ( !Make_List( bond_cap, bond_cap * 50, TYP_DBO, lists[DBOS], comm ) ) { @@ -679,8 +713,8 @@ int Init_Lists( reax_system *system, control_params *control, MPI_Abort( comm, INSUFFICIENT_MEMORY ); } fprintf( stderr, "p%d: allocated dbond list: num_dbonds=%d space=%ldMB\n", - system->my_rank, bond_cap * MAX_BONDS * 3, - bond_cap * MAX_BONDS * 3 * sizeof(dbond_data) / (1024 * 1024) ); + system->my_rank, bond_cap * MAX_BONDS * 3, + bond_cap * MAX_BONDS * 3 * sizeof(dbond_data) / (1024 * 1024) ); #endif sfree( hb_top, "hb_top" ); @@ -690,8 +724,8 @@ int Init_Lists( reax_system *system, control_params *control, } #elif defined(LAMMPS_REAX) int Init_Lists( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, reax_list **lists, - mpi_datatypes *mpi_data, char *msg ) + simulation_data *data, storage *workspace, reax_list **lists, + mpi_datatypes *mpi_data, char *msg ) { int i, num_nbrs; int total_hbonds, total_bonds, bond_cap, num_3body, cap_3body, Htop; @@ -703,7 +737,7 @@ int Init_Lists( reax_system *system, control_params *control, bond_top = (int*) calloc( system->total_cap, sizeof(int) ); hb_top = (int*) calloc( system->local_cap, sizeof(int) ); Estimate_Storages( system, control, lists, - &Htop, hb_top, bond_top, &num_3body, comm ); + &Htop, hb_top, bond_top, &num_3body, comm ); if ( control->hbond_cut > 0 ) { @@ -717,15 +751,15 @@ int Init_Lists( reax_system *system, control_params *control, total_hbonds = (int)(MAX( total_hbonds * SAFER_ZONE, MIN_CAP * MIN_HBONDS )); if ( !Make_List( system->Hcap, total_hbonds, TYP_HBOND, - lists[HBONDS], comm ) ) + lists[HBONDS], comm ) ) { fprintf( stderr, "not enough space for hbonds list. terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated hbonds: total_hbonds=%d, space=%dMB\n", - system->my_rank, total_hbonds, - (int)(total_hbonds * sizeof(hbond_data) / (1024 * 1024)) ); + system->my_rank, total_hbonds, + (int)(total_hbonds * sizeof(hbond_data) / (1024 * 1024)) ); #endif } @@ -741,41 +775,41 @@ int Init_Lists( reax_system *system, control_params *control, bond_cap = (int)(MAX( total_bonds * SAFE_ZONE, MIN_CAP * MIN_BONDS )); if ( !Make_List( system->total_cap, bond_cap, TYP_BOND, - lists[BONDS], comm ) ) + lists[BONDS], comm ) ) { fprintf( stderr, "not enough space for bonds list. terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated bonds: total_bonds=%d, space=%dMB\n", - system->my_rank, bond_cap, - (int)(bond_cap * sizeof(bond_data) / (1024 * 1024)) ); + system->my_rank, bond_cap, + (int)(bond_cap * sizeof(bond_data) / (1024 * 1024)) ); #endif /* 3bodies list */ cap_3body = (int)(MAX( num_3body * SAFE_ZONE, MIN_3BODIES )); if ( !Make_List( bond_cap, cap_3body, TYP_THREE_BODY, - lists[THREE_BODIES], comm ) ) + lists[THREE_BODIES], comm ) ) { fprintf( stderr, "Problem in initializing angles list. Terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated 3-body list: num_3body=%d, space=%dMB\n", - system->my_rank, cap_3body, - (int)(cap_3body * sizeof(three_body_interaction_data) / (1024 * 1024)) ); + system->my_rank, cap_3body, + (int)(cap_3body * sizeof(three_body_interaction_data) / (1024 * 1024)) ); #endif #if defined(TEST_FORCES) if ( !Make_List( system->total_cap, bond_cap * 8, TYP_DDELTA, - lists[DDELTAS], comm ) ) + lists[DDELTAS], comm ) ) { fprintf( stderr, "Problem in initializing dDelta list. Terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } fprintf( stderr, "p%d: allocated dDelta list: num_ddelta=%d space=%ldMB\n", - system->my_rank, bond_cap * 30, - bond_cap * 8 * sizeof(dDelta_data) / (1024 * 1024) ); + system->my_rank, bond_cap * 30, + bond_cap * 8 * sizeof(dDelta_data) / (1024 * 1024) ); if ( !Make_List( bond_cap, bond_cap * 50, TYP_DBO, lists[DBOS], comm ) ) { @@ -783,8 +817,8 @@ int Init_Lists( reax_system *system, control_params *control, MPI_Abort( comm, INSUFFICIENT_MEMORY ); } fprintf( stderr, "p%d: allocated dbond list: num_dbonds=%d space=%ldMB\n", - system->my_rank, bond_cap * MAX_BONDS * 3, - bond_cap * MAX_BONDS * 3 * sizeof(dbond_data) / (1024 * 1024) ); + system->my_rank, bond_cap * MAX_BONDS * 3, + bond_cap * MAX_BONDS * 3 * sizeof(dbond_data) / (1024 * 1024) ); #endif sfree( hb_top, "hb_top" ); @@ -798,9 +832,9 @@ int Init_Lists( reax_system *system, control_params *control, #if defined(PURE_REAX) void Initialize( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control, - mpi_datatypes *mpi_data ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + mpi_datatypes *mpi_data ) { char msg[MAX_STR]; @@ -808,9 +842,9 @@ void Initialize( reax_system *system, control_params *control, FAILURE ) { fprintf( stderr, "p%d: init_mpi_datatypes: could not create datatypes\n", - system->my_rank ); + system->my_rank ); fprintf( stderr, "p%d: mpi_data couldn't be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -821,7 +855,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: system could not be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -832,7 +866,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: sim_data couldn't be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -843,9 +877,9 @@ void Initialize( reax_system *system, control_params *control, FAILURE ) { fprintf( stderr, "p%d:init_workspace: not enough memory\n", - system->my_rank ); + system->my_rank ); fprintf( stderr, "p%d:workspace couldn't be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -857,7 +891,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: system could not be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -868,7 +902,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: could not open output files! terminating...\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -881,7 +915,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: couldn't create lookup table! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -896,14 +930,14 @@ void Initialize( reax_system *system, control_params *control, /*#ifdef TEST_FORCES Init_Force_Test_Functions(); fprintf(stderr,"p%d: initialized force test functions\n",system->my_rank); - #endif */ +#endif */ } #elif defined(LAMMPS_REAX) void Initialize( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control, - mpi_datatypes *mpi_data, MPI_Comm comm ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + mpi_datatypes *mpi_data, MPI_Comm comm ) { char msg[MAX_STR]; @@ -911,9 +945,9 @@ void Initialize( reax_system *system, control_params *control, if ( Init_MPI_Datatypes(system, workspace, mpi_data, comm, msg) == FAILURE ) { fprintf( stderr, "p%d: init_mpi_datatypes: could not create datatypes\n", - system->my_rank ); + system->my_rank ); fprintf( stderr, "p%d: mpi_data couldn't be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -924,7 +958,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: system could not be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -935,7 +969,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: sim_data couldn't be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -946,9 +980,9 @@ void Initialize( reax_system *system, control_params *control, FAILURE ) { fprintf( stderr, "p%d:init_workspace: not enough memory\n", - system->my_rank ); + system->my_rank ); fprintf( stderr, "p%d:workspace couldn't be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -960,7 +994,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: system could not be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -971,7 +1005,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: could not open output files! terminating...\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -984,7 +1018,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: couldn't create lookup table! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -1001,5 +1035,5 @@ void Initialize( reax_system *system, control_params *control, Init_Force_Test_Functions(); fprintf(stderr,"p%d: initialized force test functions\n",system->my_rank); #endif*/ - } +} #endif diff --git a/PuReMD/src/io_tools.c b/PuReMD/src/io_tools.c index 560bb5a1..122b4e4e 100644 --- a/PuReMD/src/io_tools.c +++ b/PuReMD/src/io_tools.c @@ -44,8 +44,8 @@ print_interaction Print_Interactions[NUM_INTRS]; /************************ initialize output controls ************************/ int Init_Output_Files( reax_system *system, control_params *control, - output_controls *out_control, mpi_datatypes *mpi_data, - char *msg ) + output_controls *out_control, mpi_datatypes *mpi_data, + char *msg ) { char temp[MAX_STR]; int ret; @@ -68,12 +68,12 @@ int Init_Output_Files( reax_system *system, control_params *control, { #if !defined(DEBUG) && !defined(DEBUG_FOCUS) fprintf( out_control->out, "%-6s%14s%14s%14s%11s%13s%13s\n", - "step", "total energy", "potential", "kinetic", - "T(K)", "V(A^3)", "P(Gpa)" ); + "step", "total energy", "potential", "kinetic", + "T(K)", "V(A^3)", "P(Gpa)" ); #else fprintf( out_control->out, "%-6s%24s%24s%24s%13s%16s%13s\n", - "step", "total energy", "potential", "kinetic", - "T(K)", "V(A^3)", "P(GPa)" ); + "step", "total energy", "potential", "kinetic", + "T(K)", "V(A^3)", "P(GPa)" ); #endif fflush( out_control->out ); } @@ -89,16 +89,16 @@ int Init_Output_Files( reax_system *system, control_params *control, { #if !defined(DEBUG) && !defined(DEBUG_FOCUS) fprintf( out_control->pot, - "%-6s%14s%14s%14s%14s%14s%14s%14s%14s%14s%14s%14s\n", - "step", "ebond", "eatom", "elp", - "eang", "ecoa", "ehb", "etor", "econj", - "evdw", "ecoul", "epol" ); + "%-6s%14s%14s%14s%14s%14s%14s%14s%14s%14s%14s%14s\n", + "step", "ebond", "eatom", "elp", + "eang", "ecoa", "ehb", "etor", "econj", + "evdw", "ecoul", "epol" ); #else fprintf( out_control->pot, - "%-6s%24s%24s%24s%24s%24s%24s%24s%24s%24s%24s%24s\n", - "step", "ebond", "eatom", "elp", - "eang", "ecoa", "ehb", "etor", "econj", - "evdw", "ecoul", "epol" ); + "%-6s%24s%24s%24s%24s%24s%24s%24s%24s%24s%24s%24s\n", + "step", "ebond", "eatom", "elp", + "eang", "ecoa", "ehb", "etor", "econj", + "evdw", "ecoul", "epol" ); #endif fflush( out_control->pot ); } @@ -113,9 +113,10 @@ int Init_Output_Files( reax_system *system, control_params *control, sprintf( temp, "%s.log", control->sim_name ); if ( (out_control->log = fopen( temp, "w" )) != NULL ) { - fprintf( out_control->log, "%6s%8s%8s%8s%8s%8s%8s%8s%8s\n", - "step", "total", "comm", "nbrs", "init", "bonded", "nonb", - "qeq", "matvecs" ); + fprintf( out_control->log, "%-6s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n", + "step", "total", "comm", "neighbors", "init", "bonded", + "nonbonded", "CM", "CM Sort", "S iters", "S comm", "Pre Comp", "Pre App", + "S spmv", "S vec ops", "S orthog", "S tsolve" ); fflush( out_control->log ); } else @@ -153,7 +154,7 @@ int Init_Output_Files( reax_system *system, control_params *control, if ( (out_control->dpl = fopen( temp, "w" )) != NULL ) { fprintf( out_control->dpl, "%6s%20s%30s", - "step", "molecule count", "avg dipole moment norm" ); + "step", "molecule count", "avg dipole moment norm" ); fflush( out_control->dpl ); } else @@ -170,7 +171,7 @@ int Init_Output_Files( reax_system *system, control_params *control, if ( (out_control->drft = fopen( temp, "w" )) != NULL ) { fprintf( out_control->drft, "%7s%20s%20s\n", - "step", "type count", "avg disp^2" ); + "step", "type count", "avg disp^2" ); fflush( out_control->drft ); } else @@ -188,11 +189,11 @@ int Init_Output_Files( reax_system *system, control_params *control, fashion controlled by their rank */ /*if( control->molecular_analysis ) { if( system->my_rank == MASTER_NODE ) { - sprintf( temp, "%s.mol", control->sim_name ); - if( (out_control->mol = fopen( temp, "w" )) == NULL ) { - strcpy(msg,"init_out_controls: .mol file could not be opened\n"); - return FAILURE; - } + sprintf( temp, "%s.mol", control->sim_name ); + if( (out_control->mol = fopen( temp, "w" )) == NULL ) { + strcpy(msg,"init_out_controls: .mol file could not be opened\n"); + return FAILURE; + } } MPI_Bcast( &(out_control->mol), 1, MPI_LONG, 0, MPI_COMM_WORLD ); @@ -466,7 +467,7 @@ int Init_Output_Files( reax_system *system, control_params *control, /************************ close output files ************************/ int Close_Output_Files( reax_system *system, control_params *control, - output_controls *out_control, mpi_datatypes *mpi_data ) + output_controls *out_control, mpi_datatypes *mpi_data ) { if ( out_control->write_steps > 0 ) End_Traj( system->my_rank, out_control ); @@ -548,11 +549,11 @@ void Print_Box( simulation_box* box, char *name, FILE *out ) fprintf( out, "%s:\n", name ); fprintf( out, "\tmin[%8.3f %8.3f %8.3f]\n", - box->min[0], box->min[1], box->min[2] ); + box->min[0], box->min[1], box->min[2] ); fprintf( out, "\tmax[%8.3f %8.3f %8.3f]\n", - box->max[0], box->max[1], box->max[2] ); + box->max[0], box->max[1], box->max[2] ); fprintf( out, "\tdims[%8.3f%8.3f%8.3f]\n", - box->box_norms[0], box->box_norms[1], box->box_norms[2] ); + box->box_norms[0], box->box_norms[1], box->box_norms[2] ); // fprintf( out, "box: {" ); // for( i = 0; i < 3; ++i ) @@ -598,34 +599,34 @@ void Print_Grid( grid* g, FILE *out ) }; fprintf( out, "\tnumber of grid cells: %d %d %d\n", - g->ncells[0], g->ncells[1], g->ncells[2] ); + g->ncells[0], g->ncells[1], g->ncells[2] ); fprintf( out, "\tgcell lengths: %8.3f %8.3f %8.3f\n", - g->cell_len[0], g->cell_len[1], g->cell_len[2] ); + g->cell_len[0], g->cell_len[1], g->cell_len[2] ); fprintf( out, "\tinverses of gcell lengths: %8.3f %8.3f %8.3f\n", - g->inv_len[0], g->inv_len[1], g->inv_len[2] ); + g->inv_len[0], g->inv_len[1], g->inv_len[2] ); fprintf( out, "\t---------------------------------\n" ); fprintf( out, "\tnumber of native gcells: %d %d %d\n", - g->native_cells[0], g->native_cells[1], g->native_cells[2] ); + g->native_cells[0], g->native_cells[1], g->native_cells[2] ); fprintf( out, "\tnative gcell span: %d-%d %d-%d %d-%d\n", - g->native_str[0], g->native_end[0], - g->native_str[1], g->native_end[1], - g->native_str[2], g->native_end[2] ); + g->native_str[0], g->native_end[0], + g->native_str[1], g->native_end[1], + g->native_str[2], g->native_end[2] ); fprintf( out, "\t---------------------------------\n" ); fprintf( out, "\tvlist gcell stretch: %d %d %d\n", - g->vlist_span[0], g->vlist_span[1], g->vlist_span[2] ); + g->vlist_span[0], g->vlist_span[1], g->vlist_span[2] ); fprintf( out, "\tnonbonded nbrs gcell stretch: %d %d %d\n", - g->nonb_span[0], g->nonb_span[1], g->nonb_span[2] ); + g->nonb_span[0], g->nonb_span[1], g->nonb_span[2] ); fprintf( out, "\tbonded nbrs gcell stretch: %d %d %d\n", - g->bond_span[0], g->bond_span[1], g->bond_span[2] ); + g->bond_span[0], g->bond_span[1], g->bond_span[2] ); fprintf( out, "\t---------------------------------\n" ); fprintf( out, "\tghost gcell span: %d %d %d\n", - g->ghost_span[0], g->ghost_span[1], g->ghost_span[2] ); + g->ghost_span[0], g->ghost_span[1], g->ghost_span[2] ); fprintf( out, "\tnonbonded ghost gcell span: %d %d %d\n", - g->ghost_nonb_span[0], g->ghost_nonb_span[1], g->ghost_nonb_span[2]); + g->ghost_nonb_span[0], g->ghost_nonb_span[1], g->ghost_nonb_span[2]); fprintf(out, "\thbonded ghost gcell span: %d %d %d\n", g->ghost_hbond_span[0], g->ghost_hbond_span[1], g->ghost_hbond_span[2]); fprintf( out, "\tbonded ghost gcell span: %d %d %d\n", - g->ghost_bond_span[0], g->ghost_bond_span[1], g->ghost_bond_span[2]); + g->ghost_bond_span[0], g->ghost_bond_span[1], g->ghost_bond_span[2]); //fprintf(out, "\t---------------------------------\n" ); //fprintf(out, "\tmax number of gcells at the boundary: %d\n", g->gcell_cap); fprintf( out, "\t---------------------------------\n" ); @@ -641,17 +642,17 @@ void Print_Grid( grid* g, FILE *out ) if ( g->cells[x][y][z].type != gc_type ) { fprintf( stderr, - "\tgcells from(%2d %2d %2d) to (%2d %2d %2d): %d - %s\n", - gc_str[0], gc_str[1], gc_str[2], x, y, z, - gc_type, gcell_type_text[gc_type] ); + "\tgcells from(%2d %2d %2d) to (%2d %2d %2d): %d - %s\n", + gc_str[0], gc_str[1], gc_str[2], x, y, z, + gc_type, gcell_type_text[gc_type] ); gc_type = g->cells[x][y][z].type; gc_str[0] = x; gc_str[1] = y; gc_str[2] = z; } fprintf( stderr, "\tgcells from(%2d %2d %2d) to (%2d %2d %2d): %d - %s\n", - gc_str[0], gc_str[1], gc_str[2], x, y, z, - gc_type, gcell_type_text[gc_type] ); + gc_str[0], gc_str[1], gc_str[2], x, y, z, + gc_type, gcell_type_text[gc_type] ); fprintf( out, "-------------------------------------\n" ); } @@ -678,21 +679,21 @@ void Print_GCell_Exchange_Bounds( int my_rank, neighbor_proc *my_nbrs ) nbr_pr = &(my_nbrs[nbr]); fprintf( f, "p%-2d GCELL BOUNDARIES with r(%2d %2d %2d):\n", - my_rank, r[0], r[1], r[2] ); + my_rank, r[0], r[1], r[2] ); fprintf( f, "\tsend_type %s: send(%d %d %d) to (%d %d %d)\n", - exch[nbr_pr->send_type], - nbr_pr->str_send[0], nbr_pr->str_send[1], - nbr_pr->str_send[2], - nbr_pr->end_send[0], nbr_pr->end_send[1], - nbr_pr->end_send[2] ); + exch[nbr_pr->send_type], + nbr_pr->str_send[0], nbr_pr->str_send[1], + nbr_pr->str_send[2], + nbr_pr->end_send[0], nbr_pr->end_send[1], + nbr_pr->end_send[2] ); fprintf( f, "\trecv_type %s: recv(%d %d %d) to (%d %d %d)\n", - exch[nbr_pr->recv_type], - nbr_pr->str_recv[0], nbr_pr->str_recv[1], - nbr_pr->str_recv[2], - nbr_pr->end_recv[0], nbr_pr->end_recv[1], - nbr_pr->end_recv[2] ); + exch[nbr_pr->recv_type], + nbr_pr->str_recv[0], nbr_pr->str_recv[1], + nbr_pr->str_recv[2], + nbr_pr->end_recv[0], nbr_pr->end_recv[1], + nbr_pr->end_recv[2] ); } fclose(f); @@ -724,8 +725,8 @@ void Print_Native_GCells( reax_system *system ) gc = &( g->cells[i][j][k] ); fprintf( f, "p%d gcell(%2d %2d %2d) of type %d(%s)\n", - system->my_rank, i, j, k, - gc->type, gcell_type_text[gc->type] ); + system->my_rank, i, j, k, + gc->type, gcell_type_text[gc->type] ); fprintf( f, "\tatom list start: %d, end: %d\n\t", gc->str, gc->end ); @@ -763,8 +764,8 @@ void Print_All_GCells( reax_system *system ) gc = &( g->cells[i][j][k] ); fprintf( f, "p%d gcell(%2d %2d %2d) of type %d(%s)\n", - system->my_rank, i, j, k, - gc->type, gcell_type_text[gc->type] ); + system->my_rank, i, j, k, + gc->type, gcell_type_text[gc->type] ); fprintf( f, "\tatom list start: %d, end: %d\n\t", gc->str, gc->end ); @@ -796,11 +797,11 @@ void Print_My_Atoms( reax_system *system, control_params *control, int step ) for ( i = 0; i < system->n; ++i ) fprintf( fh, "p%-2d %-5d %2d %24.15e%24.15e%24.15e\n", - system->my_rank, - system->my_atoms[i].orig_id, system->my_atoms[i].type, - system->my_atoms[i].x[0], - system->my_atoms[i].x[1], - system->my_atoms[i].x[2] ); + system->my_rank, + system->my_atoms[i].orig_id, system->my_atoms[i].type, + system->my_atoms[i].x[0], + system->my_atoms[i].x[1], + system->my_atoms[i].x[2] ); fclose( fh ); } @@ -824,18 +825,18 @@ void Print_My_Ext_Atoms( reax_system *system ) for ( i = 0; i < system->N; ++i ) fprintf( fh, "p%-2d %-5d imprt%-5d %2d %24.15e%24.15e%24.15e\n", - system->my_rank, system->my_atoms[i].orig_id, - system->my_atoms[i].imprt_id, system->my_atoms[i].type, - system->my_atoms[i].x[0], - system->my_atoms[i].x[1], - system->my_atoms[i].x[2] ); + system->my_rank, system->my_atoms[i].orig_id, + system->my_atoms[i].imprt_id, system->my_atoms[i].type, + system->my_atoms[i].x[0], + system->my_atoms[i].x[1], + system->my_atoms[i].x[2] ); fclose( fh ); } void Print_Far_Neighbors( reax_system *system, reax_list **lists, - control_params *control ) + control_params *control ) { char fname[100]; int i, j, id_i, id_j, nbr, natoms; @@ -857,16 +858,16 @@ void Print_Far_Neighbors( reax_system *system, reax_list **lists, id_j = system->my_atoms[nbr].orig_id; fprintf( fout, "%6d%6d%24.15e%24.15e%24.15e%24.15e\n", - id_i, id_j, far_nbrs->far_nbr_list[j].d, - far_nbrs->far_nbr_list[j].dvec[0], - far_nbrs->far_nbr_list[j].dvec[1], - far_nbrs->far_nbr_list[j].dvec[2] ); + id_i, id_j, far_nbrs->far_nbr_list[j].d, + far_nbrs->far_nbr_list[j].dvec[0], + far_nbrs->far_nbr_list[j].dvec[1], + far_nbrs->far_nbr_list[j].dvec[2] ); fprintf( fout, "%6d%6d%24.15e%24.15e%24.15e%24.15e\n", - id_j, id_i, far_nbrs->far_nbr_list[j].d, - -far_nbrs->far_nbr_list[j].dvec[0], - -far_nbrs->far_nbr_list[j].dvec[1], - -far_nbrs->far_nbr_list[j].dvec[2] ); + id_j, id_i, far_nbrs->far_nbr_list[j].d, + -far_nbrs->far_nbr_list[j].dvec[0], + -far_nbrs->far_nbr_list[j].dvec[1], + -far_nbrs->far_nbr_list[j].dvec[2] ); } } @@ -881,9 +882,9 @@ void Print_Sparse_Matrix( reax_system *system, sparse_matrix *A ) for ( i = 0; i < A->n; ++i ) for ( j = A->start[i]; j < A->end[i]; ++j ) fprintf( stderr, "%d %d %.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[A->entries[j].j].orig_id, - A->entries[j].val ); + system->my_atoms[i].orig_id, + system->my_atoms[A->entries[j].j].orig_id, + A->entries[j].val ); } @@ -895,9 +896,9 @@ void Print_Sparse_Matrix2( reax_system *system, sparse_matrix *A, char *fname ) for ( i = 0; i < A->n; ++i ) for ( j = A->start[i]; j < A->end[i]; ++j ) fprintf( f, "%d %d %.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[A->entries[j].j].orig_id, - A->entries[j].val ); + system->my_atoms[i].orig_id, + system->my_atoms[A->entries[j].j].orig_id, + A->entries[j].val ); fclose(f); } @@ -916,10 +917,10 @@ void Print_Symmetric_Sparse(reax_system *system, sparse_matrix *A, char *fname) { aj = &(system->my_atoms[A->entries[j].j]); fprintf( f, "%d %d %.15e\n", - ai->renumber, aj->renumber, A->entries[j].val ); + ai->renumber, aj->renumber, A->entries[j].val ); if ( A->entries[j].j < system->n && ai->renumber != aj->renumber ) fprintf( f, "%d %d %.15e\n", - aj->renumber, ai->renumber, A->entries[j].val ); + aj->renumber, ai->renumber, A->entries[j].val ); } } @@ -928,7 +929,7 @@ void Print_Symmetric_Sparse(reax_system *system, sparse_matrix *A, char *fname) void Print_Linear_System( reax_system *system, control_params *control, - storage *workspace, int step ) + storage *workspace, int step ) { int i, j; char fname[100]; @@ -943,9 +944,9 @@ void Print_Linear_System( reax_system *system, control_params *control, { ai = &(system->my_atoms[i]); fprintf( out, "%6d%2d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", - ai->renumber, ai->type, ai->x[0], ai->x[1], ai->x[2], - workspace->s[i], workspace->b_s[i], - workspace->t[i], workspace->b_t[i] ); + ai->renumber, ai->type, ai->x[0], ai->x[1], ai->x[2], + workspace->s[i], workspace->b_s[i], + workspace->t[i], workspace->b_t[i] ); } fclose( out ); @@ -955,21 +956,21 @@ void Print_Linear_System( reax_system *system, control_params *control, // print the incomplete H matrix /*sprintf( fname, "%s.p%dHinc%d", control->sim_name, system->my_rank, step ); - out = fopen( fname, "w" ); - H = workspace->H; - for( i = 0; i < H->n; ++i ) { + out = fopen( fname, "w" ); + H = workspace->H; + for( i = 0; i < H->n; ++i ) { ai = &(system->my_atoms[i]); for( j = H->start[i]; j < H->end[i]; ++j ) - if( H->entries[j].j < system->n ) { - aj = &(system->my_atoms[H->entries[j].j]); - fprintf( out, "%d %d %.15e\n", - ai->orig_id, aj->orig_id, H->entries[j].val ); - if( ai->orig_id != aj->orig_id ) + if( H->entries[j].j < system->n ) { + aj = &(system->my_atoms[H->entries[j].j]); fprintf( out, "%d %d %.15e\n", - aj->orig_id, ai->orig_id, H->entries[j].val ); - } - } - fclose( out );*/ + ai->orig_id, aj->orig_id, H->entries[j].val ); + if( ai->orig_id != aj->orig_id ) + fprintf( out, "%d %d %.15e\n", + aj->orig_id, ai->orig_id, H->entries[j].val ); + } + } + fclose( out );*/ // print the L from incomplete cholesky decomposition /*sprintf( fname, "%s.p%dL%d", control->sim_name, system->my_rank, step ); @@ -988,7 +989,7 @@ void Print_LinSys_Soln( reax_system *system, real *x, real *b_prm, real *b ) for ( i = 0; i < system->n; ++i ) fprintf( fout, "%6d%10.4f%10.4f%10.4f\n", - system->my_atoms[i].orig_id, x[i], b_prm[i], b[i] ); + system->my_atoms[i].orig_id, x[i], b_prm[i], b[i] ); fclose( fout ); } @@ -1005,10 +1006,10 @@ void Print_Charges( reax_system *system ) for ( i = 0; i < system->n; ++i ) fprintf( fout, "%6d %10.7f %10.7f %10.7f\n", - system->my_atoms[i].orig_id, - system->my_atoms[i].s[0], - system->my_atoms[i].t[0], - system->my_atoms[i].q ); + system->my_atoms[i].orig_id, + system->my_atoms[i].s[0], + system->my_atoms[i].t[0], + system->my_atoms[i].q ); fclose( fout ); } @@ -1034,15 +1035,15 @@ void Print_HBonds( reax_system *system, reax_list **lists, fprintf( fout, "%8d%8d %24.15e %24.15e %24.15e\n", i, phbond->nbr, phbond->ptr->dvec[0], phbond->ptr->dvec[1], phbond->ptr->dvec[2] ); -// fprintf( fout, "%8d%8d %8d %8d\n", i, phbond->nbr, -// phbond->scl, phbond->sym_index ); + // fprintf( fout, "%8d%8d %8d %8d\n", i, phbond->nbr, + // phbond->scl, phbond->sym_index ); } } fclose( fout ); } - + void Print_HBond_Indices( reax_system *system, reax_list **lists, control_params *control, int step ) { @@ -1083,9 +1084,9 @@ void Print_Bonds( reax_system *system, reax_list **lists, { pbond = &bonds->bond_list[pj]; bo_ij = &pbond->bo_data; -// fprintf( fout, "%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n", -// system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, -// pbond->d, bo_ij->BO, bo_ij->BO_s, bo_ij->BO_pi, bo_ij->BO_pi2 ); + // fprintf( fout, "%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n", + // system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, + // pbond->d, bo_ij->BO, bo_ij->BO_s, bo_ij->BO_pi, bo_ij->BO_pi2 ); fprintf( fout, "%8d%8d %24.15f %24.15f\n", i, pbond->nbr, //system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, pbond->d, bo_ij->BO ); @@ -1130,7 +1131,7 @@ void Print_Bond_List2( reax_system *system, reax_list *bonds, char *fname ) void Print_Total_Force( reax_system *system, simulation_data *data, - storage *workspace ) + storage *workspace ) { int i; @@ -1139,9 +1140,9 @@ void Print_Total_Force( reax_system *system, simulation_data *data, for ( i = 0; i < system->N; ++i ) fprintf( stderr, "%6d %f %f %f\n", - //"%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - workspace->f[i][0], workspace->f[i][1], workspace->f[i][2] ); + //"%6d%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + workspace->f[i][0], workspace->f[i][1], workspace->f[i][2] ); } @@ -1206,15 +1207,15 @@ void Print_Far_Neighbors_List_Adj_Format( reax_system *system, } void Output_Results( reax_system *system, control_params *control, - simulation_data *data, reax_list **lists, - output_controls *out_control, mpi_datatypes *mpi_data ) + simulation_data *data, reax_list **lists, + output_controls *out_control, mpi_datatypes *mpi_data ) { #if defined(LOG_PERFORMANCE) real t_elapsed, denom; #endif if ((out_control->energy_update_freq > 0 && - data->step % out_control->energy_update_freq == 0) || + data->step % out_control->energy_update_freq == 0) || (out_control->write_steps > 0 && data->step % out_control->write_steps == 0)) { @@ -1228,36 +1229,36 @@ void Output_Results( reax_system *system, control_params *control, { #if !defined(DEBUG) && !defined(DEBUG_FOCUS) fprintf( out_control->out, - "%-6d%14.2f%14.2f%14.2f%11.2f%13.2f%13.5f\n", - data->step, data->sys_en.e_tot, data->sys_en.e_pot, - E_CONV * data->sys_en.e_kin, data->therm.T, - system->big_box.V, data->iso_bar.P ); + "%-6d%14.2f%14.2f%14.2f%11.2f%13.2f%13.5f\n", + data->step, data->sys_en.e_tot, data->sys_en.e_pot, + E_CONV * data->sys_en.e_kin, data->therm.T, + system->big_box.V, data->iso_bar.P ); fprintf( out_control->pot, - "%-6d%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f\n", - data->step, - data->sys_en.e_bond, - data->sys_en.e_ov + data->sys_en.e_un, data->sys_en.e_lp, - data->sys_en.e_ang + data->sys_en.e_pen, data->sys_en.e_coa, - data->sys_en.e_hb, - data->sys_en.e_tor, data->sys_en.e_con, - data->sys_en.e_vdW, data->sys_en.e_ele, data->sys_en.e_pol); + "%-6d%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f\n", + data->step, + data->sys_en.e_bond, + data->sys_en.e_ov + data->sys_en.e_un, data->sys_en.e_lp, + data->sys_en.e_ang + data->sys_en.e_pen, data->sys_en.e_coa, + data->sys_en.e_hb, + data->sys_en.e_tor, data->sys_en.e_con, + data->sys_en.e_vdW, data->sys_en.e_ele, data->sys_en.e_pol); #else fprintf( out_control->out, - "%-6d%24.15e%24.15e%24.15e%13.5f%16.5f%13.5f\n", - data->step, data->sys_en.e_tot, data->sys_en.e_pot, - E_CONV * data->sys_en.e_kin, data->therm.T, - system->big_box.V, data->iso_bar.P ); + "%-6d%24.15e%24.15e%24.15e%13.5f%16.5f%13.5f\n", + data->step, data->sys_en.e_tot, data->sys_en.e_pot, + E_CONV * data->sys_en.e_kin, data->therm.T, + system->big_box.V, data->iso_bar.P ); fprintf( out_control->pot, - "%-6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", - data->step, - data->sys_en.e_bond, - data->sys_en.e_ov + data->sys_en.e_un, data->sys_en.e_lp, - data->sys_en.e_ang + data->sys_en.e_pen, data->sys_en.e_coa, - data->sys_en.e_hb, - data->sys_en.e_tor, data->sys_en.e_con, - data->sys_en.e_vdW, data->sys_en.e_ele, data->sys_en.e_pol); + "%-6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", + data->step, + data->sys_en.e_bond, + data->sys_en.e_ov + data->sys_en.e_un, data->sys_en.e_lp, + data->sys_en.e_ang + data->sys_en.e_pen, data->sys_en.e_coa, + data->sys_en.e_hb, + data->sys_en.e_tor, data->sys_en.e_con, + data->sys_en.e_vdW, data->sys_en.e_ele, data->sys_en.e_pol); #endif //DEBUG #if defined(LOG_PERFORMANCE) @@ -1266,36 +1267,60 @@ void Output_Results( reax_system *system, control_params *control, denom = 1.0 / out_control->energy_update_freq; else denom = 1; - fprintf( out_control->log, "%6d%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%6d\n", - data->step, - t_elapsed * denom, - data->timing.comm * denom, - data->timing.nbrs * denom, - data->timing.init_forces * denom, - data->timing.bonded * denom, - data->timing.nonb * denom, - data->timing.qEq * denom, - (int)((data->timing.s_matvecs + data->timing.t_matvecs)*denom) ); - - Reset_Timing( &(data->timing) ); + fprintf( out_control->log, "%6d %10.2f %10.2f %10.2f %10.2f %10.2f %10.2f %10.4f %10.4f %10.2f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f\n", + data->step, + t_elapsed * denom, + data->timing.comm * denom, + data->timing.nbrs * denom, + data->timing.init_forces * denom, + data->timing.bonded * denom, + data->timing.nonb * denom, + data->timing.cm * denom, + data->timing.cm_sort_mat_rows * denom, + (double)( data->timing.cm_solver_iters * denom), + data->timing.cm_solver_comm * denom, + data->timing.cm_solver_pre_comp * denom, + data->timing.cm_solver_pre_app * denom, + data->timing.cm_solver_spmv * denom, + data->timing.cm_solver_vector_ops * denom, + data->timing.cm_solver_orthog * denom, + data->timing.cm_solver_tri_solve * denom ); + + //Reset_Timing( &(data->timing) ); + data->timing.total = Get_Time( ); + data->timing.comm = ZERO; + data->timing.nbrs = 0; + data->timing.init_forces = 0; + data->timing.bonded = 0; + data->timing.nonb = 0; + data->timing.cm = ZERO; + data->timing.cm_sort_mat_rows = ZERO; + data->timing.cm_solver_comm = ZERO; + data->timing.cm_solver_pre_comp = ZERO; + data->timing.cm_solver_pre_app = ZERO; + data->timing.cm_solver_iters = 0; + data->timing.cm_solver_spmv = ZERO; + data->timing.cm_solver_vector_ops = ZERO; + data->timing.cm_solver_orthog = ZERO; + data->timing.cm_solver_tri_solve = ZERO; fflush( out_control->log ); #endif //LOG_PERFORMANCE if ( control->virial ) { fprintf( out_control->prs, - "%8d%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f\n", - data->step, - data->int_press[0], data->int_press[1], data->int_press[2], - data->ext_press[0], data->ext_press[1], data->ext_press[2], - data->kin_press ); + "%8d%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f\n", + data->step, + data->int_press[0], data->int_press[1], data->int_press[2], + data->ext_press[0], data->ext_press[1], data->ext_press[2], + data->kin_press ); fprintf( out_control->prs, - "%8s%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f\n", - "", system->big_box.box_norms[0], system->big_box.box_norms[1], - system->big_box.box_norms[2], - data->tot_press[0], data->tot_press[1], data->tot_press[2], - system->big_box.V ); + "%8s%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f\n", + "", system->big_box.box_norms[0], system->big_box.box_norms[1], + system->big_box.box_norms[2], + data->tot_press[0], data->tot_press[1], data->tot_press[2], + system->big_box.V ); fflush( out_control->prs); } @@ -1322,39 +1347,39 @@ void Output_Results( reax_system *system, control_params *control, void Debug_Marker_Bonded( output_controls *out_control, int step ) { fprintf( out_control->ebond, "step: %d\n%6s%6s%12s%12s%12s\n", - step, "atom1", "atom2", "bo", "ebond", "total" ); + step, "atom1", "atom2", "bo", "ebond", "total" ); fprintf( out_control->elp, "step: %d\n%6s%12s%12s%12s\n", - step, "atom", "nlp", "elp", "total" ); + step, "atom", "nlp", "elp", "total" ); fprintf( out_control->eov, "step: %d\n%6s%12s%12s\n", - step, "atom", "eov", "total" ); + step, "atom", "eov", "total" ); fprintf( out_control->eun, "step: %d\n%6s%12s%12s\n", - step, "atom", "eun", "total" ); + step, "atom", "eun", "total" ); fprintf( out_control->eval, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "atom3", "angle", "theta0", - "bo(12)", "bo(23)", "eval", "total" ); + step, "atom1", "atom2", "atom3", "angle", "theta0", + "bo(12)", "bo(23)", "eval", "total" ); fprintf( out_control->epen, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "atom3", "angle", "bo(12)", "bo(23)", - "epen", "total" ); + step, "atom1", "atom2", "atom3", "angle", "bo(12)", "bo(23)", + "epen", "total" ); fprintf( out_control->ecoa, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "atom3", "angle", "bo(12)", "bo(23)", - "ecoa", "total" ); + step, "atom1", "atom2", "atom3", "angle", "bo(12)", "bo(23)", + "ecoa", "total" ); fprintf( out_control->ehb, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "atom3", "r(23)", "angle", "bo(12)", - "ehb", "total" ); + step, "atom1", "atom2", "atom3", "r(23)", "angle", "bo(12)", + "ehb", "total" ); fprintf( out_control->etor, "step: %d\n%6s%6s%6s%6s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "atom3", "atom4", "phi", "bo(23)", - "etor", "total" ); + step, "atom1", "atom2", "atom3", "atom4", "phi", "bo(23)", + "etor", "total" ); fprintf( out_control->econ, "step:%d\n%6s%6s%6s%6s%12s%12s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "atom3", "atom4", - "phi", "bo(12)", "bo(23)", "bo(34)", "econ", "total" ); + step, "atom1", "atom2", "atom3", "atom4", + "phi", "bo(12)", "bo(23)", "bo(34)", "econ", "total" ); } void Debug_Marker_Nonbonded( output_controls *out_control, int step ) { fprintf( out_control->evdw, "step: %d\n%6s%6s%12s%12s%12s\n", - step, "atom1", "atom2", "r12", "evdw", "total" ); + step, "atom1", "atom2", "r12", "evdw", "total" ); fprintf( out_control->ecou, "step: %d\n%6s%6s%12s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "r12", "q1", "q2", "ecou", "total" ); + step, "atom1", "atom2", "r12", "q1", "q2", "ecou", "total" ); } #endif @@ -1362,16 +1387,16 @@ void Debug_Marker_Nonbonded( output_controls *out_control, int step ) #ifdef TEST_FORCES void Dummy_Printer( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { } void Print_Bond_Orders( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { int i, pj, pk; bond_order_data *bo_ij; @@ -1382,52 +1407,52 @@ void Print_Bond_Orders( reax_system *system, control_params *control, /* bond orders */ fprintf( out_control->fbo, "step: %d\n", data->step ); fprintf( out_control->fbo, "%6s%6s%12s%12s%12s%12s%12s\n", - "atom1", "atom2", "r_ij", "total_bo", "bo_s", "bo_p", "bo_pp" ); + "atom1", "atom2", "r_ij", "total_bo", "bo_s", "bo_p", "bo_pp" ); for ( i = 0; i < system->N; ++i ) for ( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) { bo_ij = &(bonds->bond_list[pj].bo_data); fprintf( out_control->fbo, - "%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[bonds->bond_list[pj].nbr].orig_id, - bonds->bond_list[pj].d, - bo_ij->BO, bo_ij->BO_s, bo_ij->BO_pi, bo_ij->BO_pi2 ); + "%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + system->my_atoms[bonds->bond_list[pj].nbr].orig_id, + bonds->bond_list[pj].d, + bo_ij->BO, bo_ij->BO_s, bo_ij->BO_pi, bo_ij->BO_pi2 ); } /* derivatives of bond orders */ fprintf( out_control->fdbo, "step: %d\n", data->step ); fprintf( out_control->fdbo, "%6s%6s%6s%24s%24s%24s\n", - "atom1", "atom2", "atom2", "dBO", "dBOpi", "dBOpi2" ); + "atom1", "atom2", "atom2", "dBO", "dBOpi", "dBOpi2" ); for ( i = 0; i < system->N; ++i ) for ( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) { /* fprintf( out_control->fdbo, "%6d %6d\tstart: %6d\tend: %6d\n", - system->my_atoms[i].orig_id, - system->my_atoms[bonds->bond_list[pj].nbr].orig_id, - Start_Index( pj, dBOs ), End_Index( pj, dBOs ) ); */ + system->my_atoms[i].orig_id, + system->my_atoms[bonds->bond_list[pj].nbr].orig_id, + Start_Index( pj, dBOs ), End_Index( pj, dBOs ) ); */ for ( pk = Start_Index(pj, dBOs); pk < End_Index(pj, dBOs); ++pk ) { dbo_k = &(dBOs->dbo_list[pk]); fprintf( out_control->fdbo, "%6d%6d%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[bonds->bond_list[pj].nbr].orig_id, - system->my_atoms[dbo_k->wrt].orig_id, - dbo_k->dBO[0], dbo_k->dBO[1], dbo_k->dBO[2] ); + system->my_atoms[i].orig_id, + system->my_atoms[bonds->bond_list[pj].nbr].orig_id, + system->my_atoms[dbo_k->wrt].orig_id, + dbo_k->dBO[0], dbo_k->dBO[1], dbo_k->dBO[2] ); fprintf( out_control->fdbo, "%6d%6d%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[bonds->bond_list[pj].nbr].orig_id, - system->my_atoms[dbo_k->wrt].orig_id, - dbo_k->dBOpi[0], dbo_k->dBOpi[1], dbo_k->dBOpi[2] ); + system->my_atoms[i].orig_id, + system->my_atoms[bonds->bond_list[pj].nbr].orig_id, + system->my_atoms[dbo_k->wrt].orig_id, + dbo_k->dBOpi[0], dbo_k->dBOpi[1], dbo_k->dBOpi[2] ); fprintf( out_control->fdbo, "%6d%6d%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[bonds->bond_list[pj].nbr].orig_id, - system->my_atoms[dbo_k->wrt].orig_id, - dbo_k->dBOpi2[0], dbo_k->dBOpi2[1], dbo_k->dBOpi2[2] ); + system->my_atoms[i].orig_id, + system->my_atoms[bonds->bond_list[pj].nbr].orig_id, + system->my_atoms[dbo_k->wrt].orig_id, + dbo_k->dBOpi2[0], dbo_k->dBOpi2[1], dbo_k->dBOpi2[2] ); } } } @@ -1442,15 +1467,15 @@ void Print_Forces( FILE *f, storage *workspace, int N, int step ) //fprintf( f, "%6d %23.15e %23.15e %23.15e\n", //fprintf( f, "%6d%12.6f%12.6f%12.6f\n", fprintf( f, "%6d %19.9e %19.9e %19.9e\n", - workspace->id_all[i], workspace->f_all[i][0], - workspace->f_all[i][1], workspace->f_all[i][2] ); + workspace->id_all[i], workspace->f_all[i][0], + workspace->f_all[i][1], workspace->f_all[i][2] ); } void Print_Force_Files( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control, - mpi_datatypes *mpi_data ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + mpi_datatypes *mpi_data ) { int i, d; @@ -1514,11 +1539,11 @@ void Print_Force_Files( reax_system *system, control_params *control, { for ( d = 0; d < 3; ++d ) workspace->f_tot[i][d] = workspace->f_be[i][d] + - workspace->f_lp[i][d] + workspace->f_ov[i][d] + workspace->f_un[i][d] + - workspace->f_ang[i][d] + workspace->f_pen[i][d] + workspace->f_coa[i][d] + - workspace->f_tor[i][d] + workspace->f_con[i][d] + - workspace->f_vdw[i][d] + workspace->f_ele[i][d] + - workspace->f_hb[i][d]; + workspace->f_lp[i][d] + workspace->f_ov[i][d] + workspace->f_un[i][d] + + workspace->f_ang[i][d] + workspace->f_pen[i][d] + workspace->f_coa[i][d] + + workspace->f_tor[i][d] + workspace->f_con[i][d] + + workspace->f_vdw[i][d] + workspace->f_ele[i][d] + + workspace->f_hb[i][d]; } Coll_rvecs_at_Master( system, workspace, mpi_data, workspace->f_tot ); @@ -1531,8 +1556,8 @@ void Print_Force_Files( reax_system *system, control_params *control, #if defined(TEST_FORCES) || defined(TEST_ENERGY) void Print_Far_Neighbors_List( reax_system *system, reax_list **lists, - control_params *control, simulation_data *data, - output_controls *out_control ) + control_params *control, simulation_data *data, + output_controls *out_control ) { int i, j, id_i, id_j, nbr, natoms; int num = 0; @@ -1566,8 +1591,8 @@ void Print_Far_Neighbors_List( reax_system *system, reax_list **lists, } void Print_Bond_List( reax_system *system, control_params *control, - simulation_data *data, reax_list **lists, - output_controls *out_control) + simulation_data *data, reax_list **lists, + output_controls *out_control) { int i, j, id_i, id_j, nbr, pj; reax_list *bonds = lists[BONDS]; @@ -1609,283 +1634,283 @@ void Print_Init_Atoms( reax_system *system, storage *workspace ) int i; fprintf( stderr, "p%d had %d atoms\n", - system->my_rank, workspace->init_cnt ); + system->my_rank, workspace->init_cnt ); for ( i = 0; i < workspace->init_cnt; ++i ) fprintf( stderr, "p%d, atom%d: %d %s %8.3f %8.3f %8.3f\n", - system->my_rank, i, - workspace->init_atoms[i].type, workspace->init_atoms[i].name, - workspace->init_atoms[i].x[0], - workspace->init_atoms[i].x[1], - workspace->init_atoms[i].x[2] ); + system->my_rank, i, + workspace->init_atoms[i].type, workspace->init_atoms[i].name, + workspace->init_atoms[i].x[0], + workspace->init_atoms[i].x[1], + workspace->init_atoms[i].x[2] ); } #endif //OLD_VERSION /*void Print_Bond_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) -{ + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) + { int i; fprintf( out_control->fbond, "step: %d\n", data->step ); fprintf( out_control->fbond, "%6s%24s%24s%24s\n", - "atom", "f_be[0]", "f_be[1]", "f_be[2]" ); + "atom", "f_be[0]", "f_be[1]", "f_be[2]" ); for( i = 0; i < system->bigN; ++i ) - fprintf(out_control->fbond, "%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - workspace->f_all[i][0], workspace->f_all[i][1], - workspace->f_all[i][2]); -} + fprintf(out_control->fbond, "%6d%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + workspace->f_all[i][0], workspace->f_all[i][1], + workspace->f_all[i][2]); + } -void Print_LonePair_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) -{ + void Print_LonePair_Forces( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) + { int i; fprintf( out_control->flp, "step: %d\n", data->step ); fprintf( out_control->flp, "%6s%24s\n", "atom", "f_lonepair" ); for( i = 0; i < system->bigN; ++i ) - fprintf(out_control->flp, "%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - workspace->f_all[i][0], workspace->f_all[i][1], - workspace->f_all[i][2]); -} + fprintf(out_control->flp, "%6d%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + workspace->f_all[i][0], workspace->f_all[i][1], + workspace->f_all[i][2]); + } -void Print_OverCoor_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) -{ + void Print_OverCoor_Forces( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) + { int i; fprintf( out_control->fov, "step: %d\n", data->step ); fprintf( out_control->fov, "%6s%-38s%-38s%-38s\n", - "atom","f_over[0]", "f_over[1]", "f_over[2]" ); + "atom","f_over[0]", "f_over[1]", "f_over[2]" ); for( i = 0; i < system->bigN; ++i ) - fprintf( out_control->fov, - "%6d %24.15e%24.15e%24.15e 0 0 0\n", - system->my_atoms[i].orig_id, - workspace->f_all[i][0], workspace->f_all[i][1], - workspace->f_all[i][2] ); -} + fprintf( out_control->fov, + "%6d %24.15e%24.15e%24.15e 0 0 0\n", + system->my_atoms[i].orig_id, + workspace->f_all[i][0], workspace->f_all[i][1], + workspace->f_all[i][2] ); + } -void Print_UnderCoor_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) -{ + void Print_UnderCoor_Forces( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) + { int i; fprintf( out_control->fun, "step: %d\n", data->step ); fprintf( out_control->fun, "%6s%-38s%-38s%-38s\n", - "atom","f_under[0]", "f_under[1]", "f_under[2]" ); + "atom","f_under[0]", "f_under[1]", "f_under[2]" ); for( i = 0; i < system->bigN; ++i ) - fprintf( out_control->fun, - "%6d %24.15e%24.15e%24.15e 0 0 0\n", - system->my_atoms[i].orig_id, - workspace->f_all[i][0], workspace->f_all[i][1], - workspace->f_all[i][2] ); -} + fprintf( out_control->fun, + "%6d %24.15e%24.15e%24.15e 0 0 0\n", + system->my_atoms[i].orig_id, + workspace->f_all[i][0], workspace->f_all[i][1], + workspace->f_all[i][2] ); + } void Print_ValAngle_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { - int j; - - fprintf( out_control->f3body, "step: %d\n", data->step ); - fprintf( out_control->f3body, "%6s%-37s%-37s%-37s%-38s\n", - "atom", "3-body total", "f_ang", "f_pen", "f_coa" ); - - for( j = 0; j < system->N; ++j ){ - if( rvec_isZero(workspace->f_pen[j]) && rvec_isZero(workspace->f_coa[j]) ) - fprintf( out_control->f3body, - "%6d %24.15e%24.15e%24.15e 0 0 0 0 0 0\n", - system->my_atoms[j].orig_id, - workspace->f_ang[j][0], workspace->f_ang[j][1], - workspace->f_ang[j][2] ); - else if( rvec_isZero(workspace->f_coa[j]) ) - fprintf( out_control->f3body, - "%6d %24.15e%24.15e%24.15e %24.15e%24.15e%24.15e " \ - "%24.15e%24.15e%24.15e\n", - system->my_atoms[j].orig_id, - workspace->f_ang[j][0] + workspace->f_pen[j][0], - workspace->f_ang[j][1] + workspace->f_pen[j][1], - workspace->f_ang[j][2] + workspace->f_pen[j][2], - workspace->f_ang[j][0], workspace->f_ang[j][1], - workspace->f_ang[j][2], - workspace->f_pen[j][0], workspace->f_pen[j][1], - workspace->f_pen[j][2] ); - else{ - fprintf( out_control->f3body, "%6d %24.15e%24.15e%24.15e ", - system->my_atoms[j].orig_id, - workspace->f_ang[j][0] + workspace->f_pen[j][0] + - workspace->f_coa[j][0], - workspace->f_ang[j][1] + workspace->f_pen[j][1] + - workspace->f_coa[j][1], - workspace->f_ang[j][2] + workspace->f_pen[j][2] + - workspace->f_coa[j][2] ); - - fprintf( out_control->f3body, - "%24.15e%24.15e%24.15e %24.15e%24.15e%24.15e "\ - "%24.15e%24.15e%24.15e\n", - workspace->f_ang[j][0], workspace->f_ang[j][1], - workspace->f_ang[j][2], - workspace->f_pen[j][0], workspace->f_pen[j][1], - workspace->f_pen[j][2], - workspace->f_coa[j][0], workspace->f_coa[j][1], - workspace->f_coa[j][2] ); + int j; + + fprintf( out_control->f3body, "step: %d\n", data->step ); + fprintf( out_control->f3body, "%6s%-37s%-37s%-37s%-38s\n", + "atom", "3-body total", "f_ang", "f_pen", "f_coa" ); + + for( j = 0; j < system->N; ++j ){ + if( rvec_isZero(workspace->f_pen[j]) && rvec_isZero(workspace->f_coa[j]) ) + fprintf( out_control->f3body, + "%6d %24.15e%24.15e%24.15e 0 0 0 0 0 0\n", + system->my_atoms[j].orig_id, + workspace->f_ang[j][0], workspace->f_ang[j][1], + workspace->f_ang[j][2] ); + else if( rvec_isZero(workspace->f_coa[j]) ) + fprintf( out_control->f3body, + "%6d %24.15e%24.15e%24.15e %24.15e%24.15e%24.15e " \ + "%24.15e%24.15e%24.15e\n", + system->my_atoms[j].orig_id, + workspace->f_ang[j][0] + workspace->f_pen[j][0], + workspace->f_ang[j][1] + workspace->f_pen[j][1], + workspace->f_ang[j][2] + workspace->f_pen[j][2], + workspace->f_ang[j][0], workspace->f_ang[j][1], + workspace->f_ang[j][2], + workspace->f_pen[j][0], workspace->f_pen[j][1], + workspace->f_pen[j][2] ); + else{ + fprintf( out_control->f3body, "%6d %24.15e%24.15e%24.15e ", + system->my_atoms[j].orig_id, + workspace->f_ang[j][0] + workspace->f_pen[j][0] + + workspace->f_coa[j][0], + workspace->f_ang[j][1] + workspace->f_pen[j][1] + + workspace->f_coa[j][1], + workspace->f_ang[j][2] + workspace->f_pen[j][2] + + workspace->f_coa[j][2] ); + + fprintf( out_control->f3body, + "%24.15e%24.15e%24.15e %24.15e%24.15e%24.15e "\ + "%24.15e%24.15e%24.15e\n", + workspace->f_ang[j][0], workspace->f_ang[j][1], + workspace->f_ang[j][2], + workspace->f_pen[j][0], workspace->f_pen[j][1], + workspace->f_pen[j][2], + workspace->f_coa[j][0], workspace->f_coa[j][1], + workspace->f_coa[j][2] ); + } } - } } void Print_Hydrogen_Bond_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control) { - int j; + int j; - fprintf( out_control->fhb, "step: %d\n", data->step ); - fprintf( out_control->fhb, "%6s\t%-38s\n", "atom", "f_hb[0,1,2]" ); + fprintf( out_control->fhb, "step: %d\n", data->step ); + fprintf( out_control->fhb, "%6s\t%-38s\n", "atom", "f_hb[0,1,2]" ); - for( j = 0; j < system->N; ++j ) - fprintf(out_control->fhb, "%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[j].orig_id, - workspace->f_hb[j][0], - workspace->f_hb[j][1], - workspace->f_hb[j][2] ); + for( j = 0; j < system->N; ++j ) + fprintf(out_control->fhb, "%6d%24.15e%24.15e%24.15e\n", + system->my_atoms[j].orig_id, + workspace->f_hb[j][0], + workspace->f_hb[j][1], + workspace->f_hb[j][2] ); } void Print_Four_Body_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { - int j; - - fprintf( out_control->f4body, "step: %d\n", data->step ); - fprintf( out_control->f4body, "%6s\t%-38s%-38s%-38s\n", - "atom", "4-body total", "f_tor", "f_con" ); - - for( j = 0; j < system->N; ++j ){ - if( !rvec_isZero( workspace->f_con[j] ) ) - fprintf( out_control->f4body, - "%6d %24.15e%24.15e%24.15e %24.15e%24.15e%24.15e "\ - "%24.15e%24.15e%24.15e\n", - system->my_atoms[j].orig_id, - workspace->f_tor[j][0] + workspace->f_con[j][0], - workspace->f_tor[j][1] + workspace->f_con[j][1], - workspace->f_tor[j][2] + workspace->f_con[j][2], - workspace->f_tor[j][0], workspace->f_tor[j][1], - workspace->f_tor[j][2], - workspace->f_con[j][0], workspace->f_con[j][1], - workspace->f_con[j][2] ); - else - fprintf( out_control->f4body, - "%6d %24.15e%24.15e%24.15e 0 0 0\n", - system->my_atoms[j].orig_id, workspace->f_tor[j][0], - workspace->f_tor[j][1], workspace->f_tor[j][2] ); - } + int j; + + fprintf( out_control->f4body, "step: %d\n", data->step ); + fprintf( out_control->f4body, "%6s\t%-38s%-38s%-38s\n", + "atom", "4-body total", "f_tor", "f_con" ); + + for( j = 0; j < system->N; ++j ){ + if( !rvec_isZero( workspace->f_con[j] ) ) + fprintf( out_control->f4body, + "%6d %24.15e%24.15e%24.15e %24.15e%24.15e%24.15e "\ + "%24.15e%24.15e%24.15e\n", + system->my_atoms[j].orig_id, + workspace->f_tor[j][0] + workspace->f_con[j][0], + workspace->f_tor[j][1] + workspace->f_con[j][1], + workspace->f_tor[j][2] + workspace->f_con[j][2], + workspace->f_tor[j][0], workspace->f_tor[j][1], + workspace->f_tor[j][2], + workspace->f_con[j][0], workspace->f_con[j][1], + workspace->f_con[j][2] ); + else + fprintf( out_control->f4body, + "%6d %24.15e%24.15e%24.15e 0 0 0\n", + system->my_atoms[j].orig_id, workspace->f_tor[j][0], + workspace->f_tor[j][1], workspace->f_tor[j][2] ); + } } void Print_vdW_Coulomb_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { - int i; - - return; - - fprintf( out_control->fnonb, "step: %d\n", data->step ); - fprintf( out_control->fnonb, "%6s\t%-38s%-38s%-38s\n", - "atom", "nonbonded_total[0,1,2]", "f_vdw[0,1,2]", "f_ele[0,1,2]" ); - - for( i = 0; i < system->N; ++i ) - fprintf( out_control->fnonb, - "%6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - workspace->f_vdw[i][0] + workspace->f_ele[i][0], - workspace->f_vdw[i][1] + workspace->f_ele[i][1], - workspace->f_vdw[i][2] + workspace->f_ele[i][2], - workspace->f_vdw[i][0], - workspace->f_vdw[i][1], - workspace->f_vdw[i][2], - workspace->f_ele[i][0], - workspace->f_ele[i][1], - workspace->f_ele[i][2] ); + int i; + + return; + + fprintf( out_control->fnonb, "step: %d\n", data->step ); + fprintf( out_control->fnonb, "%6s\t%-38s%-38s%-38s\n", + "atom", "nonbonded_total[0,1,2]", "f_vdw[0,1,2]", "f_ele[0,1,2]" ); + + for( i = 0; i < system->N; ++i ) + fprintf( out_control->fnonb, + "%6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + workspace->f_vdw[i][0] + workspace->f_ele[i][0], + workspace->f_vdw[i][1] + workspace->f_ele[i][1], + workspace->f_vdw[i][2] + workspace->f_ele[i][2], + workspace->f_vdw[i][0], + workspace->f_vdw[i][1], + workspace->f_vdw[i][2], + workspace->f_ele[i][0], + workspace->f_ele[i][1], + workspace->f_ele[i][2] ); } void Print_Total_Force( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { - int i; + int i; - return; + return; - fprintf( out_control->ftot, "step: %d\n", data->step ); - fprintf( out_control->ftot, "%6s\t%-38s\n", "atom", "atom.f[0,1,2]"); + fprintf( out_control->ftot, "step: %d\n", data->step ); + fprintf( out_control->ftot, "%6s\t%-38s\n", "atom", "atom.f[0,1,2]"); - for( i = 0; i < system->n; ++i ) - fprintf( out_control->ftot, "%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[i].f[0], - system->my_atoms[i].f[1], - system->my_atoms[i].f[2] ); + for( i = 0; i < system->n; ++i ) + fprintf( out_control->ftot, "%6d%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + system->my_atoms[i].f[0], + system->my_atoms[i].f[1], + system->my_atoms[i].f[2] ); } void Compare_Total_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { - int i; + int i; - return; - - fprintf( out_control->ftot2, "step: %d\n", data->step ); - fprintf( out_control->ftot2, "%6s\t%-38s%-38s\n", - "atom", "f_total[0,1,2]", "test_force_total[0,1,2]" ); - - for( i = 0; i < system->N; ++i ) - fprintf( out_control->ftot2, "%6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[i].f[0], - system->my_atoms[i].f[1], - system->my_atoms[i].f[2], - workspace->f_be[i][0] + workspace->f_lp[i][0] + - workspace->f_ov[i][0] + workspace->f_un[i][0] + - workspace->f_ang[i][0]+ workspace->f_pen[i][0]+ - workspace->f_coa[i][0]+ + workspace->f_hb[i][0] + - workspace->f_tor[i][0] + workspace->f_con[i][0] + - workspace->f_vdw[i][0] + workspace->f_ele[i][0], - workspace->f_be[i][1] + workspace->f_lp[i][1] + - workspace->f_ov[i][1] + workspace->f_un[i][1] + - workspace->f_ang[i][1]+ workspace->f_pen[i][1]+ - workspace->f_coa[i][1]+ + workspace->f_hb[i][1] + - workspace->f_tor[i][1] + workspace->f_con[i][1] + - workspace->f_vdw[i][1] + workspace->f_ele[i][1], - workspace->f_be[i][2] + workspace->f_lp[i][2] + - workspace->f_ov[i][2] + workspace->f_un[i][2] + - workspace->f_ang[i][2]+ workspace->f_pen[i][2] + - workspace->f_coa[i][2]+ + workspace->f_hb[i][2] + - workspace->f_tor[i][2] + workspace->f_con[i][2] + - workspace->f_vdw[i][2] + workspace->f_ele[i][2] ); + return; + + fprintf( out_control->ftot2, "step: %d\n", data->step ); + fprintf( out_control->ftot2, "%6s\t%-38s%-38s\n", + "atom", "f_total[0,1,2]", "test_force_total[0,1,2]" ); + + for( i = 0; i < system->N; ++i ) + fprintf( out_control->ftot2, "%6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + system->my_atoms[i].f[0], + system->my_atoms[i].f[1], + system->my_atoms[i].f[2], + workspace->f_be[i][0] + workspace->f_lp[i][0] + + workspace->f_ov[i][0] + workspace->f_un[i][0] + + workspace->f_ang[i][0]+ workspace->f_pen[i][0]+ + workspace->f_coa[i][0]+ + workspace->f_hb[i][0] + + workspace->f_tor[i][0] + workspace->f_con[i][0] + + workspace->f_vdw[i][0] + workspace->f_ele[i][0], + workspace->f_be[i][1] + workspace->f_lp[i][1] + + workspace->f_ov[i][1] + workspace->f_un[i][1] + + workspace->f_ang[i][1]+ workspace->f_pen[i][1]+ + workspace->f_coa[i][1]+ + workspace->f_hb[i][1] + + workspace->f_tor[i][1] + workspace->f_con[i][1] + + workspace->f_vdw[i][1] + workspace->f_ele[i][1], + workspace->f_be[i][2] + workspace->f_lp[i][2] + + workspace->f_ov[i][2] + workspace->f_un[i][2] + + workspace->f_ang[i][2]+ workspace->f_pen[i][2] + + workspace->f_coa[i][2]+ + workspace->f_hb[i][2] + + workspace->f_tor[i][2] + workspace->f_con[i][2] + + workspace->f_vdw[i][2] + workspace->f_ele[i][2] ); }*/ /*void Init_Force_Test_Functions( ) -{ + { Print_Interactions[0] = Print_Bond_Orders; Print_Interactions[1] = Print_Bond_Forces; Print_Interactions[2] = Print_LonePair_Forces; diff --git a/PuReMD/src/linear_solvers.c b/PuReMD/src/linear_solvers.c index 83ddaf8a..bcb04327 100644 --- a/PuReMD/src/linear_solvers.c +++ b/PuReMD/src/linear_solvers.c @@ -34,9 +34,9 @@ #include "lapacke.h" #endif -#if defined(CG_PERFORMANCE) +/*#if defined(CG_PERFORMANCE) real t_start, t_elapsed, matvec_time, dot_time; -#endif +#endif*/ int compare_dbls( const void* arg1, const void* arg2 ) { @@ -102,7 +102,7 @@ int find_bucket( double *list, int len, double a ) return s; } -void setup_sparse_approx_inverse( reax_system *system, storage *workspace, +real setup_sparse_approx_inverse( reax_system *system, simulation_data *data, storage *workspace, mpi_datatypes *mpi_data, sparse_matrix *A, sparse_matrix **A_spar_patt, int nprocs, real filter ) { @@ -126,6 +126,12 @@ void setup_sparse_approx_inverse( reax_system *system, storage *workspace, MPI_Comm comm; + real start, t_start, t_comm; + real total_comm; + + start = Get_Time(); + t_comm = 0.0; + srecv = NULL; sdispls = NULL; samplelist_local = NULL; @@ -160,8 +166,10 @@ void setup_sparse_approx_inverse( reax_system *system, storage *workspace, } s_local = (int) (12.0 * log2(n_local*nprocs)); + t_start = Get_Time( ); MPI_Allreduce(&n_local, &n, 1, MPI_INT, MPI_SUM, comm); MPI_Reduce(&s_local, &s, 1, MPI_INT, MPI_SUM, MASTER_NODE, comm); + t_comm += Get_Timing_Info( t_start ); /* count num. bin elements for each processor, uniform bin sizes */ input_array = malloc( sizeof(real) * n_local ); @@ -196,7 +204,9 @@ void setup_sparse_approx_inverse( reax_system *system, storage *workspace, } /* gather samples at the root process */ + t_start = Get_Time( ); MPI_Gather( &s_local, 1, MPI_INT, srecv, 1, MPI_INT, MASTER_NODE, comm ); + t_comm += Get_Timing_Info( t_start ); if( system->my_rank == MASTER_NODE ) { @@ -207,8 +217,10 @@ void setup_sparse_approx_inverse( reax_system *system, storage *workspace, } } + t_start = Get_Time( ); MPI_Gatherv( samplelist_local, s_local, MPI_DOUBLE, samplelist, srecv, sdispls, MPI_DOUBLE, MASTER_NODE, comm); + t_comm += Get_Timing_Info( t_start ); /* sort samples at the root process and select pivots */ if ( system->my_rank == MASTER_NODE ) @@ -222,7 +234,9 @@ void setup_sparse_approx_inverse( reax_system *system, storage *workspace, } /* broadcast pivots */ + t_start = Get_Time( ); MPI_Bcast( pivotlist, nprocs - 1, MPI_DOUBLE, MASTER_NODE, comm ); + t_comm += Get_Timing_Info( t_start ); for ( i = 0; i < nprocs; ++i ) { @@ -258,7 +272,9 @@ void setup_sparse_approx_inverse( reax_system *system, storage *workspace, } /* determine counts for elements per process */ + t_start = Get_Time( ); MPI_Allreduce( MPI_IN_PLACE, scounts, nprocs, MPI_INT, MPI_SUM, comm ); + t_comm += Get_Timing_Info( t_start ); /* find the target process */ target_proc = 0; @@ -283,7 +299,10 @@ void setup_sparse_approx_inverse( reax_system *system, storage *workspace, } /* send local buckets to target processor for quickselect */ + t_start = Get_Time( ); MPI_Gather( scounts_local + target_proc, 1, MPI_INT, scounts, 1, MPI_INT, target_proc, comm ); + t_comm += Get_Timing_Info( t_start ); + if ( system->my_rank == target_proc ) { dspls[0] = 0; @@ -293,8 +312,10 @@ void setup_sparse_approx_inverse( reax_system *system, storage *workspace, } } + t_start = Get_Time( ); MPI_Gatherv( bucketlist_local + dspls_local[target_proc], scounts_local[target_proc], MPI_DOUBLE, bucketlist, scounts, dspls, MPI_DOUBLE, target_proc, comm); + t_comm += Get_Timing_Info( t_start ); /* apply quick select algorithm at the target process */ if( system->my_rank == target_proc) @@ -362,7 +383,9 @@ void setup_sparse_approx_inverse( reax_system *system, storage *workspace, } /*broadcast the filtering value*/ + t_start = Get_Time( ); MPI_Bcast( &threshold, 1, MPI_DOUBLE, target_proc, comm ); + t_comm += Get_Timing_Info( t_start ); // int nnz = 0; uncomment to check the nnz's in the sparsity pattern @@ -391,11 +414,19 @@ void setup_sparse_approx_inverse( reax_system *system, storage *workspace, fprintf(stdout,"total nnz in all sparsity patterns = %d\nthreshold = %.15lf\n", nnz, threshold); fflush(stdout); }*/ + + MPI_Reduce(&t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world); + + if( system->my_rank == MASTER_NODE ) + { + data->timing.cm_solver_comm += total_comm / nprocs; + } + return Get_Timing_Info( start ); } -void sparse_approx_inverse(reax_system *system, storage *workspace, mpi_datatypes *mpi_data, - sparse_matrix *A, sparse_matrix *A_spar_patt, sparse_matrix **A_app_inv ) +real sparse_approx_inverse(reax_system *system, simulation_data *data, storage *workspace, mpi_datatypes *mpi_data, + sparse_matrix *A, sparse_matrix *A_spar_patt, sparse_matrix **A_app_inv, int nprocs ) { int N, M, d_i, d_j; int i, k, pj, j_temp; @@ -420,9 +451,11 @@ void sparse_approx_inverse(reax_system *system, storage *workspace, mpi_datatype int *j_send, *j_recv1, *j_recv2; real *val_send, *val_recv1, *val_recv2; - /*real start; + real start, t_start, t_comm; + real total_comm; - start = Get_Time( ); */ + start = Get_Time( ); + t_comm = 0.0; comm = mpi_data->world; @@ -468,8 +501,10 @@ void sparse_approx_inverse(reax_system *system, storage *workspace, mpi_datatype } /* Announce the nnz's in each row that will be communicated later */ + t_start = Get_Time( ); scale = sizeof(int) / sizeof(void); Dist( system, mpi_data, row_nnz, MPI_INT, scale, int_packer ); + t_comm += Get_Timing_Info( t_start ); comm = mpi_data->comm_mesh3D; out_bufs = mpi_data->out_buffers; @@ -501,8 +536,11 @@ void sparse_approx_inverse(reax_system *system, storage *workspace, mpi_datatype flag1 = 1; j_recv1 = (int *) malloc( sizeof(int) * cnt ); val_recv1 = (real *) malloc( sizeof(real) * cnt ); + + t_start = Get_Time( ); MPI_Irecv( j_recv1, cnt, MPI_INT, nbr1->rank, 2 * d + 1, comm, &req1 ); MPI_Irecv( val_recv1, cnt, MPI_DOUBLE, nbr1->rank, 2 * d + 1, comm, &req2 ); + t_comm += Get_Timing_Info( t_start ); } } @@ -525,8 +563,11 @@ void sparse_approx_inverse(reax_system *system, storage *workspace, mpi_datatype flag2 = 1; j_recv2 = (int *) malloc( sizeof(int) * cnt ); val_recv2 = (real *) malloc( sizeof(real) * cnt ); + + t_start = Get_Time( ); MPI_Irecv( j_recv2, cnt, MPI_INT, nbr2->rank, 2 * d, comm, &req3 ); MPI_Irecv( val_recv2, cnt, MPI_DOUBLE, nbr2->rank, 2 * d, comm, &req4 ); + t_comm += Get_Timing_Info( t_start ); } } @@ -568,8 +609,10 @@ void sparse_approx_inverse(reax_system *system, storage *workspace, mpi_datatype } } + t_start = Get_Time( ); MPI_Send( j_send, cnt, MPI_INT, nbr1->rank, 2 * d, comm ); MPI_Send( val_send, cnt, MPI_DOUBLE, nbr1->rank, 2 * d, comm ); + t_comm += Get_Timing_Info( t_start ); } } @@ -610,16 +653,20 @@ void sparse_approx_inverse(reax_system *system, storage *workspace, mpi_datatype } } + t_start = Get_Time( ); MPI_Send( j_send, cnt, MPI_INT, nbr1->rank, 2 * d + 1, comm ); MPI_Send( val_send, cnt, MPI_DOUBLE, nbr1->rank, 2 * d + 1, comm ); + t_comm += Get_Timing_Info( t_start ); } } if( flag1 ) { + t_start = Get_Time( ); MPI_Wait( &req1, &stat1 ); MPI_Wait( &req2, &stat2 ); + t_comm += Get_Timing_Info( t_start ); cnt = 0; for( i = nbr1->atoms_str; i < (nbr1->atoms_str + nbr1->atoms_cnt); ++i ) @@ -641,9 +688,10 @@ void sparse_approx_inverse(reax_system *system, storage *workspace, mpi_datatype if( flag2 ) { + t_start = Get_Time( ); MPI_Wait( &req3, &stat3 ); MPI_Wait( &req4, &stat4 ); - + t_comm += Get_Timing_Info( t_start ); cnt = 0; for( i = nbr2->atoms_str; i < (nbr2->atoms_str + nbr2->atoms_cnt); ++i ) @@ -826,7 +874,14 @@ void sparse_approx_inverse(reax_system *system, storage *workspace, mpi_datatype free( pos_x); free( X ); - /* return Get_Timing_Info( start ); */ + MPI_Reduce(&t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world); + + if( system->my_rank == MASTER_NODE ) + { + data->timing.cm_solver_comm += total_comm / nprocs; + } + + return Get_Timing_Info( start ); } void dual_Sparse_MatVec( sparse_matrix *A, rvec2 *x, rvec2 *b, int N ) @@ -872,7 +927,7 @@ void dual_Sparse_MatVec( sparse_matrix *A, rvec2 *x, rvec2 *b, int N ) } -int dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, +/*int dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, rvec2 *b, real tol, rvec2 *x, mpi_datatypes* mpi_data, FILE *fout ) { int i, j, n, N, matvecs, scale; @@ -912,15 +967,15 @@ int dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, for ( j = 0; j < system->n; ++j ) { - /* residual */ + // residual workspace->r2[j][0] = b[j][0] - workspace->q2[j][0]; workspace->r2[j][1] = b[j][1] - workspace->q2[j][1]; - /* apply diagonal pre-conditioner */ + // apply diagonal pre-conditioner workspace->d2[j][0] = workspace->r2[j][0] * workspace->Hdia_inv[j]; workspace->d2[j][1] = workspace->r2[j][1] * workspace->Hdia_inv[j]; } - /* norm of b */ + // norm of b my_sum[0] = my_sum[1] = 0; for ( j = 0; j < n; ++j ) { @@ -932,7 +987,7 @@ int dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, b_norm[1] = sqrt( norm_sqr[1] ); //fprintf( stderr, "bnorm = %f %f\n", b_norm[0], b_norm[1] ); - /* dot product: r.d */ + // dot product: r.d my_dot[0] = my_dot[1] = 0; for ( j = 0; j < n; ++j ) { @@ -963,7 +1018,7 @@ int dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, } #endif - /* dot product: d.q */ + // dot product: d.q my_dot[0] = my_dot[1] = 0; for ( j = 0; j < n; ++j ) { @@ -978,16 +1033,16 @@ int dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, my_dot[0] = my_dot[1] = 0; for ( j = 0; j < system->n; ++j ) { - /* update x */ + // update x x[j][0] += alpha[0] * workspace->d2[j][0]; x[j][1] += alpha[1] * workspace->d2[j][1]; - /* update residual */ + // update residual workspace->r2[j][0] -= alpha[0] * workspace->q2[j][0]; workspace->r2[j][1] -= alpha[1] * workspace->q2[j][1]; - /* apply diagonal pre-conditioner */ + // apply diagonal pre-conditioner workspace->p2[j][0] = workspace->r2[j][0] * workspace->Hdia_inv[j]; workspace->p2[j][1] = workspace->r2[j][1] * workspace->Hdia_inv[j]; - /* dot product: r.p */ + // dot product: r.p my_dot[0] += workspace->r2[j][0] * workspace->p2[j][0]; my_dot[1] += workspace->r2[j][1] * workspace->p2[j][1]; } @@ -1012,7 +1067,7 @@ int dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, beta[1] = sig_new[1] / sig_old[1]; for ( j = 0; j < system->n; ++j ) { - /* d = p + beta * d */ + // d = p + beta * d workspace->d2[j][0] = workspace->p2[j][0] + beta[0] * workspace->d2[j][0]; workspace->d2[j][1] = workspace->p2[j][1] + beta[1] * workspace->d2[j][1]; } @@ -1059,7 +1114,7 @@ int dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, #endif return (i + 1) + matvecs; -} +}*/ void Sparse_MatVec( sparse_matrix *A, real *x, real *b, int N ) @@ -1126,124 +1181,146 @@ static void Sparse_MatVec_full( const sparse_matrix * const A, }*/ -int CG( reax_system *system, storage *workspace, sparse_matrix *H, real *b, - real tol, real *x, mpi_datatypes* mpi_data, FILE *fout, int step ) +int CG( reax_system *system, control_params *control, simulation_data *data, + storage *workspace, sparse_matrix *H, real *b, + real tol, real *x, mpi_datatypes* mpi_data, FILE *fout, int nprocs ) { - int i, scale; + int i, j, scale; real tmp, alpha, beta, b_norm; real sig_old, sig_new; -#if defined(HALF_LIST) - int j; -#endif + real t_start, t_pa, t_spmv, t_vops, t_comm; + real total_pa, total_spmv, total_vops, total_comm; -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) - { - t_start = matvec_time = dot_time = 0; - t_start = Get_Time( ); - } -#endif + t_pa = 0.0; + t_spmv = 0.0; + t_vops = 0.0; + t_comm = 0.0; + t_start = Get_Time( ); scale = sizeof(real) / sizeof(void); Dist( system, mpi_data, x, MPI_DOUBLE, scale, real_packer ); + t_comm += Get_Timing_Info( t_start ); + + t_start = Get_Time( ); Sparse_MatVec( H, x, workspace->q, system->N ); + t_spmv += Get_Timing_Info( t_start ); + #if defined(HALF_LIST) + t_start = Get_Time( ); Coll( system, mpi_data, workspace->q, MPI_DOUBLE, scale, real_unpacker ); + t_comm += Get_Timing_Info( t_start ); #endif -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) - { - Update_Timing_Info( &t_start, &matvec_time ); - } -#endif - + t_start = Get_Time( ); Vector_Sum( workspace->r , 1., b, -1., workspace->q, system->n ); + t_vops += Get_Timing_Info( t_start ); /* pre-conditioning */ -#if defined(SAI_PRECONDITIONER) && !defined(HALF_LIST) - Dist( system, mpi_data, workspace->r, MPI_DOUBLE, scale, real_packer ); - Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->d, system->n ); -#else - for ( j = 0; j < system->n; ++j ) + if( control->cm_solver_pre_comp_type == SAI_PC ) { - workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; + t_start = Get_Time( ); + Dist( system, mpi_data, workspace->r, MPI_DOUBLE, scale, real_packer ); + t_comm += Get_Timing_Info( t_start ); + + t_start = Get_Time( ); + Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->d, system->n ); + t_pa += Get_Timing_Info( t_start ); } -#endif - b_norm = Parallel_Norm( b, system->n, mpi_data->world ); - sig_new = Parallel_Dot(workspace->r, workspace->d, system->n, mpi_data->world); - -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) + else if ( control->cm_solver_pre_comp_type == DIAG_PC) { - Update_Timing_Info( &t_start, &dot_time ); + t_start = Get_Time( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } + t_pa += Get_Timing_Info( t_start ); } -#endif - for ( i = 1; i < 300 && sqrt(sig_new) / b_norm > tol; ++i ) + t_start = Get_Time( ); + b_norm = Parallel_Norm( b, system->n, mpi_data->world ); + sig_new = Parallel_Dot(workspace->r, workspace->d, system->n, mpi_data->world); + t_vops += Get_Timing_Info( t_start ); + + for ( i = 0; i < control->cm_solver_max_iters && sqrt(sig_new) / b_norm > tol; ++i ) { + t_start = Get_Time( ); Dist( system, mpi_data, workspace->d, MPI_DOUBLE, scale, real_packer ); + t_comm += Get_Timing_Info( t_start ); + + t_start = Get_Time( ); Sparse_MatVec( H, workspace->d, workspace->q, system->N ); + t_spmv += Get_Timing_Info( t_start ); + #if defined(HALF_LIST) + t_start = Get_Time( ); Coll(system, mpi_data, workspace->q, MPI_DOUBLE, scale, real_unpacker); + t_comm += Get_Timing_Info( t_start ); #endif -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) - { - Update_Timing_Info( &t_start, &matvec_time ); - } -#endif - + t_start = Get_Time( ); tmp = Parallel_Dot(workspace->d, workspace->q, system->n, mpi_data->world); alpha = sig_new / tmp; Vector_Add( x, alpha, workspace->d, system->n ); Vector_Add( workspace->r, -alpha, workspace->q, system->n ); + t_vops += Get_Timing_Info( t_start ); /* pre-conditioning */ -#if defined(SAI_PRECONDITIONER) && !defined(HALF_LIST) - Dist( system, mpi_data, workspace->r, MPI_DOUBLE, scale, real_packer ); - Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->p, system->n ); -#else - for ( j = 0; j < system->n; ++j ) + if( control->cm_solver_pre_comp_type == SAI_PC ) { - workspace->p[j] = workspace->r[j] * workspace->Hdia_inv[j]; + t_start = Get_Time( ); + Dist( system, mpi_data, workspace->r, MPI_DOUBLE, scale, real_packer ); + t_comm += Get_Timing_Info( t_start ); + + t_start = Get_Time( ); + Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->p, system->n ); + t_pa += Get_Timing_Info( t_start ); } -#endif + else if ( control->cm_solver_pre_comp_type == DIAG_PC) + { + t_start = Get_Time( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->p[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } + t_pa += Get_Timing_Info( t_start ); + } + + t_start = Get_Time( ); sig_old = sig_new; sig_new = Parallel_Dot(workspace->r, workspace->p, system->n, mpi_data->world); beta = sig_new / sig_old; Vector_Sum( workspace->d, 1., workspace->p, beta, workspace->d, system->n ); - -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) - { - Update_Timing_Info( &t_start, &dot_time ); - } -#endif + t_vops += Get_Timing_Info( t_start ); } - if ( i >= 300 ) + MPI_Reduce(&t_pa, &total_pa, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world); + MPI_Reduce(&t_spmv, &total_spmv, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world); + MPI_Reduce(&t_vops, &total_vops, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world); + MPI_Reduce(&t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world); + + if( system->my_rank == MASTER_NODE ) { - fprintf( stderr, "CG convergence failed at step %d!\n", step ); - return i; + data->timing.cm_solver_pre_app += total_pa / nprocs; + data->timing.cm_solver_spmv += total_spmv / nprocs; + data->timing.cm_solver_vector_ops += total_vops / nprocs; + data->timing.cm_solver_comm += total_comm / nprocs; } -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) + MPI_Barrier(mpi_data->world); + + if ( i >= control->cm_solver_max_iters ) { - fprintf( fout, "QEq %d iters. matvecs: %f dot: %f\n", i, matvec_time, - dot_time ); + fprintf( stderr, "CG convergence failed!\n" ); + return i; } -#endif return i; } -int CG_test( reax_system *system, storage *workspace, sparse_matrix *H, +/*int CG_test( reax_system *system, storage *workspace, sparse_matrix *H, real *b, real tol, real *x, mpi_datatypes* mpi_data, FILE *fout ) { int i, j, scale; @@ -1327,7 +1404,7 @@ int CG_test( reax_system *system, storage *workspace, sparse_matrix *H, Vector_Add( x, alpha, workspace->d, system->n ); Vector_Add( workspace->r, -alpha, workspace->q, system->n ); - /* pre-conditioning */ + // pre-conditioning for ( j = 0; j < system->n; ++j ) workspace->p[j] = workspace->r[j] * workspace->Hdia_inv[j]; @@ -1365,10 +1442,10 @@ int CG_test( reax_system *system, storage *workspace, sparse_matrix *H, } return i; -} +}*/ -void Forward_Subs( sparse_matrix *L, real *b, real *y ) +/*void Forward_Subs( sparse_matrix *L, real *b, real *y ) { int i, pj, j, si, ei; real val; @@ -1386,10 +1463,10 @@ void Forward_Subs( sparse_matrix *L, real *b, real *y ) } y[i] /= L->entries[pj].val; } -} +}*/ -void Backward_Subs( sparse_matrix *U, real *y, real *x ) +/*void Backward_Subs( sparse_matrix *U, real *y, real *x ) { int i, pj, j, si, ei; real val; @@ -1407,10 +1484,10 @@ void Backward_Subs( sparse_matrix *U, real *y, real *x ) } x[i] /= U->entries[si].val; } -} +}*/ -int PCG( reax_system *system, storage *workspace, +/*int PCG( reax_system *system, storage *workspace, sparse_matrix *H, real *b, real tol, sparse_matrix *L, sparse_matrix *U, real *x, mpi_datatypes* mpi_data, FILE *fout ) @@ -1498,11 +1575,11 @@ int PCG( reax_system *system, storage *workspace, fprintf( stderr, "PCG convergence failed!\n" ); return i; -} +}*/ #if defined(OLD_STUFF) -int sCG( reax_system *system, storage *workspace, sparse_matrix *H, +/*int sCG( reax_system *system, storage *workspace, sparse_matrix *H, real *b, real tol, real *x, mpi_datatypes* mpi_data, FILE *fout ) { int i, j; @@ -1563,7 +1640,7 @@ int sCG( reax_system *system, storage *workspace, sparse_matrix *H, Vector_Add( x, alpha, workspace->d, system->n ); Vector_Add( workspace->r, -alpha, workspace->q, system->n ); - /* pre-conditioning */ + // pre-conditioning for ( j = 0; j < system->n; ++j ) workspace->p[j] = workspace->r[j] * workspace->Hdia_inv[j]; @@ -1591,10 +1668,10 @@ int sCG( reax_system *system, storage *workspace, sparse_matrix *H, } return i; -} +}*/ -int GMRES( reax_system *system, storage *workspace, sparse_matrix *H, +/*int GMRES( reax_system *system, storage *workspace, sparse_matrix *H, real *b, real tol, real *x, mpi_datatypes* mpi_data, FILE *fout ) { int i, j, k, itr, N; @@ -1603,14 +1680,14 @@ int GMRES( reax_system *system, storage *workspace, sparse_matrix *H, N = system->N; bnorm = Norm( b, N ); - /* apply the diagonal pre-conditioner to rhs */ + // apply the diagonal pre-conditioner to rhs for ( i = 0; i < N; ++i ) workspace->b_prc[i] = b[i] * workspace->Hdia_inv[i]; - /* GMRES outer-loop */ + // GMRES outer-loop for ( itr = 0; itr < MAX_ITR; ++itr ) { - /* calculate r0 */ + // calculate r0 Sparse_MatVec( H, x, workspace->b_prm, N ); for ( i = 0; i < N; ++i ) workspace->b_prm[i] *= workspace->Hdia_inv[i]; // pre-conditioner @@ -1623,17 +1700,17 @@ int GMRES( reax_system *system, storage *workspace, sparse_matrix *H, // fprintf( stderr, "%10.6f\n", workspace->g[0] ); - /* GMRES inner-loop */ + // GMRES inner-loop for ( j = 0; j < RESTART && fabs(workspace->g[j]) / bnorm > tol; j++ ) { - /* matvec */ + // matvec Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1], N ); for ( k = 0; k < N; ++k ) workspace->v[j + 1][k] *= workspace->Hdia_inv[k]; // pre-conditioner // fprintf( stderr, "%d-%d: matvec done.\n", itr, j ); - /* apply modified Gram-Schmidt to orthogonalize the new residual */ + // apply modified Gram-Schmidt to orthogonalize the new residual for ( i = 0; i <= j; i++ ) { workspace->h[i][j] = Dot(workspace->v[i], workspace->v[j + 1], N); @@ -1646,7 +1723,7 @@ int GMRES( reax_system *system, storage *workspace, sparse_matrix *H, 1. / workspace->h[j + 1][j], workspace->v[j + 1], N ); // fprintf(stderr, "%d-%d: orthogonalization completed.\n", itr, j); - /* Givens rotations on the H matrix to make it U */ + // Givens rotations on the H matrix to make it U for ( i = 0; i <= j; i++ ) { if ( i == j ) @@ -1665,7 +1742,7 @@ int GMRES( reax_system *system, storage *workspace, sparse_matrix *H, workspace->h[i + 1][j] = tmp2; } - /* apply Givens rotations to the rhs as well */ + // apply Givens rotations to the rhs as well tmp1 = workspace->hc[j] * workspace->g[j]; tmp2 = -workspace->hs[j] * workspace->g[j]; workspace->g[j] = tmp1; @@ -1674,8 +1751,8 @@ int GMRES( reax_system *system, storage *workspace, sparse_matrix *H, // fprintf( stderr, "%10.6f\n", fabs(workspace->g[j+1]) ); } - /* solve Hy = g. - H is now upper-triangular, do back-substitution */ + // solve Hy = g. + // H is now upper-triangular, do back-substitution for ( i = j - 1; i >= 0; i-- ) { temp = workspace->g[i]; @@ -1684,23 +1761,23 @@ int GMRES( reax_system *system, storage *workspace, sparse_matrix *H, workspace->y[i] = temp / workspace->h[i][i]; } - /* update x = x_0 + Vy */ + // update x = x_0 + Vy for ( i = 0; i < j; i++ ) Vector_Add( x, workspace->y[i], workspace->v[i], N ); - /* stopping condition */ + // stopping condition if ( fabs(workspace->g[j]) / bnorm <= tol ) break; } - /*Sparse_MatVec( system, H, x, workspace->b_prm, mpi_data ); - for( i = 0; i < N; ++i ) - workspace->b_prm[i] *= workspace->Hdia_inv[i]; + //Sparse_MatVec( system, H, x, workspace->b_prm, mpi_data ); + // for( i = 0; i < N; ++i ) + // workspace->b_prm[i] *= workspace->Hdia_inv[i]; - fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" ); - for( i = 0; i < N; ++i ) - fprintf( fout, "%10.5f%15.12f%15.12f\n", - workspace->b_prc[i], workspace->b_prm[i], x[i] );*/ + // fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" ); + // for( i = 0; i < N; ++i ) + //fprintf( fout, "%10.5f%15.12f%15.12f\n", + //workspace->b_prc[i], workspace->b_prm[i], x[i] ); fprintf( fout, "GMRES outer: %d, inner: %d - |rel residual| = %15.10f\n", itr, j, fabs( workspace->g[j] ) / bnorm ); @@ -1712,10 +1789,10 @@ int GMRES( reax_system *system, storage *workspace, sparse_matrix *H, } return SUCCESS; -} +}*/ -int GMRES_HouseHolder( reax_system *system, storage *workspace, +/*int GMRES_HouseHolder( reax_system *system, storage *workspace, sparse_matrix *H, real *b, real tol, real *x, mpi_datatypes* mpi_data, FILE *fout ) { @@ -1727,18 +1804,18 @@ int GMRES_HouseHolder( reax_system *system, storage *workspace, N = system->N; bnorm = Norm( b, N ); - /* apply the diagonal pre-conditioner to rhs */ + // apply the diagonal pre-conditioner to rhs for ( i = 0; i < N; ++i ) workspace->b_prc[i] = b[i] * workspace->Hdia_inv[i]; - /* GMRES outer-loop */ + // GMRES outer-loop for ( itr = 0; itr < MAX_ITR; ++itr ) { - /* compute z = r0 */ + // compute z = r0 Sparse_MatVec( H, x, workspace->b_prm, N ); for ( i = 0; i < N; ++i ) - workspace->b_prm[i] *= workspace->Hdia_inv[i]; /* pre-conditioner */ + workspace->b_prm[i] *= workspace->Hdia_inv[i]; // pre-conditioner Vector_Sum( z[0], 1., workspace->b_prc, -1., workspace->b_prm, N ); @@ -1752,28 +1829,28 @@ int GMRES_HouseHolder( reax_system *system, storage *workspace, w[0] *= ( u[0][0] < 0.0 ? 1 : -1 ); // fprintf( stderr, "\n\n%12.6f\n", w[0] ); - /* GMRES inner-loop */ + // GMRES inner-loop for ( j = 0; j < RESTART && fabs( w[j] ) / bnorm > tol; j++ ) { - /* compute v_j */ + // compute v_j Vector_Scale( z[j], -2 * u[j][j], u[j], N ); - z[j][j] += 1.; /* due to e_j */ + z[j][j] += 1.; // due to e_j for ( i = j - 1; i >= 0; --i ) Vector_Add( z[j] + i, -2 * Dot( u[i] + i, z[j] + i, N - i ), u[i] + i, N - i ); - /* matvec */ + // matvec Sparse_MatVec( H, z[j], v, N ); for ( k = 0; k < N; ++k ) - v[k] *= workspace->Hdia_inv[k]; /* pre-conditioner */ + v[k] *= workspace->Hdia_inv[k]; // pre-conditioner for ( i = 0; i <= j; ++i ) Vector_Add( v + i, -2 * Dot( u[i] + i, v + i, N - i ), u[i] + i, N - i ); if ( !Vector_isZero( v + (j + 1), N - (j + 1) ) ) { - /* compute the HouseHolder unit vector u_j+1 */ + // compute the HouseHolder unit vector u_j+1 for ( i = 0; i <= j; ++i ) u[j + 1][i] = 0; @@ -1784,14 +1861,14 @@ int GMRES_HouseHolder( reax_system *system, storage *workspace, Vector_Scale( u[j + 1], 1 / Norm( u[j + 1], N ), u[j + 1], N ); - /* overwrite v with P_m+1 * v */ + // overwrite v with P_m+1 * v v[j + 1] -= 2 * Dot( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ) * u[j + 1][j + 1]; Vector_MakeZero( v + (j + 2), N - (j + 2) ); } - /* previous Givens rotations on H matrix to make it U */ + // previous Givens rotations on H matrix to make it U for ( i = 0; i < j; i++ ) { tmp1 = workspace->hc[i] * v[i] + workspace->hs[i] * v[i + 1]; @@ -1801,7 +1878,7 @@ int GMRES_HouseHolder( reax_system *system, storage *workspace, v[i + 1] = tmp2; } - /* apply the new Givens rotation to H and right-hand side */ + // apply the new Givens rotation to H and right-hand side if ( fabs(v[j + 1]) >= ALMOST_ZERO ) { cc = sqrt( SQR( v[j] ) + SQR( v[j + 1] ) ); @@ -1814,14 +1891,14 @@ int GMRES_HouseHolder( reax_system *system, storage *workspace, v[j] = tmp1; v[j + 1] = tmp2; - /* Givens rotations to rhs */ + / Givens rotations to rhs tmp1 = workspace->hc[j] * w[j]; tmp2 = -workspace->hs[j] * w[j]; w[j] = tmp1; w[j + 1] = tmp2; } - /* extend R */ + // extend R for ( i = 0; i <= j; ++i ) workspace->h[i][j] = v[i]; @@ -1834,8 +1911,8 @@ int GMRES_HouseHolder( reax_system *system, storage *workspace, } - /* solve Hy = w. - H is now upper-triangular, do back-substitution */ + // solve Hy = w. + // H is now upper-triangular, do back-substitution for ( i = j - 1; i >= 0; i-- ) { temp = w[i]; @@ -1848,7 +1925,7 @@ int GMRES_HouseHolder( reax_system *system, storage *workspace, for ( i = j - 1; i >= 0; i-- ) Vector_Add( x, workspace->y[i], z[i], N ); - /* stopping condition */ + // stopping condition if ( fabs( w[j] ) / bnorm <= tol ) break; } @@ -1872,5 +1949,5 @@ int GMRES_HouseHolder( reax_system *system, storage *workspace, } return SUCCESS; -} +}*/ #endif diff --git a/PuReMD/src/linear_solvers.h b/PuReMD/src/linear_solvers.h index 3b6fc1a0..980a8b90 100644 --- a/PuReMD/src/linear_solvers.h +++ b/PuReMD/src/linear_solvers.h @@ -24,11 +24,11 @@ #include "reax_types.h" -void setup_sparse_approx_inverse( reax_system*, storage*, mpi_datatypes*, +real setup_sparse_approx_inverse( reax_system*, simulation_data*, storage*, mpi_datatypes*, sparse_matrix *, sparse_matrix **, int, double ); -void sparse_approx_inverse( reax_system*, storage*, mpi_datatypes*, - sparse_matrix*, sparse_matrix*, sparse_matrix** ); +real sparse_approx_inverse( reax_system*, simulation_data*, storage*, mpi_datatypes*, + sparse_matrix*, sparse_matrix*, sparse_matrix**, int ); int GMRES( reax_system*, storage*, sparse_matrix*, real*, real, real*, mpi_datatypes*, FILE* ); @@ -36,7 +36,7 @@ int GMRES_HouseHolder( reax_system*, storage*, sparse_matrix*, real*, real, real*, mpi_datatypes*, FILE* ); int dual_CG( reax_system*, storage*, sparse_matrix*, rvec2*, real, rvec2*, mpi_datatypes*, FILE* ); -int CG( reax_system*, storage*, sparse_matrix*, +int CG( reax_system*, control_params*, simulation_data*, storage*, sparse_matrix*, real*, real, real*, mpi_datatypes*, FILE*, int ); int PCG( reax_system*, storage*, sparse_matrix*, real*, real, sparse_matrix*, sparse_matrix*, real*, mpi_datatypes*, FILE* ); diff --git a/PuReMD/src/parallelreax.c b/PuReMD/src/parallelreax.c index ea30ceed..894d7fde 100644 --- a/PuReMD/src/parallelreax.c +++ b/PuReMD/src/parallelreax.c @@ -211,7 +211,7 @@ int main( int argc, char* argv[] ) #endif /* start the simulation */ - int total_itr = data->timing.s_matvecs + data->timing.t_matvecs; + int total_itr = data->timing.cm_solver_iters; for ( ++data->step; data->step <= control->nsteps; data->step++ ) { if ( control->T_mode ) @@ -221,7 +221,7 @@ int main( int argc, char* argv[] ) Post_Evolve(system, control, data, workspace, lists, out_control, mpi_data); if( system->my_rank == MASTER_NODE ) { - total_itr += data->timing.s_matvecs + data->timing.t_matvecs; + total_itr += data->timing.cm_solver_iters; } Output_Results( system, control, data, lists, out_control, mpi_data ); //Analysis(system, control, data, workspace, lists, out_control, mpi_data); diff --git a/PuReMD/src/qEq.c b/PuReMD/src/qEq.c index 9c5650f3..96917198 100644 --- a/PuReMD/src/qEq.c +++ b/PuReMD/src/qEq.c @@ -368,20 +368,41 @@ void Calculate_Charges( reax_system *system, storage *workspace, static void Setup_Preconditioner_QEq( reax_system *system, control_params *control, simulation_data *data, storage *workspace, mpi_datatypes *mpi_data ) { + real time, t_sort, t_pc, total_sort, total_pc; + /* sort H needed for SpMV's in linear solver, H or H_sp needed for preconditioning */ + time = Get_Time( ); Sort_Matrix_Rows( workspace->H ); + t_sort = Get_Timing_Info( time ); - //TODO: add sai filtering value, which will be passed as the last parameter - setup_sparse_approx_inverse( system, workspace, mpi_data, workspace->H, &workspace->H_spar_patt, + t_pc = setup_sparse_approx_inverse( system, data, workspace, mpi_data, workspace->H, &workspace->H_spar_patt, control->nprocs, control->sai_thres ); + + + MPI_Reduce(&t_sort, &total_sort, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world); + MPI_Reduce(&t_pc, &total_pc, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world); + + if( system->my_rank == MASTER_NODE ) + { + data->timing.cm_sort_mat_rows += total_sort / control->nprocs; + data->timing.cm_solver_pre_comp += total_pc / control->nprocs; + } } static void Compute_Preconditioner_QEq( reax_system *system, control_params *control, simulation_data *data, storage *workspace, mpi_datatypes *mpi_data ) { + real t_pc, total_pc; #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) - sparse_approx_inverse( system, workspace, mpi_data, - workspace->H, workspace->H_spar_patt, &workspace->H_app_inv ); + t_pc = sparse_approx_inverse( system, data, workspace, mpi_data, + workspace->H, workspace->H_spar_patt, &workspace->H_app_inv, control->nprocs ); + + MPI_Reduce(&t_pc, &total_pc, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world); + + if( system->my_rank == MASTER_NODE ) + { + data->timing.cm_solver_pre_comp += total_pc / control->nprocs; + } #else fprintf( stderr, "[ERROR] LAPACKE support disabled. Re-compile before enabling. Terminating...\n" ); exit( INVALID_INPUT ); @@ -392,7 +413,7 @@ void QEq( reax_system *system, control_params *control, simulation_data *data, storage *workspace, output_controls *out_control, mpi_datatypes *mpi_data ) { - int j, s_matvecs, t_matvecs; + int j, iters; Init_MatVec( system, data, control, workspace, mpi_data ); @@ -401,44 +422,35 @@ void QEq( reax_system *system, control_params *control, simulation_data *data, //Print_Linear_System( system, control, workspace, data->step ); #endif - //s_matvecs = dual_CG(system, workspace, workspace->H, workspace->b, - // control->q_err, workspace->x, mpi_data, out_control->log); - //t_matvecs = 0; - -#if defined(SAI_PRECONDITIONER) && !defined(HALF_LIST) - if( control->refactor > 0 && ((data->step - data->prev_steps) % control->refactor == 0)) + if( control->cm_solver_pre_comp_type == SAI_PC ) { - Setup_Preconditioner_QEq( system, control, data, workspace, mpi_data ); + if( control->refactor > 0 && ((data->step - data->prev_steps) % control->refactor == 0)) + { + Setup_Preconditioner_QEq( system, control, data, workspace, mpi_data ); - Compute_Preconditioner_QEq( system, control, data, workspace, mpi_data ); + Compute_Preconditioner_QEq( system, control, data, workspace, mpi_data ); + } } -#endif for ( j = 0; j < system->n; ++j ) workspace->s[j] = workspace->x[j][0]; - s_matvecs = CG(system, workspace, workspace->H, workspace->b_s,//newQEq sCG - control->q_err, workspace->s, mpi_data, out_control->log , data->step ); + iters = CG(system, control, data, workspace, workspace->H, workspace->b_s, + control->q_err, workspace->s, mpi_data, out_control->log , control->nprocs ); for ( j = 0; j < system->n; ++j ) workspace->x[j][0] = workspace->s[j]; - //s_matvecs = PCG( system, workspace, workspace->H, workspace->b_s, - // control->q_err, workspace->L, workspace->U, workspace->s, - // mpi_data, out_control->log ); #if defined(DEBUG) fprintf( stderr, "p%d: first CG completed\n", system->my_rank ); #endif for ( j = 0; j < system->n; ++j ) workspace->t[j] = workspace->x[j][1]; - t_matvecs = CG(system, workspace, workspace->H, workspace->b_t,//newQEq sCG - control->q_err, workspace->t, mpi_data, out_control->log, data->step ); + iters += CG(system, control, data, workspace, workspace->H, workspace->b_t,//newQEq sCG + control->q_err, workspace->t, mpi_data, out_control->log, control->nprocs ); for ( j = 0; j < system->n; ++j ) workspace->x[j][1] = workspace->t[j]; - //t_matvecs = PCG( system, workspace, workspace->H, workspace->b_t, - // control->q_err, workspace->L, workspace->U, workspace->t, - // mpi_data, out_control->log ); #if defined(DEBUG) fprintf( stderr, "p%d: second CG completed\n", system->my_rank ); #endif @@ -452,8 +464,7 @@ void QEq( reax_system *system, control_params *control, simulation_data *data, #if defined(LOG_PERFORMANCE) if ( system->my_rank == MASTER_NODE ) { - data->timing.s_matvecs += s_matvecs; - data->timing.t_matvecs += t_matvecs; + data->timing.cm_solver_iters += iters; } #endif } diff --git a/PuReMD/src/reax_types.h b/PuReMD/src/reax_types.h index 18cc8c49..4335d66c 100644 --- a/PuReMD/src/reax_types.h +++ b/PuReMD/src/reax_types.h @@ -41,18 +41,18 @@ #define PURE_REAX //#define LAMMPS_REAX -#define SAI_PRECONDITIONER //#define HALF_LIST //#define DEBUG //#define DEBUG_FOCUS //#define TEST_ENERGY //#define TEST_FORCES -#define CG_PERFORMANCE +//#define CG_PERFORMANCE #define LOG_PERFORMANCE #define STANDARD_BOUNDARIES //#define OLD_BOUNDARIES //#define MIDPOINT_BOUNDARIES +#define ZERO (0.000000000000000e+00) #define REAX_MAX_STR 1024 #define REAX_MAX_NBRS 6 #define REAX_MAX_3BODY_PARAM 5 @@ -69,6 +69,20 @@ #define NAN_REAL(a) (0) #endif + +/* preconditioner computation type for charge method linear solver */ +enum pre_comp +{ + NONE_PC = 0, + DIAG_PC = 1, + ICHOLT_PC = 2, + ILU_PAR_PC = 3, + ILUT_PAR_PC = 4, + ILU_SUPERLU_MT_PC = 5, + SAI_PC = 6, +}; + + /********************** TYPE DEFINITIONS ********************/ typedef int ivec[3]; typedef double real; @@ -534,7 +548,9 @@ typedef struct int tabulate; int qeq_freq; + int cm_solver_max_iters; real q_err; + int cm_solver_pre_comp_type; real sai_thres; int refactor; real droptol; @@ -603,19 +619,44 @@ typedef struct typedef struct { + /* start time of event */ real start; + /* end time of event */ real end; + /* total elapsed time of event */ real elapsed; - + /* total simulation time */ real total; + /* communication time */ real comm; + /* neighbor list generation time */ real nbrs; + /* force initialization time */ real init_forces; + /* bonded force calculation time */ real bonded; + /* non-bonded force calculation time */ real nonb; - real qEq; - int s_matvecs; - int t_matvecs; + /* atomic charge distribution calculation time */ + real cm; + /**/ + real cm_sort_mat_rows; + /**/ + real cm_solver_comm; + /**/ + real cm_solver_pre_comp; + /**/ + real cm_solver_pre_app; // update CG() + /* num. of steps in iterative linear solver for charge distribution */ + int cm_solver_iters; + /**/ + real cm_solver_spmv; // update CG() + /**/ + real cm_solver_vector_ops; // update CG() + /**/ + real cm_solver_orthog; + /**/ + real cm_solver_tri_solve; } reax_timing; diff --git a/PuReMD/src/reset_tools.c b/PuReMD/src/reset_tools.c index eb7c6465..32811672 100644 --- a/PuReMD/src/reset_tools.c +++ b/PuReMD/src/reset_tools.c @@ -106,15 +106,14 @@ void Reset_Simulation_Data( simulation_data* data, int virial ) void Reset_Timing( reax_timing *rt ) { - rt->total = Get_Time(); + /*rt->total = Get_Time(); rt->comm = 0; rt->nbrs = 0; rt->init_forces = 0; rt->bonded = 0; rt->nonb = 0; rt->qEq = 0; - rt->s_matvecs = 0; - rt->t_matvecs = 0; + rt->cm_solver_iters = 0;*/ } #ifdef TEST_FORCES -- GitLab