diff --git a/sPuReMD/configure.ac b/sPuReMD/configure.ac index ce330dc6b5aa08436972f70edb79df3f27703cce..89eec9342401993b1678bf01d45a3b851b5d6996 100644 --- a/sPuReMD/configure.ac +++ b/sPuReMD/configure.ac @@ -76,6 +76,7 @@ if test "x${BUILD_OPENMP}" = "xyes"; then fi AC_SUBST(AM_CFLAGS, "$OPENMP_CFLAGS") AC_SUBST(AM_CPPFLAGS, "$OPENMP_CFLAGS") + LIBS="${LIBS} -lgomp" fi fi diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c index 4ad3069f78e7fa0158f92c7872eaad6fdc81a139..86b6ebce8310e4a8c044bae6aed709c863e565f8 100644 --- a/sPuReMD/src/forces.c +++ b/sPuReMD/src/forces.c @@ -34,10 +34,6 @@ #include "vector.h" -/* File scope variables */ -static interaction_function Interaction_Functions[NO_OF_INTERACTIONS]; - - typedef enum { DIAGONAL = 0, @@ -52,32 +48,33 @@ void Dummy_Interaction( reax_system *system, control_params *control, } -void Init_Bonded_Force_Functions( control_params *control ) +void Init_Bonded_Force_Functions( control_params *control, + interaction_function *interaction_functions ) { - Interaction_Functions[0] = Calculate_Bond_Orders; - Interaction_Functions[1] = Bond_Energy; //*/Dummy_Interaction; - Interaction_Functions[2] = LonePair_OverUnder_Coordination_Energy; + interaction_functions[0] = Calculate_Bond_Orders; + interaction_functions[1] = Bond_Energy; //*/Dummy_Interaction; + interaction_functions[2] = LonePair_OverUnder_Coordination_Energy; //*/Dummy_Interaction; - Interaction_Functions[3] = Three_Body_Interactions; //*/Dummy_Interaction; - Interaction_Functions[4] = Four_Body_Interactions; //*/Dummy_Interaction; + interaction_functions[3] = Three_Body_Interactions; //*/Dummy_Interaction; + interaction_functions[4] = Four_Body_Interactions; //*/Dummy_Interaction; if ( control->hb_cut > 0.0 ) { - Interaction_Functions[5] = Hydrogen_Bonds; //*/Dummy_Interaction; + interaction_functions[5] = Hydrogen_Bonds; //*/Dummy_Interaction; } else { - Interaction_Functions[5] = Dummy_Interaction; + interaction_functions[5] = Dummy_Interaction; } - Interaction_Functions[6] = Dummy_Interaction; //empty - Interaction_Functions[7] = Dummy_Interaction; //empty - Interaction_Functions[8] = Dummy_Interaction; //empty - Interaction_Functions[9] = Dummy_Interaction; //empty + interaction_functions[6] = Dummy_Interaction; //empty + interaction_functions[7] = Dummy_Interaction; //empty + interaction_functions[8] = Dummy_Interaction; //empty + interaction_functions[9] = Dummy_Interaction; //empty } void Compute_Bonded_Forces( reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, static_storage *workspace, reax_list **lists, + output_controls *out_control, interaction_function *interaction_functions ) { int i; @@ -116,7 +113,7 @@ void Compute_Bonded_Forces( reax_system *system, control_params *control, /* Implement all the function calls as function pointers */ for ( i = 0; i < NO_OF_INTERACTIONS; i++ ) { - (Interaction_Functions[i])( system, control, data, workspace, + (interaction_functions[i])( system, control, data, workspace, lists, out_control ); #if defined(DEBUG_FOCUS) @@ -329,7 +326,7 @@ void Validate_Lists( static_storage *workspace, reax_list **lists, int step, int static inline real Init_Charge_Matrix_Entry_Tab( reax_system *system, - control_params *control, int i, int j, + control_params *control, LR_lookup_table **LR, int i, int j, real r_ij, MATRIX_ENTRY_POSITION pos ) { int r; @@ -1009,8 +1006,8 @@ void Init_Forces_Tab( reax_system *system, control_params *control, twbp = &(system->reaxprm.tbp[type_i][type_j]); H->j[Htop] = j; - H->val[Htop] = Init_Charge_Matrix_Entry_Tab( system, control, i, j, - r_ij, OFF_DIAGONAL ); + H->val[Htop] = Init_Charge_Matrix_Entry_Tab( system, control, + workspace->LR, i, j, r_ij, OFF_DIAGONAL ); ++Htop; /* H_sp matrix entry */ @@ -1165,8 +1162,8 @@ void Init_Forces_Tab( reax_system *system, control_params *control, /* diagonal entry */ H->j[Htop] = i; - H->val[Htop] = Init_Charge_Matrix_Entry_Tab( system, control, i, j, - r_ij, DIAGONAL ); + H->val[Htop] = Init_Charge_Matrix_Entry_Tab( system, control, workspace->LR, + i, j, r_ij, DIAGONAL ); ++Htop; H_sp->j[H_sp_top] = i; @@ -1301,7 +1298,8 @@ void Estimate_Storage_Sizes( reax_system *system, control_params *control, void Compute_Forces( reax_system *system, control_params *control, simulation_data *data, static_storage *workspace, - reax_list** lists, output_controls *out_control ) + reax_list** lists, output_controls *out_control, + interaction_function *interaction_functions ) { real t_start, t_elapsed; @@ -1322,7 +1320,8 @@ void Compute_Forces( reax_system *system, control_params *control, #endif t_start = Get_Time( ); - Compute_Bonded_Forces( system, control, data, workspace, lists, out_control ); + Compute_Bonded_Forces( system, control, data, workspace, lists, out_control, + interaction_functions ); t_elapsed = Get_Timing_Info( t_start ); data->timing.bonded += t_elapsed; diff --git a/sPuReMD/src/forces.h b/sPuReMD/src/forces.h index 477732584d7fa7515a286f70d1b2343a5459f09c..eeccac22c003250eae1bad325fc5ccdad08d9d88 100644 --- a/sPuReMD/src/forces.h +++ b/sPuReMD/src/forces.h @@ -24,10 +24,12 @@ #include "mytypes.h" -void Init_Bonded_Force_Functions( control_params* ); +void Init_Bonded_Force_Functions( control_params*, + interaction_function* ); void Compute_Forces( reax_system*, control_params*, simulation_data*, - static_storage*, reax_list**, output_controls* ); + static_storage*, reax_list**, output_controls*, + interaction_function* ); void Estimate_Storage_Sizes( reax_system*, control_params*, reax_list**, int*, int*, int*, int* ); diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c index c071a46f18e6df9a36ea0ee96a84d64ad5b43132..01b0fc5593bbdb562b374b86227764edbfbc553e 100644 --- a/sPuReMD/src/init_md.c +++ b/sPuReMD/src/init_md.c @@ -942,7 +942,7 @@ void Init_Out_Controls( reax_system *system, control_params *control, void Initialize( reax_system *system, control_params *control, simulation_data *data, static_storage *workspace, reax_list **lists, output_controls *out_control, evolve_function *Evolve, - const int output_enabled ) + interaction_function *interaction_functions, const int output_enabled ) { #if defined(DEBUG) real start, end; @@ -972,7 +972,7 @@ void Initialize( reax_system *system, control_params *control, } /* These are done in forces.c, only forces.c can see all those functions */ - Init_Bonded_Force_Functions( control ); + Init_Bonded_Force_Functions( control, interaction_functions ); #ifdef TEST_FORCES Init_Force_Test_Functions( ); @@ -984,7 +984,7 @@ void Initialize( reax_system *system, control_params *control, start = Get_Time( ); #endif - Make_LR_Lookup_Table( system, control ); + Make_LR_Lookup_Table( system, control, workspace ); #if defined(DEBUG) end = Get_Timing_Info( start ); @@ -1446,7 +1446,7 @@ void Finalize( reax_system *system, control_params *control, { if ( control->tabulate ) { - Finalize_LR_Lookup_Table( system, control ); + Finalize_LR_Lookup_Table( system, control, workspace ); } if ( output_enabled == TRUE ) diff --git a/sPuReMD/src/init_md.h b/sPuReMD/src/init_md.h index 0643e0dd739fbbe8c662a8fa5e8ff1f651aabecc..aff4d01d1cbed94e6e271b50ac962ac377891a4e 100644 --- a/sPuReMD/src/init_md.h +++ b/sPuReMD/src/init_md.h @@ -27,7 +27,7 @@ void Initialize( reax_system*, control_params*, simulation_data*, static_storage*, reax_list**, output_controls*, evolve_function*, - const int ); + interaction_function *, const int ); void Finalize( reax_system*, control_params*, simulation_data*, static_storage*, reax_list**, output_controls*, const int ); diff --git a/sPuReMD/src/integrate.c b/sPuReMD/src/integrate.c index 567e96a03bbfb01241b887512cd05c40fbd47dac..aff852c30006753fc38cd2890906a65892cf77ee 100644 --- a/sPuReMD/src/integrate.c +++ b/sPuReMD/src/integrate.c @@ -35,9 +35,10 @@ #include "list.h" -void Velocity_Verlet_NVE(reax_system* system, control_params* control, +void Velocity_Verlet_NVE(reax_system *system, control_params *control, simulation_data *data, static_storage *workspace, - reax_list **lists, output_controls *out_control ) + reax_list **lists, output_controls *out_control, + interaction_function *interaction_functions ) { int i, steps, renbr; real inv_m, dt, dt_sqr; @@ -75,7 +76,8 @@ void Velocity_Verlet_NVE(reax_system* system, control_params* control, Generate_Neighbor_Lists( system, control, data, workspace, lists, out_control ); } - Compute_Forces( system, control, data, workspace, lists, out_control ); + Compute_Forces( system, control, data, workspace, lists, out_control, + interaction_functions ); for ( i = 0; i < system->N; i++ ) { @@ -93,7 +95,7 @@ void Velocity_Verlet_NVE(reax_system* system, control_params* control, void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params* control, simulation_data *data, static_storage *workspace, reax_list **lists, - output_controls *out_control ) + output_controls *out_control, interaction_function *interaction_functions ) { int i, itr, steps, renbr; real inv_m, coef_v, dt, dt_sqr; @@ -138,7 +140,8 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params* lists, out_control ); } /* Calculate Forces at time (t + dt) */ - Compute_Forces( system, control, data, workspace, lists, out_control ); + Compute_Forces( system, control, data, workspace, lists, out_control, + interaction_functions ); /* Compute iteration constants for each atom's velocity */ for ( i = 0; i < system->N; ++i ) @@ -206,11 +209,9 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein(reax_system* system, control_params* All box dimensions are scaled by the same amount, there is no change in the angles between axes. */ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system, - control_params* control, - simulation_data *data, - static_storage *workspace, - reax_list **lists, - output_controls *out_control ) + control_params* control, simulation_data *data, static_storage *workspace, + reax_list **lists, output_controls *out_control, + interaction_function *interaction_functions ) { int i, steps, renbr; real inv_m, dt, lambda, mu; @@ -256,7 +257,8 @@ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system, Generate_Neighbor_Lists( system, control, data, workspace, lists, out_control ); } - Compute_Forces( system, control, data, workspace, lists, out_control ); + Compute_Forces( system, control, data, workspace, lists, out_control, + interaction_functions ); /* velocity verlet, 2nd part */ for ( i = 0; i < system->N; i++ ) @@ -321,11 +323,9 @@ void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system* system, All box dimensions are scaled by the same amount, there is no change in the angles between axes. */ void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system* system, - control_params* control, - simulation_data *data, - static_storage *workspace, - reax_list **lists, - output_controls *out_control ) + control_params* control, simulation_data *data, static_storage *workspace, + reax_list **lists, output_controls *out_control, + interaction_function *interaction_functions ) { int i, d, steps, renbr; real dt, inv_m, lambda; @@ -371,7 +371,8 @@ void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system* system, Generate_Neighbor_Lists( system, control, data, workspace, lists, out_control ); } - Compute_Forces( system, control, data, workspace, lists, out_control ); + Compute_Forces( system, control, data, workspace, lists, out_control, + interaction_functions ); /* velocity verlet, 2nd part */ for ( i = 0; i < system->N; i++ ) @@ -452,6 +453,7 @@ void Velocity_Verlet_Nose_Hoover_NVT(reax_system* system, static_storage *workspace, reax_list **lists, output_controls *out_control ) + interaction_function *interaction_functions ) { int i; real inv_m; @@ -492,7 +494,8 @@ void Velocity_Verlet_Nose_Hoover_NVT(reax_system* system, /* Compute_Charges( system, control, workspace, lists[FAR_NBRS], out_control ); fprintf(out_control->log,"qeq-"); fflush( out_control->log ); */ - Compute_Forces( system, control, data, workspace, lists, out_control ); + Compute_Forces( system, control, data, workspace, lists, out_control, + interaction_functions ); fprintf(out_control->log, "forces\n"); fflush( out_control->log ); @@ -521,11 +524,9 @@ void Velocity_Verlet_Nose_Hoover_NVT(reax_system* system, void Velocity_Verlet_Isotropic_NPT( reax_system* system, - control_params* control, - simulation_data *data, - static_storage *workspace, - reax_list **lists, - output_controls *out_control ) + control_params* control, simulation_data *data, + static_storage *workspace, reax_list **lists, + output_controls *out_control ) { int i, itr; real deps, v_eps_new = 0, v_eps_old = 0, G_xi_new; @@ -598,7 +599,8 @@ void Velocity_Verlet_Isotropic_NPT( reax_system* system, /* Compute_Charges( system, control, workspace, lists[FAR_NBRS], out_control ); fprintf(out_control->log,"qeq-"); fflush( out_control->log ); */ - Compute_Forces( system, control, data, workspace, lists, out_control ); + Compute_Forces( system, control, data, workspace, lists, + out_control, interaction_functions ); fprintf(out_control->log, "forces\n"); fflush( out_control->log ); @@ -698,12 +700,10 @@ void Velocity_Verlet_Isotropic_NPT( reax_system* system, All box dimensions are scaled by the same amount, there is no change in the angles between axes. */ void Velocity_Verlet_Berendsen_NVT( reax_system* system, - control_params* control, - simulation_data *data, - static_storage *workspace, - reax_list **lists, - output_controls *out_control - ) + control_params* control, simulation_data *data, + static_storage *workspace, reax_list **lists, + output_controls *out_control, + interaction_function *interaction_functions ) { int i, steps, renbr; real inv_m, dt, lambda; @@ -746,7 +746,7 @@ void Velocity_Verlet_Berendsen_NVT( reax_system* system, Generate_Neighbor_Lists( system, control, data, workspace, lists, out_control ); Compute_Forces( system, control, data, workspace, - lists, out_control ); + lists, out_control, interaction_functions ); /* velocity verlet, 2nd part */ for ( i = 0; i < system->N; i++ ) diff --git a/sPuReMD/src/integrate.h b/sPuReMD/src/integrate.h index 1a821fe8d90c240355305e2b56cb9eee1fed9e15..8b847601c012dda8d6192b1d116a5531870e1f3e 100644 --- a/sPuReMD/src/integrate.h +++ b/sPuReMD/src/integrate.h @@ -22,36 +22,41 @@ #ifndef __INTEGRATE_H_ #define __INTEGRATE_H_ + #include "mytypes.h" + void Velocity_Verlet_NVE( reax_system*, control_params*, simulation_data*, - static_storage*, reax_list**, output_controls* ); + static_storage*, reax_list**, output_controls*, + interaction_function* ); + void Velocity_Verlet_Nose_Hoover_NVT( reax_system*, control_params*, - simulation_data*, static_storage*, - reax_list**, output_controls* ); + simulation_data*, static_storage*, reax_list**, output_controls*, + interaction_function* ); + void Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system*, control_params*, - simulation_data*, static_storage*, - reax_list**, output_controls* ); + simulation_data*, static_storage*, reax_list**, output_controls*, + interaction_function* ); + void Velocity_Verlet_Flexible_NPT( reax_system*, control_params*, - simulation_data*, static_storage*, - reax_list**, output_controls* ); + simulation_data*, static_storage*, reax_list**, output_controls*, + interaction_function* ); + void Velocity_Verlet_Isotropic_NPT( reax_system*, control_params*, - simulation_data*, static_storage*, - reax_list**, output_controls* ); + simulation_data*, static_storage*, reax_list**, output_controls*, + interaction_function* ); + void Velocity_Verlet_Berendsen_Isotropic_NPT( reax_system*, control_params*, - simulation_data*, static_storage*, - reax_list**, output_controls* ); + simulation_data*, static_storage*, reax_list**, output_controls*, + interaction_function* ); + void Velocity_Verlet_Berendsen_SemiIsotropic_NPT( reax_system*, control_params*, - simulation_data*, - static_storage*, reax_list**, - output_controls* ); - -//upon Adri's request moved from parallel code to serial code -void Velocity_Verlet_Berendsen_NVT( reax_system* , - control_params* , - simulation_data *, - static_storage *, - reax_list **, - output_controls * - ); + simulation_data*, static_storage*, reax_list**, output_controls*, + interaction_function* ); + +void Velocity_Verlet_Berendsen_NVT( reax_system* , control_params* , + simulation_data *, static_storage *, reax_list **, output_controls *, + interaction_function* ); + + #endif diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c index c7db2641e3eeb2d92da124254393becef33b2456..5ea019d003c2e42e8f2b20dc16cd8e429e633e3c 100644 --- a/sPuReMD/src/lin_alg.c +++ b/sPuReMD/src/lin_alg.c @@ -20,6 +20,7 @@ ----------------------------------------------------------------------*/ #include "lin_alg.h" + #include "allocate.h" #include "tool_box.h" #include "vector.h" @@ -34,6 +35,7 @@ #include "lapacke.h" #endif + typedef struct { unsigned int j; @@ -41,39 +43,57 @@ typedef struct } sparse_matrix_entry; +enum preconditioner_type +{ + LEFT = 0, + RIGHT = 1, +}; + + /* global to make OpenMP shared (Sparse_MatVec) */ #ifdef _OPENMP -real *b_local = NULL; +static real *b_local = NULL; #endif /* global to make OpenMP shared (apply_preconditioner) */ -real *Dinv_L = NULL, *Dinv_U = NULL; +static real *Dinv_L = NULL; +static real *Dinv_U = NULL; /* global to make OpenMP shared (tri_solve_level_sched) */ -int levels = 1; -int levels_L = 1, levels_U = 1; -unsigned int *row_levels_L = NULL, *level_rows_L = NULL, *level_rows_cnt_L = NULL; -unsigned int *row_levels_U = NULL, *level_rows_U = NULL, *level_rows_cnt_U = NULL; -unsigned int *row_levels, *level_rows, *level_rows_cnt; -unsigned int *top = NULL; +static int levels = 1; +static int levels_L = 1; +static int levels_U = 1; +static unsigned int *row_levels_L = NULL; +static unsigned int *level_rows_L = NULL; +static unsigned int *level_rows_cnt_L = NULL; +static unsigned int *row_levels_U = NULL; +static unsigned int *level_rows_U = NULL; +static unsigned int *level_rows_cnt_U = NULL; +static unsigned int *row_levels; +static unsigned int *level_rows; +static unsigned int *level_rows_cnt; +static unsigned int *top = NULL; /* global to make OpenMP shared (graph_coloring) */ -unsigned int *color = NULL; -unsigned int *to_color = NULL; -unsigned int *conflict = NULL; -unsigned int *conflict_cnt = NULL; -unsigned int *temp_ptr; -unsigned int *recolor = NULL; -unsigned int recolor_cnt; -unsigned int *color_top = NULL; +static unsigned int *color = NULL; +static unsigned int *to_color = NULL; +static unsigned int *conflict = NULL; +static unsigned int *conflict_cnt = NULL; +static unsigned int *temp_ptr; +static unsigned int *recolor = NULL; +static unsigned int recolor_cnt; +static unsigned int *color_top = NULL; /* global to make OpenMP shared (sort_colors) */ -unsigned int *permuted_row_col = NULL; -unsigned int *permuted_row_col_inv = NULL; -real *y_p = NULL; +static unsigned int *permuted_row_col = NULL; +static unsigned int *permuted_row_col_inv = NULL; +static real *y_p = NULL; /* global to make OpenMP shared (permute_vector) */ -real *x_p = NULL; -unsigned int *mapping = NULL; -sparse_matrix *H_full; -sparse_matrix *H_p; +static real *x_p = NULL; +static unsigned int *mapping = NULL; +static sparse_matrix *H_full; +static sparse_matrix *H_p; /* global to make OpenMP shared (jacobi_iter) */ -real *Dinv_b = NULL, *rp = NULL, *rp2 = NULL, *rp3 = NULL; +static real *Dinv_b = NULL; +static real *rp = NULL; +static real *rp2 = NULL; +static real *rp3 = NULL; #if defined(TEST_MAT) @@ -2734,8 +2754,10 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, const real * const b, real * const x, const TRIANGULARITY tri, const unsigned int maxiter ) { - unsigned int i, k, si = 0, ei = 0, iter; + unsigned int i, k, si, ei, iter; + si = 0; + ei = 0; iter = 0; #ifdef _OPENMP @@ -2815,176 +2837,274 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, } -/* Solve triangular system LU*x = y using level scheduling +/* Apply left-sided preconditioning while solver M^{-1}Ax = M^{-1}b * - * workspace: data struct containing matrices, lower/upper triangular, stored in CSR + * workspace: data struct containing matrices, stored in CSR * control: data struct containing parameters - * y: constants in linear system (RHS) - * x: solution + * y: vector to which to apply preconditioning, + * specific to internals of iterative solver being used + * x: preconditioned vector (output) * fresh_pre: parameter indicating if this is a newly computed (fresh) preconditioner + * side: used in determining how to apply preconditioner if the preconditioner is + * factorized as M = M_{1}M_{2} (e.g., incomplete LU, A \approx LU) * * Assumptions: * Matrices have non-zero diagonals * Each row of a matrix has at least one non-zero (i.e., no rows with all zeros) */ static void apply_preconditioner( const static_storage * const workspace, const control_params * const control, - const real * const y, real * const x, const int fresh_pre ) + const real * const y, real * const x, const int fresh_pre, + const int side ) { int i, si; /* no preconditioning */ if ( control->cm_solver_pre_comp_type == NONE_PC ) { - Vector_Copy( x, y, workspace->H->n ); + if ( x != y ) + { + Vector_Copy( x, y, workspace->H->n ); + } } else { - switch ( control->cm_solver_pre_app_type ) + switch ( side ) { - case TRI_SOLVE_PA: - switch ( control->cm_solver_pre_comp_type ) + case LEFT: + switch ( control->cm_solver_pre_app_type ) { - case DIAG_PC: - diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); - break; - case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: - tri_solve( workspace->L, y, x, workspace->L->n, LOWER ); - tri_solve( workspace->U, x, x, workspace->U->n, UPPER ); - break; - case SAI_PC: - Sparse_MatVec_full( workspace->H_app_inv, y, x ); - break; - default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); - exit( INVALID_INPUT ); - break; - } - break; - case TRI_SOLVE_LEVEL_SCHED_PA: - switch ( control->cm_solver_pre_comp_type ) - { - case DIAG_PC: - diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); - break; - case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: - tri_solve_level_sched( workspace->L, y, x, workspace->L->n, LOWER, fresh_pre ); - tri_solve_level_sched( workspace->U, x, x, workspace->U->n, UPPER, fresh_pre ); + case TRI_SOLVE_PA: + switch ( control->cm_solver_pre_comp_type ) + { + case DIAG_PC: + diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + tri_solve( workspace->L, y, x, workspace->L->n, LOWER ); + break; + case SAI_PC: + Sparse_MatVec_full( workspace->H_app_inv, y, x ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } break; - case SAI_PC: - Sparse_MatVec_full( workspace->H_app_inv, y, x ); + case TRI_SOLVE_LEVEL_SCHED_PA: + switch ( control->cm_solver_pre_comp_type ) + { + case DIAG_PC: + diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + tri_solve_level_sched( workspace->L, y, x, workspace->L->n, LOWER, fresh_pre ); + break; + case SAI_PC: + Sparse_MatVec_full( workspace->H_app_inv, y, x ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } break; - default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); - exit( INVALID_INPUT ); + case TRI_SOLVE_GC_PA: + switch ( control->cm_solver_pre_comp_type ) + { + case DIAG_PC: + case SAI_PC: + fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: +#ifdef _OPENMP + #pragma omp single +#endif + { + memcpy( y_p, y, sizeof(real) * workspace->H->n ); + } + + permute_vector( y_p, workspace->H->n, FALSE, LOWER ); + tri_solve_level_sched( workspace->L, y_p, x, workspace->L->n, LOWER, fresh_pre ); break; - } - break; - case TRI_SOLVE_GC_PA: - switch ( control->cm_solver_pre_comp_type ) - { - case DIAG_PC: - case SAI_PC: - fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); - exit( INVALID_INPUT ); + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } break; - case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: + case JACOBI_ITER_PA: + switch ( control->cm_solver_pre_comp_type ) + { + case DIAG_PC: + case SAI_PC: + fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: #ifdef _OPENMP - #pragma omp single + #pragma omp single #endif - { - memcpy( y_p, y, sizeof(real) * workspace->H->n ); - } + { + if ( Dinv_L == NULL ) + { + Dinv_L = (real*) smalloc( sizeof(real) * workspace->L->n, + "apply_preconditioner::Dinv_L" ); + } + } - permute_vector( y_p, workspace->H->n, FALSE, LOWER ); - tri_solve_level_sched( workspace->L, y_p, x, workspace->L->n, LOWER, fresh_pre ); - tri_solve_level_sched( workspace->U, x, x, workspace->U->n, UPPER, fresh_pre ); - permute_vector( x, workspace->H->n, TRUE, UPPER ); - break; + /* construct D^{-1}_L */ + if ( fresh_pre == TRUE ) + { +#ifdef _OPENMP + #pragma omp for schedule(static) +#endif + for ( i = 0; i < workspace->L->n; ++i ) + { + si = workspace->L->start[i + 1] - 1; + Dinv_L[i] = 1.0 / workspace->L->val[si]; + } + } + + jacobi_iter( workspace->L, Dinv_L, y, x, LOWER, control->cm_solver_pre_app_jacobi_iters ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + break; default: fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); break; + } break; - case JACOBI_ITER_PA: - switch ( control->cm_solver_pre_comp_type ) + + case RIGHT: + switch ( control->cm_solver_pre_app_type ) { - case DIAG_PC: - case SAI_PC: - fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); - exit( INVALID_INPUT ); + case TRI_SOLVE_PA: + switch ( control->cm_solver_pre_comp_type ) + { + case DIAG_PC: + case SAI_PC: + if ( x != y ) + { + Vector_Copy( x, y, workspace->H->n ); + } + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + tri_solve( workspace->U, y, x, workspace->U->n, UPPER ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } break; - case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: -#ifdef _OPENMP - #pragma omp single -#endif - { - if ( Dinv_L == NULL ) + case TRI_SOLVE_LEVEL_SCHED_PA: + switch ( control->cm_solver_pre_comp_type ) { - Dinv_L = (real*) smalloc( sizeof(real) * workspace->L->n, - "apply_preconditioner::Dinv_L" ); + case DIAG_PC: + case SAI_PC: + if ( x != y ) + { + Vector_Copy( x, y, workspace->H->n ); + } + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + tri_solve_level_sched( workspace->U, y, x, workspace->U->n, UPPER, fresh_pre ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; } - } - - /* construct D^{-1}_L */ - if ( fresh_pre == TRUE ) - { -#ifdef _OPENMP - #pragma omp for schedule(static) -#endif - for ( i = 0; i < workspace->L->n; ++i ) + break; + case TRI_SOLVE_GC_PA: + switch ( control->cm_solver_pre_comp_type ) { - si = workspace->L->start[i + 1] - 1; - Dinv_L[i] = 1. / workspace->L->val[si]; + case DIAG_PC: + case SAI_PC: + fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + tri_solve_level_sched( workspace->U, y, x, workspace->U->n, UPPER, fresh_pre ); + permute_vector( x, workspace->H->n, TRUE, UPPER ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; } - } - - jacobi_iter( workspace->L, Dinv_L, y, x, LOWER, control->cm_solver_pre_app_jacobi_iters ); - + break; + case JACOBI_ITER_PA: + switch ( control->cm_solver_pre_comp_type ) + { + case DIAG_PC: + case SAI_PC: + fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: #ifdef _OPENMP - #pragma omp single + #pragma omp single #endif - { - if ( Dinv_U == NULL ) { - Dinv_U = (real*) smalloc( sizeof(real) * workspace->U->n, - "apply_preconditioner::Dinv_U" ); + if ( Dinv_U == NULL ) + { + Dinv_U = (real*) smalloc( sizeof(real) * workspace->U->n, + "apply_preconditioner::Dinv_U" ); + } } - } - /* construct D^{-1}_U */ - if ( fresh_pre == TRUE ) - { + /* construct D^{-1}_U */ + if ( fresh_pre == TRUE ) + { #ifdef _OPENMP - #pragma omp for schedule(static) + #pragma omp for schedule(static) #endif - for ( i = 0; i < workspace->U->n; ++i ) - { - si = workspace->U->start[i]; - Dinv_U[i] = 1. / workspace->U->val[si]; + for ( i = 0; i < workspace->U->n; ++i ) + { + si = workspace->U->start[i]; + Dinv_U[i] = 1. / workspace->U->val[si]; + } } - } - jacobi_iter( workspace->U, Dinv_U, y, x, UPPER, control->cm_solver_pre_app_jacobi_iters ); - break; + jacobi_iter( workspace->U, Dinv_U, y, x, UPPER, control->cm_solver_pre_app_jacobi_iters ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + break; default: fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); break; + } break; - default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); - exit( INVALID_INPUT ); - break; - } } } @@ -2996,17 +3116,22 @@ int GMRES( const static_storage * const workspace, const control_params * const const real tol, real * const x, const int fresh_pre ) { int i, j, k, itr, N, g_j, g_itr; - real cc, tmp1, tmp2, temp, ret_temp, bnorm; + real cc, tmp1, tmp2, temp, bnorm; real t_start, t_ortho, t_pa, t_spmv, t_ts, t_vops; N = H->n; g_j = 0; g_itr = 0; + t_ortho = 0.0; + t_pa = 0.0; + t_spmv = 0.0; + t_ts = 0.0; + t_vops = 0.0; #ifdef _OPENMP #pragma omp parallel default(none) \ - private(i, j, k, itr, bnorm, ret_temp, t_start) \ - shared(N, cc, tmp1, tmp2, temp, g_itr, g_j, stderr) \ + private(i, j, k, itr, bnorm, temp, t_start) \ + shared(N, cc, tmp1, tmp2, g_itr, g_j, stderr) \ reduction(+: t_ortho, t_pa, t_spmv, t_ts, t_vops) #endif { @@ -3031,25 +3156,25 @@ int GMRES( const static_storage * const workspace, const control_params * const t_spmv += Get_Timing_Info( t_start ); t_start = Get_Time( ); - Vector_Sum( workspace->b_prc, 1., b, -1., workspace->b_prm, N ); + Vector_Sum( workspace->b_prc, 1.0, b, -1.0, workspace->b_prm, N ); t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); - apply_preconditioner( workspace, control, workspace->b_prc, workspace->v[0], - itr == 0 ? fresh_pre : FALSE ); + apply_preconditioner( workspace, control, workspace->b_prc, workspace->b_prm, + itr == 0 ? fresh_pre : FALSE, LEFT ); + apply_preconditioner( workspace, control, workspace->b_prm, workspace->v[0], + itr == 0 ? fresh_pre : FALSE, RIGHT ); t_pa += Get_Timing_Info( t_start ); t_start = Get_Time( ); - ret_temp = Norm( workspace->v[0], N ); + temp = Norm( workspace->v[0], N ); #ifdef _OPENMP #pragma omp single #endif - { - workspace->g[0] = ret_temp; - } + workspace->g[0] = temp; - Vector_Scale( workspace->v[0], 1. / ret_temp, workspace->v[0], N ); + Vector_Scale( workspace->v[0], 1.0 / temp, workspace->v[0], N ); t_vops += Get_Timing_Info( t_start ); /* GMRES inner-loop */ @@ -3061,87 +3186,40 @@ int GMRES( const static_storage * const workspace, const control_params * const t_spmv += Get_Timing_Info( t_start ); t_start = Get_Time( ); - apply_preconditioner( workspace, control, workspace->b_prc, workspace->v[j + 1], FALSE ); + apply_preconditioner( workspace, control, workspace->b_prc, + workspace->b_prm, FALSE, LEFT ); + apply_preconditioner( workspace, control, workspace->b_prm, + workspace->v[j + 1], FALSE, RIGHT ); t_pa += Get_Timing_Info( t_start ); -// if ( control->cm_solver_pre_comp_type == DIAG_PC ) -// { /* apply modified Gram-Schmidt to orthogonalize the new residual */ t_start = Get_Time( ); for ( i = 0; i <= j; i++ ) { - ret_temp = Dot( workspace->v[i], workspace->v[j + 1], N ); + temp = Dot( workspace->v[i], workspace->v[j + 1], N ); #ifdef _OPENMP #pragma omp single #endif - { - workspace->h[i][j] = ret_temp; - } + workspace->h[i][j] = temp; - Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N ); + Vector_Add( workspace->v[j + 1], -1.0 * temp, workspace->v[i], N ); } t_vops += Get_Timing_Info( t_start ); -// } -// else -// { -// //TODO: investigate correctness of not explicitly orthogonalizing first few vectors -// /* apply modified Gram-Schmidt to orthogonalize the new residual */ -// -// -// -// { -// t_start = Get_Time( ); -// } -// -// #pragma omp single -// -// { -// for ( i = 0; i < j - 1; i++ ) -// { -// workspace->h[i][j] = 0.0; -// } -// } -// -// for ( i = MAX(j - 1, 0); i <= j; i++ ) -// { -// ret_temp = Dot( workspace->v[i], workspace->v[j + 1], N ); -// -// #pragma omp single -// -// { -// workspace->h[i][j] = ret_temp; -// } -// -// Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N ); -// } -// -// -// -// { -// t_vops += Get_Timing_Info( t_start ); -// } -// } t_start = Get_Time( ); - ret_temp = Norm( workspace->v[j + 1], N ); + temp = Norm( workspace->v[j + 1], N ); #ifdef _OPENMP #pragma omp single #endif - { - workspace->h[j + 1][j] = ret_temp; - } + workspace->h[j + 1][j] = temp; - Vector_Scale( workspace->v[j + 1], - 1.0 / workspace->h[j + 1][j], workspace->v[j + 1], N ); + Vector_Scale( workspace->v[j + 1], 1.0 / temp, workspace->v[j + 1], N ); t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); -// if ( control->cm_solver_pre_comp_type == NONE_PC || -// control->cm_solver_pre_comp_type == DIAG_PC ) -// { /* Givens rotations on the upper-Hessenberg matrix to make it U */ #ifdef _OPENMP #pragma omp single @@ -3156,50 +3234,23 @@ int GMRES( const static_storage * const workspace, const control_params * const workspace->hs[j] = workspace->h[j + 1][j] / cc; } - tmp1 = workspace->hc[i] * workspace->h[i][j] + - workspace->hs[i] * workspace->h[i + 1][j]; - tmp2 = -workspace->hs[i] * workspace->h[i][j] + - workspace->hc[i] * workspace->h[i + 1][j]; + tmp1 = workspace->hc[i] * workspace->h[i][j] + + workspace->hs[i] * workspace->h[i + 1][j]; + tmp2 = -1.0 * workspace->hs[i] * workspace->h[i][j] + + workspace->hc[i] * workspace->h[i + 1][j]; workspace->h[i][j] = tmp1; workspace->h[i + 1][j] = tmp2; } -// } -// else -// { -// //TODO: investigate correctness of not explicitly orthogonalizing first few vectors -// /* Givens rotations on the upper-Hessenberg matrix to make it U */ -// for ( i = MAX(j - 1, 0); i <= j; i++ ) -// { -// if ( i == j ) -// { -// cc = SQRT( SQR(workspace->h[j][j]) + SQR(workspace->h[j + 1][j]) ); -// workspace->hc[j] = workspace->h[j][j] / cc; -// workspace->hs[j] = workspace->h[j + 1][j] / cc; -// } -// -// tmp1 = workspace->hc[i] * workspace->h[i][j] + -// workspace->hs[i] * workspace->h[i + 1][j]; -// tmp2 = -workspace->hs[i] * workspace->h[i][j] + -// workspace->hc[i] * workspace->h[i + 1][j]; -// -// workspace->h[i][j] = tmp1; -// workspace->h[i + 1][j] = tmp2; -// } -// } /* apply Givens rotations to the rhs as well */ - tmp1 = workspace->hc[j] * workspace->g[j]; - tmp2 = -workspace->hs[j] * workspace->g[j]; + tmp1 = workspace->hc[j] * workspace->g[j]; + tmp2 = -1.0 * workspace->hs[j] * workspace->g[j]; workspace->g[j] = tmp1; workspace->g[j + 1] = tmp2; } t_ortho += Get_Timing_Info( t_start ); - -#ifdef _OPENMP - #pragma omp barrier -#endif } /* solve Hy = g: H is now upper-triangular, do back-substitution */ @@ -3230,7 +3281,7 @@ int GMRES( const static_storage * const workspace, const control_params * const Vector_Add( workspace->p, workspace->y[i], workspace->v[i], N ); } - Vector_Add( x, 1., workspace->p, N ); + Vector_Add( x, 1.0, workspace->p, N ); t_vops += Get_Timing_Info( t_start ); /* stopping condition */ @@ -3266,10 +3317,10 @@ int GMRES( const static_storage * const workspace, const control_params * const if ( g_itr >= control->cm_solver_max_iters ) { fprintf( stderr, "[WARNING] GMRES convergence failed (%d outer iters)\n", g_itr ); - return g_itr * (control->cm_solver_restart + 1) + g_j + 1; + return g_itr * control->cm_solver_restart + g_j; } - return g_itr * (control->cm_solver_restart + 1) + g_j + 1; + return g_itr * control->cm_solver_restart + g_j + 1; } @@ -3279,21 +3330,27 @@ int GMRES_HouseHolder( const static_storage * const workspace, real * const x, const int fresh_pre ) { int i, j, k, itr, N, g_j, g_itr; - real cc, tmp1, tmp2, temp, bnorm, ret_temp; + real cc, tmp1, tmp2, temp, bnorm; real v[10000], z[control->cm_solver_restart + 2][10000], w[control->cm_solver_restart + 2]; real u[control->cm_solver_restart + 2][10000]; real t_start, t_ortho, t_pa, t_spmv, t_ts, t_vops; j = 0; N = H->n; + t_ortho = 0.0; + t_pa = 0.0; + t_spmv = 0.0; + t_ts = 0.0; + t_vops = 0.0; #ifdef _OPENMP #pragma omp parallel default(none) \ - private(i, j, k, itr, bnorm, ret_temp, t_start) \ - shared(v, z, w, u, tol, N, cc, tmp1, tmp2, temp, g_itr, g_j, stderr) \ + private(i, j, k, itr, bnorm, temp, t_start) \ + shared(v, z, w, u, tol, N, cc, tmp1, tmp2, g_itr, g_j, stderr) \ reduction(+: t_ortho, t_pa, t_spmv, t_ts, t_vops) #endif { + j = 0; t_ortho = 0.0; t_pa = 0.0; t_spmv = 0.0; @@ -3319,7 +3376,10 @@ int GMRES_HouseHolder( const static_storage * const workspace, t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); - apply_preconditioner( workspace, control, workspace->b_prc, z[0], fresh_pre ); + apply_preconditioner( workspace, control, workspace->b_prc, + workspace->b_prm, fresh_pre, LEFT ); + apply_preconditioner( workspace, control, workspace->b_prm, + z[0], fresh_pre, RIGHT ); t_pa += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3353,7 +3413,10 @@ int GMRES_HouseHolder( const static_storage * const workspace, t_spmv += Get_Timing_Info( t_start ); t_start = Get_Time( ); - apply_preconditioner( workspace, control, workspace->b_prc, v, fresh_pre ); + apply_preconditioner( workspace, control, workspace->b_prc, + workspace->b_prm, fresh_pre, LEFT ); + apply_preconditioner( workspace, control, workspace->b_prm, + v, fresh_pre, RIGHT ); t_pa += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3367,11 +3430,11 @@ int GMRES_HouseHolder( const static_storage * const workspace, /* compute the HouseHolder unit vector u_j+1 */ Vector_MakeZero( u[j + 1], j + 1 ); Vector_Copy( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ); - ret_temp = Norm( v + (j + 1), N - (j + 1) ); + temp = Norm( v + (j + 1), N - (j + 1) ); #ifdef _OPENMP #pragma omp single #endif - u[j + 1][j + 1] += ( v[j + 1] < 0.0 ? -1 : 1 ) * ret_temp; + u[j + 1][j + 1] += ( v[j + 1] < 0.0 ? -1 : 1 ) * temp; #ifdef _OPENMP #pragma omp barrier @@ -3380,11 +3443,11 @@ int GMRES_HouseHolder( const static_storage * const workspace, Vector_Scale( u[j + 1], 1 / Norm( u[j + 1], N ), u[j + 1], N ); /* overwrite v with P_m+1 * v */ - ret_temp = 2 * Dot( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ) * u[j + 1][j + 1]; + temp = 2 * Dot( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ) * u[j + 1][j + 1]; #ifdef _OPENMP #pragma omp single #endif - v[j + 1] -= ret_temp; + v[j + 1] -= temp; #ifdef _OPENMP #pragma omp barrier @@ -3523,6 +3586,9 @@ int CG( const static_storage * const workspace, const control_params * const con r = workspace->r; p = workspace->q; z = workspace->p; + t_pa = 0.0; + t_spmv = 0.0; + t_vops = 0.0; #ifdef _OPENMP #pragma omp parallel default(none) \ @@ -3549,7 +3615,8 @@ int CG( const static_storage * const workspace, const control_params * const con t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); - apply_preconditioner( workspace, control, r, z, fresh_pre ); + apply_preconditioner( workspace, control, r, d, fresh_pre, LEFT ); + apply_preconditioner( workspace, control, d, z, fresh_pre, RIGHT ); t_pa += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3572,7 +3639,8 @@ int CG( const static_storage * const workspace, const control_params * const con t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); - apply_preconditioner( workspace, control, r, z, FALSE ); + apply_preconditioner( workspace, control, r, d, FALSE, LEFT ); + apply_preconditioner( workspace, control, d, z, FALSE, RIGHT ); t_pa += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3620,6 +3688,9 @@ int SDM( const static_storage * const workspace, const control_params * const co real t_start, t_pa, t_spmv, t_vops; N = H->n; + t_pa = 0.0; + t_spmv = 0.0; + t_vops = 0.0; #ifdef _OPENMP #pragma omp parallel default(none) \ @@ -3645,7 +3716,8 @@ int SDM( const static_storage * const workspace, const control_params * const co t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); - apply_preconditioner( workspace, control, workspace->r, workspace->d, fresh_pre ); + apply_preconditioner( workspace, control, workspace->r, workspace->d, fresh_pre, LEFT ); + apply_preconditioner( workspace, control, workspace->r, workspace->d, fresh_pre, RIGHT ); t_pa += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3677,7 +3749,8 @@ int SDM( const static_storage * const workspace, const control_params * const co t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); - apply_preconditioner( workspace, control, workspace->r, workspace->d, FALSE ); + apply_preconditioner( workspace, control, workspace->r, workspace->d, FALSE, LEFT ); + apply_preconditioner( workspace, control, workspace->r, workspace->d, FALSE, RIGHT ); t_pa += Get_Timing_Info( t_start ); } diff --git a/sPuReMD/src/lookup.c b/sPuReMD/src/lookup.c index 7ec372f4392fea03e81219eb2780c64ab45b7e4c..c10931436b73d78feb0be46d0b905028ff6e64e7 100644 --- a/sPuReMD/src/lookup.c +++ b/sPuReMD/src/lookup.c @@ -249,7 +249,8 @@ void LR_Lookup( LR_lookup_table *t, real r, LR_data *y ) } -void Make_LR_Lookup_Table( reax_system *system, control_params *control ) +void Make_LR_Lookup_Table( reax_system *system, control_params *control, + static_storage *workspace ) { int i, j, r; int num_atom_types; @@ -286,11 +287,11 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control ) /* allocate Long-Range LookUp Table space based on number of atom types in the ffield file */ - LR = (LR_lookup_table**) smalloc( num_atom_types * sizeof(LR_lookup_table*), + workspace->LR = (LR_lookup_table**) smalloc( num_atom_types * sizeof(LR_lookup_table*), "Make_LR_Lookup_Table::LR" ); for ( i = 0; i < num_atom_types; ++i ) { - LR[i] = (LR_lookup_table*) smalloc( num_atom_types * sizeof(LR_lookup_table), + workspace->LR[i] = (LR_lookup_table*) smalloc( num_atom_types * sizeof(LR_lookup_table), "Make_LR_Lookup_Table::LR[i]"); } @@ -316,61 +317,62 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control ) { if ( existing_types[j] ) { - LR[i][j].xmin = 0; - LR[i][j].xmax = control->r_cut; - LR[i][j].n = control->tabulate + 1; - LR[i][j].dx = dr; - LR[i][j].inv_dx = control->tabulate / control->r_cut; - LR[i][j].y = (LR_data*) - smalloc( LR[i][j].n * sizeof(LR_data), + workspace->LR[i][j].xmin = 0; + workspace->LR[i][j].xmax = control->r_cut; + workspace->LR[i][j].n = control->tabulate + 1; + workspace->LR[i][j].dx = dr; + workspace->LR[i][j].inv_dx = control->tabulate / control->r_cut; + workspace->LR[i][j].y = (LR_data*) + smalloc( workspace->LR[i][j].n * sizeof(LR_data), "Make_LR_Lookup_Table::LR[i][j].y" ); - LR[i][j].H = (cubic_spline_coef*) - smalloc( LR[i][j].n * sizeof(cubic_spline_coef), + workspace->LR[i][j].H = (cubic_spline_coef*) + smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef), "Make_LR_Lookup_Table::LR[i][j].H" ); - LR[i][j].vdW = (cubic_spline_coef*) - smalloc( LR[i][j].n * sizeof(cubic_spline_coef), + workspace->LR[i][j].vdW = (cubic_spline_coef*) + smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef), "Make_LR_Lookup_Table::LR[i][j].vdW" ); - LR[i][j].CEvd = (cubic_spline_coef*) - smalloc( LR[i][j].n * sizeof(cubic_spline_coef), + workspace->LR[i][j].CEvd = (cubic_spline_coef*) + smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef), "Make_LR_Lookup_Table::LR[i][j].CEvd" ); - LR[i][j].ele = (cubic_spline_coef*) - smalloc( LR[i][j].n * sizeof(cubic_spline_coef), + workspace->LR[i][j].ele = (cubic_spline_coef*) + smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef), "Make_LR_Lookup_Table::LR[i][j].ele" ); - LR[i][j].CEclmb = (cubic_spline_coef*) - smalloc( LR[i][j].n * sizeof(cubic_spline_coef), + workspace->LR[i][j].CEclmb = (cubic_spline_coef*) + smalloc( workspace->LR[i][j].n * sizeof(cubic_spline_coef), "Make_LR_Lookup_Table::LR[i][j].CEclmb" ); for ( r = 1; r <= control->tabulate; ++r ) { - LR_vdW_Coulomb( system, control, i, j, r * dr, &(LR[i][j].y[r]) ); - h[r] = LR[i][j].dx; - fh[r] = LR[i][j].y[r].H; - fvdw[r] = LR[i][j].y[r].e_vdW; - fCEvd[r] = LR[i][j].y[r].CEvd; - fele[r] = LR[i][j].y[r].e_ele; - fCEclmb[r] = LR[i][j].y[r].CEclmb; + LR_vdW_Coulomb( system, control, i, j, r * dr, + &(workspace->LR[i][j].y[r]) ); + h[r] = workspace->LR[i][j].dx; + fh[r] = workspace->LR[i][j].y[r].H; + fvdw[r] = workspace->LR[i][j].y[r].e_vdW; + fCEvd[r] = workspace->LR[i][j].y[r].CEvd; + fele[r] = workspace->LR[i][j].y[r].e_ele; + fCEclmb[r] = workspace->LR[i][j].y[r].CEclmb; if ( r == 1 ) { - v0_vdw = LR[i][j].y[r].CEvd; - v0_ele = LR[i][j].y[r].CEclmb; + v0_vdw = workspace->LR[i][j].y[r].CEvd; + v0_ele = workspace->LR[i][j].y[r].CEclmb; } else if ( r == control->tabulate ) { - vlast_vdw = LR[i][j].y[r].CEvd; - vlast_ele = LR[i][j].y[r].CEclmb; + vlast_vdw = workspace->LR[i][j].y[r].CEvd; + vlast_ele = workspace->LR[i][j].y[r].CEclmb; } } Natural_Cubic_Spline( h, fh, - &(LR[i][j].H[1]), control->tabulate + 1 ); + &(workspace->LR[i][j].H[1]), control->tabulate + 1 ); // fprintf( stderr, "%-6s %-6s %-6s\n", "r", "h", "fh" ); // for( r = 1; r <= control->tabulate; ++r ) // fprintf( stderr, "%f %f %f\n", r * dr, h[r], fh[r] ); Complete_Cubic_Spline( h, fvdw, v0_vdw, vlast_vdw, - &(LR[i][j].vdW[1]), control->tabulate + 1 ); + &(workspace->LR[i][j].vdW[1]), control->tabulate + 1 ); // fprintf( stderr, "%-6s %-6s %-6s\n", "r", "h", "fvdw" ); // for( r = 1; r <= control->tabulate; ++r ) @@ -378,7 +380,7 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control ) // fprintf( stderr, "v0_vdw: %f, vlast_vdw: %f\n", v0_vdw, vlast_vdw ); Natural_Cubic_Spline( h, fCEvd, - &(LR[i][j].CEvd[1]), control->tabulate + 1 ); + &(workspace->LR[i][j].CEvd[1]), control->tabulate + 1 ); // fprintf( stderr, "%-6s %-6s %-6s\n", "r", "h", "fele" ); // for( r = 1; r <= control->tabulate; ++r ) @@ -386,7 +388,7 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control ) // fprintf( stderr, "v0_ele: %f, vlast_ele: %f\n", v0_ele, vlast_ele ); Complete_Cubic_Spline( h, fele, v0_ele, vlast_ele, - &(LR[i][j].ele[1]), control->tabulate + 1 ); + &(workspace->LR[i][j].ele[1]), control->tabulate + 1 ); // fprintf( stderr, "%-6s %-6s %-6s\n", "r", "h", "fele" ); // for( r = 1; r <= control->tabulate; ++r ) @@ -394,7 +396,7 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control ) // fprintf( stderr, "v0_ele: %f, vlast_ele: %f\n", v0_ele, vlast_ele ); Natural_Cubic_Spline( h, fCEclmb, - &(LR[i][j].CEclmb[1]), control->tabulate + 1 ); + &(workspace->LR[i][j].CEclmb[1]), control->tabulate + 1 ); } } } @@ -410,7 +412,7 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control ) for( r = 1; r <= 100; ++r ) { rand_dist = (real)rand()/RAND_MAX * control->r_cut; LR_vdW_Coulomb( system, control, i, j, rand_dist, &y ); - LR_Lookup( &(LR[i][j]), rand_dist, &y_spline ); + LR_Lookup( &(workspace->LR[i][j]), rand_dist, &y_spline ); evdw_abserr = FABS(y.e_vdW - y_spline.e_vdW); evdw_relerr = FABS(evdw_abserr / y.e_vdW); @@ -457,7 +459,8 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control ) } -void Finalize_LR_Lookup_Table( reax_system *system, control_params *control ) +void Finalize_LR_Lookup_Table( reax_system *system, control_params *control, + static_storage *workspace ) { int i, j; int num_atom_types; @@ -482,20 +485,20 @@ void Finalize_LR_Lookup_Table( reax_system *system, control_params *control ) { if ( existing_types[j] ) { - sfree( LR[i][j].y, "Finalize_LR_Lookup_Table::LR[i][j].y" ); - sfree( LR[i][j].H, "Finalize_LR_Lookup_Table::LR[i][j].H" ); - sfree( LR[i][j].vdW, "Finalize_LR_Lookup_Table::LR[i][j].vdW" ); - sfree( LR[i][j].CEvd, "Finalize_LR_Lookup_Table::LR[i][j].CEvd" ); - sfree( LR[i][j].ele, "Finalize_LR_Lookup_Table::LR[i][j].ele" ); - sfree( LR[i][j].CEclmb, "Finalize_LR_Lookup_Table::LR[i][j].CEclmb" ); + sfree( workspace->LR[i][j].y, "Finalize_LR_Lookup_Table::LR[i][j].y" ); + sfree( workspace->LR[i][j].H, "Finalize_LR_Lookup_Table::LR[i][j].H" ); + sfree( workspace->LR[i][j].vdW, "Finalize_LR_Lookup_Table::LR[i][j].vdW" ); + sfree( workspace->LR[i][j].CEvd, "Finalize_LR_Lookup_Table::LR[i][j].CEvd" ); + sfree( workspace->LR[i][j].ele, "Finalize_LR_Lookup_Table::LR[i][j].ele" ); + sfree( workspace->LR[i][j].CEclmb, "Finalize_LR_Lookup_Table::LR[i][j].CEclmb" ); } } } - sfree( LR[i], "Finalize_LR_Lookup_Table::LR[i]" ); + sfree( workspace->LR[i], "Finalize_LR_Lookup_Table::LR[i]" ); } - sfree( LR, "Finalize_LR_Lookup_Table::LR" ); + sfree( workspace->LR, "Finalize_LR_Lookup_Table::LR" ); } diff --git a/sPuReMD/src/lookup.h b/sPuReMD/src/lookup.h index 29720cfbc05b8522d62f209d42be3bd744b61b2c..6fbd6ffafb921fc41ff17be571e1fb7892f6dac1 100644 --- a/sPuReMD/src/lookup.h +++ b/sPuReMD/src/lookup.h @@ -35,9 +35,11 @@ int Lookup_Index_Of( real, lookup_table* ); real Lookup( real, lookup_table* ); -void Make_LR_Lookup_Table( reax_system*, control_params* ); +void Make_LR_Lookup_Table( reax_system*, control_params*, + static_storage* ); -void Finalize_LR_Lookup_Table( reax_system *, control_params * ); +void Finalize_LR_Lookup_Table( reax_system*, control_params*, + static_storage* ); #endif diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h index 38d1e35c5c7e26990d777315c491cb653779f7fd..e46d217c766685ed6c6d6153f83e2679e100b6b0 100644 --- a/sPuReMD/src/mytypes.h +++ b/sPuReMD/src/mytypes.h @@ -869,6 +869,61 @@ typedef struct } reallocate_data; +typedef struct +{ + real H; + real e_vdW; + real CEvd; + real e_ele; + real CEclmb; +} LR_data; + + +typedef struct +{ + real a; + real b; + real c; + real d; +} cubic_spline_coef; + + +typedef struct +{ + real xmin; + real xmax; + int n; + real dx; + real inv_dx; + real a; + + real m; + real c; + + real *y; +} lookup_table; + + +typedef struct +{ + real xmin; + real xmax; + int n; + real dx; + real inv_dx; + real a; + real m; + real c; + + LR_data *y; + cubic_spline_coef *H; + cubic_spline_coef *vdW; + cubic_spline_coef *CEvd; + cubic_spline_coef *ele; + cubic_spline_coef *CEclmb; +} LR_lookup_table; + + typedef struct { /* bond order related storage */ @@ -948,6 +1003,8 @@ typedef struct reallocate_data realloc; + LR_lookup_table **LR; + #ifdef TEST_FORCES rvec *f_ele; rvec *f_vdw; @@ -1067,59 +1124,15 @@ typedef struct } output_controls; -typedef struct -{ - real H; - real e_vdW; - real CEvd; - real e_ele; - real CEclmb; -} LR_data; - - -typedef struct -{ - real a; - real b; - real c; - real d; -} cubic_spline_coef; - - -typedef struct -{ - real xmin; - real xmax; - int n; - real dx; - real inv_dx; - real a; - - real m; - real c; - - real *y; -} lookup_table; - +/* Function pointer definitions */ +typedef void (*interaction_function)(reax_system*, control_params*, + simulation_data*, static_storage*, reax_list**, output_controls*); -typedef struct -{ - real xmin; - real xmax; - int n; - real dx; - real inv_dx; - real a; - real m; - real c; +typedef void (*evolve_function)(reax_system*, control_params*, + simulation_data*, static_storage*, + reax_list**, output_controls*, interaction_function*); - LR_data *y; - cubic_spline_coef *H; - cubic_spline_coef *vdW; - cubic_spline_coef *CEvd; - cubic_spline_coef *ele; - cubic_spline_coef *CEclmb; -} LR_lookup_table; +typedef void (*callback_function)(reax_atom*, simulation_data*, reax_list*); /* Handle for working with an instance of the sPuReMD library */ @@ -1139,20 +1152,11 @@ typedef struct output_controls *out_control; /* TRUE if file I/O for simulation output enabled, FALSE otherwise */ int output_enabled; + /* */ + interaction_function interaction_functions[NO_OF_INTERACTIONS]; + /* Callback for getting simulation state at the end of each time step */ + callback_function callback; } spuremd_handle; -/* Function pointer definitions */ -typedef void (*interaction_function)(reax_system*, control_params*, - simulation_data*, static_storage*, reax_list**, output_controls*); - -typedef void (*evolve_function)(reax_system*, control_params*, - simulation_data*, static_storage*, - reax_list**, output_controls*); - - -/* Global variables */ -LR_lookup_table **LR; - - #endif diff --git a/sPuReMD/src/print_utils.c b/sPuReMD/src/print_utils.c index d37fe5ea32a8f1829c43d3a86fbe85978f3c6d09..30c071251bdfcfb9a55f147dbeab16b50994132b 100644 --- a/sPuReMD/src/print_utils.c +++ b/sPuReMD/src/print_utils.c @@ -604,6 +604,7 @@ void Output_Results( reax_system *system, control_params *control, data->E_Ang + data->E_Pen + data->E_Coa + data->E_HB + data->E_Tor + data->E_Con + data->E_vdW + data->E_Ele + data->E_Pol; + data->E_Tot = data->E_Pot + E_CONV * data->E_Kin; /* output energies if it is the time */ @@ -723,6 +724,24 @@ void Output_Results( reax_system *system, control_params *control, //t_elapsed = Get_Timing_Info( t_start ); //fprintf(stdout, "append_frame took %.6f seconds\n", t_elapsed ); } + + if ( IS_NAN_REAL(data->E_Pol) ) + { + fprintf( stderr, "[ERROR] NaN detected for polarization energy. Terminating...\n" ); + exit( NUMERIC_BREAKDOWN ); + } + + if ( IS_NAN_REAL(data->E_Pot) ) + { + fprintf( stderr, "[ERROR] NaN detected for potential energy. Terminating...\n" ); + exit( NUMERIC_BREAKDOWN ); + } + + if ( IS_NAN_REAL(data->E_Tot) ) + { + fprintf( stderr, "[ERROR] NaN detected for total energy. Terminating...\n" ); + exit( NUMERIC_BREAKDOWN ); + } } diff --git a/sPuReMD/src/spuremd.c b/sPuReMD/src/spuremd.c index 5eca7537520ab593712d9bf8408a94a2b8dc9deb..f18e71d521d5e0195db3957bb29bfb760d4b94bb 100644 --- a/sPuReMD/src/spuremd.c +++ b/sPuReMD/src/spuremd.c @@ -21,8 +21,6 @@ #include "spuremd.h" -#include "mytypes.h" - #include "analyze.h" #include "control.h" #include "ffield.h" @@ -172,6 +170,7 @@ void* setup( const char * const geo_file, const char * const ffield_file, "Setup::spmd_handle->out_control" ); spmd_handle->output_enabled = TRUE; + spmd_handle->callback = NULL; /* parse geometry file */ Read_System( geo_file, ffield_file, control_file, @@ -185,6 +184,25 @@ void* setup( const char * const geo_file, const char * const ffield_file, } +int setup_callback( const void * const handle, const callback_function callback ) +{ + int ret; + spuremd_handle *spmd_handle; + + + ret = SPUREMD_FAILURE; + + if ( handle != NULL && callback != NULL ) + { + spmd_handle = (spuremd_handle*) handle; + spmd_handle->callback = callback; + ret = SPUREMD_SUCCESS; + } + + return ret; +} + + int simulate( const void * const handle ) { int steps, ret; @@ -199,20 +217,24 @@ int simulate( const void * const handle ) Initialize( spmd_handle->system, spmd_handle->control, spmd_handle->data, spmd_handle->workspace, &spmd_handle->lists, - spmd_handle->out_control, &Evolve, + spmd_handle->out_control, &Evolve, spmd_handle->interaction_functions, spmd_handle->output_enabled ); /* compute f_0 */ //if( control.restart == 0 ) { Reset( spmd_handle->system, spmd_handle->control, spmd_handle->data, spmd_handle->workspace, &spmd_handle->lists ); + Generate_Neighbor_Lists( spmd_handle->system, spmd_handle->control, spmd_handle->data, spmd_handle->workspace, &spmd_handle->lists, spmd_handle->out_control ); //fprintf( stderr, "total: %.2f secs\n", data.timing.nbrs); Compute_Forces( spmd_handle->system, spmd_handle->control, spmd_handle->data, - spmd_handle->workspace, &spmd_handle->lists, spmd_handle->out_control ); + spmd_handle->workspace, &spmd_handle->lists, spmd_handle->out_control, + spmd_handle->interaction_functions ); + Compute_Kinetic_Energy( spmd_handle->system, spmd_handle->data ); + if ( spmd_handle->output_enabled == TRUE ) { Output_Results( spmd_handle->system, spmd_handle->control, spmd_handle->data, @@ -230,7 +252,9 @@ int simulate( const void * const handle ) } Evolve( spmd_handle->system, spmd_handle->control, spmd_handle->data, - spmd_handle->workspace, &spmd_handle->lists, spmd_handle->out_control ); + spmd_handle->workspace, &spmd_handle->lists, spmd_handle->out_control, + spmd_handle->interaction_functions ); + Post_Evolve( spmd_handle->system, spmd_handle->control, spmd_handle->data, spmd_handle->workspace, &spmd_handle->lists, spmd_handle->out_control ); @@ -250,6 +274,12 @@ int simulate( const void * const handle ) Write_Restart( spmd_handle->system, spmd_handle->control, spmd_handle->data, spmd_handle->workspace, spmd_handle->out_control ); } + + if ( spmd_handle->callback != NULL ) + { + spmd_handle->callback( spmd_handle->system->atoms, spmd_handle->data, + spmd_handle->lists ); + } } if ( spmd_handle->out_control->write_steps > 0 && spmd_handle->output_enabled == TRUE ) @@ -261,6 +291,7 @@ int simulate( const void * const handle ) spmd_handle->data->timing.end = Get_Time( ); spmd_handle->data->timing.elapsed = Get_Timing_Info( spmd_handle->data->timing.start ); + if ( spmd_handle->output_enabled == TRUE ) { fprintf( spmd_handle->out_control->log, "total: %.2f secs\n", spmd_handle->data->timing.elapsed ); @@ -302,3 +333,20 @@ int cleanup( const void * const handle ) return ret; } + + +reax_atom* get_atoms( const void * const handle ) +{ + spuremd_handle *spmd_handle; + reax_atom *atoms; + + atoms = NULL; + + if ( handle != NULL ) + { + spmd_handle = (spuremd_handle*) handle; + atoms = spmd_handle->system->atoms; + } + + return atoms; +} diff --git a/sPuReMD/src/spuremd.h b/sPuReMD/src/spuremd.h index 7ed56288f8f26bce4505cbbf98babb42a9d4d658..ed2b98d3cc21ab0a68d5195d4e33ffe422685dfd 100644 --- a/sPuReMD/src/spuremd.h +++ b/sPuReMD/src/spuremd.h @@ -26,12 +26,19 @@ #define SPUREMD_FAILURE (-1) +#include "mytypes.h" + + void* setup( const char * const, const char * const, const char * const ); +int setup_callback( const void * const, const callback_function ); + int simulate( const void * const ); int cleanup( const void * const ); +reax_atom* get_atoms( const void * const ); + #endif diff --git a/sPuReMD/src/two_body_interactions.c b/sPuReMD/src/two_body_interactions.c index e533d1869efdf5152853d65f5c192ddb856248b2..664d7c0f3088d1b11beceb6e6a2647c49fb46cb6 100644 --- a/sPuReMD/src/two_body_interactions.c +++ b/sPuReMD/src/two_body_interactions.c @@ -571,7 +571,7 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control, self_coef = (i == j) ? 0.5 : 1.0; tmin = MIN( type_i, type_j ); tmax = MAX( type_i, type_j ); - t = &( LR[tmin][tmax] ); + t = &( workspace->LR[tmin][tmax] ); /* Cubic Spline Interpolation */ r = (int)(r_ij * t->inv_dx); diff --git a/sPuReMD/src/vector.h b/sPuReMD/src/vector.h index 557057489b3a52a407836360046b6a9e80d9c8df..f150b84cc933bb4f1f23d400473546794ce4cd00 100644 --- a/sPuReMD/src/vector.h +++ b/sPuReMD/src/vector.h @@ -48,8 +48,6 @@ static inline int Vector_isZero( const real * const v, const unsigned int k ) } #ifdef _OPENMP - #pragma omp barrier - #pragma omp for simd reduction(&&: ret_omp) schedule(simd:static) #endif for ( i = 0; i < k; ++i ) @@ -60,10 +58,6 @@ static inline int Vector_isZero( const real * const v, const unsigned int k ) } } -#ifdef _OPENMP - #pragma omp barrier -#endif - return ret_omp; } @@ -154,8 +148,6 @@ static inline real Dot( const real * const v1, const real * const v2, } #ifdef _OPENMP - #pragma omp barrier - #pragma omp for simd reduction(+: ret2_omp) schedule(simd:static) #endif for ( i = 0; i < k; ++i ) @@ -163,10 +155,6 @@ static inline real Dot( const real * const v1, const real * const v2, ret2_omp += v1[i] * v2[i]; } -#ifdef _OPENMP - #pragma omp barrier -#endif - return ret2_omp; } @@ -183,8 +171,6 @@ static inline real Norm( const real * const v1, const unsigned int k ) } #ifdef _OPENMP - #pragma omp barrier - #pragma omp for simd reduction(+: ret2_omp) schedule(simd:static) #endif for ( i = 0; i < k; ++i ) @@ -193,18 +179,12 @@ static inline real Norm( const real * const v1, const unsigned int k ) } #ifdef _OPENMP - #pragma omp barrier - #pragma omp single #endif { ret2_omp = SQRT( ret2_omp ); } -#ifdef _OPENMP - #pragma omp barrier -#endif - return ret2_omp; } @@ -459,7 +439,8 @@ static inline void rvec_Cross( rvec ret, const rvec v1, const rvec v2 ) #endif for ( i = 0; i < 3; ++i ) { - ret[i] = v1[(i + 1) % 3] * v2[(i + 2) % 3] - v1[(i + 2) % 3] * v2[(i + 1) % 3]; + ret[i] = v1[(i + 1) % 3] * v2[(i + 2) % 3] + - v1[(i + 2) % 3] * v2[(i + 1) % 3]; } // ret[0] = v1[1] * v2[2] - v1[2] * v2[1];