diff --git a/sPuReMD/Makefile.am b/sPuReMD/Makefile.am index 2531185b87dbb0b8cd6b4ba80864e75265be43fd..92d61d1fd85e7a54ec316aa0b90e4731c93a720b 100644 --- a/sPuReMD/Makefile.am +++ b/sPuReMD/Makefile.am @@ -20,9 +20,23 @@ check_PROGRAMS = TESTS = if BUILD_TEST +check_PROGRAMS += tests/test_spuremd +check_PROGRAMS += tests/test_lin_alg check_PROGRAMS += tests/test_vector TESTS += $(check_PROGRAMS) +tests_test_spuremd_SOURCES = tests/test_spuremd.cpp +tests_test_spuremd_CPPFLAGS = -Isrc $(GTEST_CPPFLAGS) +tests_test_spuremd_CXXFLAGS = $(GTEST_CXXFLAGS) +tests_test_spuremd_LDFLAGS = $(GTEST_LDFLAGS) $(GTEST_LIBS) +tests_test_spuremd_LDADD = lib/libspuremd.la -lgtest + +tests_test_lin_alg_SOURCES = tests/test_lin_alg.cpp +tests_test_lin_alg_CPPFLAGS = -Isrc $(GTEST_CPPFLAGS) +tests_test_lin_alg_CXXFLAGS = $(GTEST_CXXFLAGS) +tests_test_lin_alg_LDFLAGS = $(GTEST_LDFLAGS) $(GTEST_LIBS) +tests_test_lin_alg_LDADD = -lgtest + tests_test_vector_SOURCES = tests/test_vector.cpp tests_test_vector_CPPFLAGS = -Isrc $(GTEST_CPPFLAGS) tests_test_vector_CXXFLAGS = $(GTEST_CXXFLAGS) diff --git a/sPuReMD/src/allocate.c b/sPuReMD/src/allocate.c index 15f425652cc274249f53e3ef6b5dbd8270d74fa7..6c07fe4f2e8ae5a25f4fc0108057d74a7b00b98e 100644 --- a/sPuReMD/src/allocate.c +++ b/sPuReMD/src/allocate.c @@ -269,7 +269,7 @@ void Reallocate( reax_system *system, control_params *control, static_storage *w if ( realloc->num_far > 0 && nbr_flag ) { - Reallocate_Neighbor_List( (*lists) + FAR_NBRS, + Reallocate_Neighbor_List( &(*lists)[FAR_NBRS], system->N, realloc->num_far * SAFE_ZONE ); realloc->num_far = -1; } @@ -287,29 +287,31 @@ void Reallocate( reax_system *system, control_params *control, static_storage *w if ( control->hb_cut > 0.0 && realloc->hbonds > 0 ) { - Reallocate_HBonds_List(system->N, workspace->num_H, workspace->hbond_index, - (*lists) + HBONDS ); + Reallocate_HBonds_List( system->N, workspace->num_H, workspace->hbond_index, + &(*lists)[HBONDS] ); realloc->hbonds = -1; } num_bonds = est_3body = -1; if ( realloc->bonds > 0 ) { - Reallocate_Bonds_List( system->N, (*lists) + BONDS, &num_bonds, &est_3body ); + Reallocate_Bonds_List( system->N, &(*lists)[BONDS], &num_bonds, &est_3body ); realloc->bonds = -1; realloc->num_3body = MAX( realloc->num_3body, est_3body ); } if ( realloc->num_3body > 0 ) { - Delete_List( TYP_THREE_BODY, (*lists) + THREE_BODIES ); + Delete_List( TYP_THREE_BODY, &(*lists)[THREE_BODIES] ); if ( num_bonds == -1 ) - num_bonds = ((*lists) + BONDS)->total_intrs; + { + num_bonds = (*lists)[BONDS].total_intrs; + } realloc->num_3body *= SAFE_ZONE; Make_List( num_bonds, realloc->num_3body, - TYP_THREE_BODY, (*lists) + THREE_BODIES ); + TYP_THREE_BODY, &(*lists)[THREE_BODIES] ); realloc->num_3body = -1; #if defined(DEBUG_FOCUS) fprintf( stderr, "reallocating 3 bodies\n" ); diff --git a/sPuReMD/src/analyze.c b/sPuReMD/src/analyze.c index 675e900718cdf06c193cc1e7a7bfd6f1aad90ff1..1b260eea95d5e16ab443a8ba3d5ccae8a749e22a 100644 --- a/sPuReMD/src/analyze.c +++ b/sPuReMD/src/analyze.c @@ -64,8 +64,8 @@ void Copy_Bond_List( reax_system *system, control_params *control, reax_list **lists ) { int i, j, top_old; - reax_list *new_bonds = (*lists) + BONDS; - reax_list *old_bonds = (*lists) + OLD_BONDS; + reax_list *new_bonds = &(*lists)[BONDS]; + reax_list *old_bonds = &(*lists)[OLD_BONDS]; for ( top_old = 0, i = 0; i < system->N; ++i ) { @@ -94,8 +94,8 @@ void Copy_Bond_List( reax_system *system, control_params *control, int Compare_Bond_Lists( int atom, control_params *control, reax_list **lists ) { int oldp, newp; - reax_list *new_bonds = (*lists) + BONDS; - reax_list *old_bonds = (*lists) + OLD_BONDS; + reax_list *new_bonds = &(*lists)[BONDS]; + reax_list *old_bonds = &(*lists)[OLD_BONDS]; /*fprintf( stdout, "\n%d\nold_bonds:", atom ); for( oldp = Start_Index( atom, old_bonds ); @@ -229,8 +229,8 @@ void Analyze_Molecules( reax_system *system, control_params *control, int *old_mark = workspace->old_mark; int num_old, num_new; char s[MAX_MOLECULE_SIZE * 10]; - reax_list *new_bonds = (*lists) + BONDS; - reax_list *old_bonds = (*lists) + OLD_BONDS; + reax_list *new_bonds = &(*lists)[BONDS]; + reax_list *old_bonds = &(*lists)[OLD_BONDS]; molecule old_molecules[20], new_molecules[20]; fprintf( fout, "molecular analysis @ %d\n", data->step ); @@ -543,8 +543,8 @@ void Analyze_Fragments( reax_system *system, control_params *control, char fragments[MAX_FRAGMENT_TYPES][MAX_ATOM_TYPES]; int fragment_count[MAX_FRAGMENT_TYPES]; molecule m; - reax_list *new_bonds = (*lists) + BONDS; - //reax_list *old_bonds = (*lists) + OLD_BONDS; + reax_list *new_bonds = &(*lists)[BONDS]; +// reax_list *old_bonds = &(*lists)[OLD_BONDS]; /* fragment analysis */ fprintf( fout, "step%d fragments\n", data->step ); @@ -609,14 +609,16 @@ void Analyze_Silica( reax_system *system, control_params *control, int O_SI_O_count, SI_O_SI_count; int si_coord[10], ox_coord[10]; real O_SI_O, SI_O_SI; - reax_list *new_bonds = (*lists) + BONDS; - reax_list *thb_intrs = (*lists) + THREE_BODIES; + reax_list *new_bonds = &(*lists)[BONDS]; + reax_list *thb_intrs = &(*lists)[THREE_BODIES]; Analyze_Fragments( system, control, data, workspace, lists, fout, 0 ); /* analyze atom coordinations */ for ( i = 0; i < 10; ++i ) + { si_coord[i] = ox_coord[i] = 0; + } for ( atom = 0; atom < system->N; ++atom ) { @@ -624,8 +626,12 @@ void Analyze_Silica( reax_system *system, control_params *control, for ( newp = Start_Index( atom, new_bonds ); newp < End_Index( atom, new_bonds ); ++newp ) + { if ( new_bonds->select.bond_list[newp].bo_data.BO >= control->bg_cut ) + { ++coord; + } + } if ( system->atoms[ atom ].type == SI_ATOM ) { @@ -718,11 +724,12 @@ void Analyze_Silica( reax_system *system, control_params *control, } - int Get_Type_of_Molecule( molecule *m ) { if ( m->atom_count == 3 && m->mtypes[1] == 2 && m->mtypes[2] == 1 ) + { return WATER; + } return UNKNOWN; } @@ -982,7 +989,7 @@ void Analysis( reax_system *system, control_params *control, /****** Electric Dipole Moment ******/ if ( control->dipole_anal && steps % control->freq_dipole_anal == 0 ) Calculate_Dipole_Moment( system, control, data, workspace, - (*lists) + BONDS, out_control->dpl ); + &(*lists)[BONDS], out_control->dpl ); /****** Drift ******/ if ( control->diffusion_coef && steps % control->freq_diffusion_coef == 0 ) diff --git a/sPuReMD/src/bond_orders.c b/sPuReMD/src/bond_orders.c index 52d5ad6162adbbdd2f5bb7e09f6259c5a1a57d51..d8f71ee014c4d6b072476a4e5fe1138fec31e425 100644 --- a/sPuReMD/src/bond_orders.c +++ b/sPuReMD/src/bond_orders.c @@ -37,10 +37,12 @@ static inline real Cf45( real p1, real p2 ) void Get_dBO( reax_system *system, reax_list **lists, int i, int pj, real C, rvec *v ) { - reax_list *bonds = (*lists) + BONDS; - reax_list *dBOs = (*lists) + DBO; + reax_list *bonds; + reax_list *dBOs; int start_pj, end_pj, k; + bonds = &(*lists)[BONDS]; + dBOs = &(*lists)[DBO]; pj = bonds->select.bond_list[pj].dbond_index; start_pj = Start_Index(pj, dBOs); end_pj = End_Index(pj, dBOs); @@ -56,11 +58,13 @@ void Get_dBO( reax_system *system, reax_list **lists, void Get_dBOpinpi2( reax_system *system, reax_list **lists, int i, int pj, real Cpi, real Cpi2, rvec *vpi, rvec *vpi2 ) { - reax_list *bonds = (*lists) + BONDS; - reax_list *dBOs = (*lists) + DBO; + reax_list *bonds; + reax_list *dBOs; dbond_data *dbo_k; int start_pj, end_pj, k; + bonds = &(*lists)[BONDS]; + dBOs = &(*lists)[DBO]; pj = bonds->select.bond_list[pj].dbond_index; start_pj = Start_Index(pj, dBOs); end_pj = End_Index(pj, dBOs); @@ -77,10 +81,12 @@ void Get_dBOpinpi2( reax_system *system, reax_list **lists, void Add_dBO( reax_system *system, reax_list **lists, int i, int pj, real C, rvec *v ) { - reax_list *bonds = (*lists) + BONDS; - reax_list *dBOs = (*lists) + DBO; + reax_list *bonds; + reax_list *dBOs; int start_pj, end_pj, k; + bonds = &(*lists)[BONDS]; + dBOs = &(*lists)[DBO]; pj = bonds->select.bond_list[pj].dbond_index; start_pj = Start_Index(pj, dBOs); end_pj = End_Index(pj, dBOs); @@ -98,11 +104,13 @@ void Add_dBO( reax_system *system, reax_list **lists, void Add_dBOpinpi2( reax_system *system, reax_list **lists, int i, int pj, real Cpi, real Cpi2, rvec *vpi, rvec *vpi2 ) { - reax_list *bonds = (*lists) + BONDS; - reax_list *dBOs = (*lists) + DBO; + reax_list *bonds; + reax_list *dBOs; dbond_data *dbo_k; int start_pj, end_pj, k; + bonds = &(*lists)[BONDS]; + dBOs = &(*lists)[DBO]; pj = bonds->select.bond_list[pj].dbond_index; start_pj = Start_Index(pj, dBOs); end_pj = End_Index(pj, dBOs); @@ -119,10 +127,12 @@ void Add_dBOpinpi2( reax_system *system, reax_list **lists, void Add_dBO_to_Forces( reax_system *system, reax_list **lists, int i, int pj, real C ) { - reax_list *bonds = (*lists) + BONDS; - reax_list *dBOs = (*lists) + DBO; + reax_list *bonds; + reax_list *dBOs; int start_pj, end_pj, k; + bonds = &(*lists)[BONDS]; + dBOs = &(*lists)[DBO]; pj = bonds->select.bond_list[pj].dbond_index; start_pj = Start_Index(pj, dBOs); end_pj = End_Index(pj, dBOs); @@ -138,11 +148,13 @@ void Add_dBO_to_Forces( reax_system *system, reax_list **lists, void Add_dBOpinpi2_to_Forces( reax_system *system, reax_list **lists, int i, int pj, real Cpi, real Cpi2 ) { - reax_list *bonds = (*lists) + BONDS; - reax_list *dBOs = (*lists) + DBO; + reax_list *bonds; + reax_list *dBOs; dbond_data *dbo_k; int start_pj, end_pj, k; + bonds = &(*lists)[BONDS]; + dBOs = &(*lists)[DBO]; pj = bonds->select.bond_list[pj].dbond_index; start_pj = Start_Index(pj, dBOs); end_pj = End_Index(pj, dBOs); @@ -201,8 +213,8 @@ void Calculate_dBO( int i, int pj, static_storage *workspace, reax_list **lists, dbond_data *top_dbo; /* Initializations */ - bonds = (*lists) + BONDS; - dBOs = (*lists) + DBO; + bonds = &(*lists)[BONDS]; + dBOs = &(*lists)[DBO]; j = bonds->select.bond_list[pj].nbr; bo_ij = &(bonds->select.bond_list[pj].bo_data); start_i = Start_Index( i, bonds ); @@ -333,7 +345,7 @@ void Calculate_dBO( int i, int pj, static_storage *workspace, reax_list **lists, void Add_dBond_to_Forces_NPT( int i, int pj, reax_system *system, simulation_data *data, static_storage *workspace, reax_list **lists ) { - reax_list *bonds = (*lists) + BONDS; + reax_list *bonds; bond_data *nbr_j, *nbr_k; bond_order_data *bo_ij, *bo_ji; dbond_coefficients coef; @@ -346,6 +358,7 @@ void Add_dBond_to_Forces_NPT( int i, int pj, reax_system *system, #endif /* Initializations */ + bonds = &(*lists)[BONDS]; nbr_j = &(bonds->select.bond_list[pj]); j = nbr_j->nbr; bo_ij = &(nbr_j->bo_data); @@ -523,7 +536,7 @@ void Add_dBond_to_Forces_NPT( int i, int pj, reax_system *system, void Add_dBond_to_Forces( int i, int pj, reax_system *system, simulation_data *data, static_storage *workspace, reax_list **lists ) { - reax_list *bonds = (*lists) + BONDS; + reax_list *bonds; bond_data *nbr_j, *nbr_k; bond_order_data *bo_ij, *bo_ji; dbond_coefficients coef; @@ -534,6 +547,7 @@ void Add_dBond_to_Forces( int i, int pj, reax_system *system, #endif /* Initializations */ + bonds = &(*lists)[BONDS]; nbr_j = &(bonds->select.bond_list[pj]); j = nbr_j->nbr; bo_ij = &(nbr_j->bo_data); @@ -728,7 +742,7 @@ void Calculate_Bond_Orders( reax_system *system, control_params *control, p_lp1 = system->reaxprm.gp.l[15]; p_boc1 = system->reaxprm.gp.l[0]; p_boc2 = system->reaxprm.gp.l[1]; - bonds = (*lists) + BONDS; + bonds = &(*lists)[BONDS]; #ifdef _OPENMP #pragma omp parallel default(shared) @@ -758,8 +772,8 @@ void Calculate_Bond_Orders( reax_system *system, control_params *control, top_dbo = 0; top_dDelta = 0; - dDeltas = (*lists) + DDELTA; - dBOs = (*lists) + DBO; + dDeltas = &(*lists)[DDELTA]; + dBOs = &(*lists)[DBO]; #endif /* Calculate Deltaprime, Deltaprime_boc values */ diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c index 61c5c8a702cacfc14123761756e81ab8717227aa..5c2d3bed963b38ea951da040cefe96b8dc9b14de 100644 --- a/sPuReMD/src/charges.c +++ b/sPuReMD/src/charges.c @@ -146,26 +146,19 @@ static void Compute_Preconditioner_QEq( const reax_system * const system, Hptr = workspace->H; } +#if defined(TEST_MAT) + Hptr = create_test_mat( ); +#endif + time = Get_Time( ); if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) { - if ( control->cm_domain_sparsify_enabled == TRUE ) - { - Hptr = setup_graph_coloring( workspace->H_sp ); - } - else - { - Hptr = setup_graph_coloring( workspace->H ); - } - - Sort_Matrix_Rows( Hptr ); + setup_graph_coloring( control, workspace, Hptr, &workspace->H_full, &workspace->H_p ); + Sort_Matrix_Rows( workspace->H_p ); + Hptr = workspace->H_p; } data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); -#if defined(TEST_MAT) - Hptr = create_test_mat( ); -#endif - switch ( control->cm_solver_pre_comp_type ) { case NONE_PC: @@ -503,27 +496,25 @@ static void Compute_Preconditioner_EE( const reax_system * const system, Hptr = workspace->H; } +#if defined(TEST_MAT) + Hptr = create_test_mat( ); +#endif + + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILU_PAR_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; + } + time = Get_Time( ); if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) { - if ( control->cm_domain_sparsify_enabled == TRUE ) - { - Hptr = setup_graph_coloring( workspace->H_sp ); - } - else - { - Hptr = setup_graph_coloring( workspace->H ); - } - - Sort_Matrix_Rows( Hptr ); + setup_graph_coloring( control, workspace, Hptr, &workspace->H_full, &workspace->H_p ); + Sort_Matrix_Rows( workspace->H_p ); + Hptr = workspace->H_p; } data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); - -#if defined(TEST_MAT) - Hptr = create_test_mat( ); -#endif - - Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; switch ( control->cm_solver_pre_comp_type ) { @@ -578,7 +569,24 @@ static void Compute_Preconditioner_EE( const reax_system * const system, break; } - Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; + if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) + { + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Hptr = workspace->H_sp; + } + else + { + Hptr = workspace->H; + } + } + + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILU_PAR_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; + } #if defined(DEBUG) #define SIZE (1000) @@ -620,28 +628,26 @@ static void Compute_Preconditioner_ACKS2( const reax_system * const system, Hptr = workspace->H; } +#if defined(TEST_MAT) + Hptr = create_test_mat( ); +#endif + + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILU_PAR_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; + Hptr->val[Hptr->start[system->N_cm] - 1] = 1.0; + } + time = Get_Time( ); if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) { - if ( control->cm_domain_sparsify_enabled == TRUE ) - { - Hptr = setup_graph_coloring( workspace->H_sp ); - } - else - { - Hptr = setup_graph_coloring( workspace->H ); - } - - Sort_Matrix_Rows( Hptr ); + setup_graph_coloring( control, workspace, Hptr, &workspace->H_full, &workspace->H_p ); + Sort_Matrix_Rows( workspace->H_p ); + Hptr = workspace->H_p; } data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); - -#if defined(TEST_MAT) - Hptr = create_test_mat( ); -#endif - - Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; - Hptr->val[Hptr->start[system->N_cm] - 1] = 1.0; switch ( control->cm_solver_pre_comp_type ) { @@ -696,8 +702,25 @@ static void Compute_Preconditioner_ACKS2( const reax_system * const system, break; } - Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; - Hptr->val[Hptr->start[system->N_cm] - 1] = 0.0; + if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) + { + if ( control->cm_domain_sparsify_enabled == TRUE ) + { + Hptr = workspace->H_sp; + } + else + { + Hptr = workspace->H; + } + } + + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILU_PAR_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; + Hptr->val[Hptr->start[system->N_cm] - 1] = 0.0; + } #if defined(DEBUG) #define SIZE (1000) @@ -748,10 +771,6 @@ static void Setup_Preconditioner_QEq( const reax_system * const system, } data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); -#if defined(DEBUG) - fprintf( stderr, "H matrix sorted\n" ); -#endif - switch ( control->cm_solver_pre_comp_type ) { case NONE_PC: @@ -768,18 +787,8 @@ static void Setup_Preconditioner_QEq( const reax_system * const system, case ICHOLT_PC: Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); -#if defined(DEBUG_FOCUS) - fprintf( stderr, "drop tolerances calculated\n" ); -#endif - fillin = Estimate_LU_Fill( Hptr, workspace->droptol ); -#if defined(DEBUG) - fprintf( stderr, "fillin = %d\n", fillin ); - fprintf( stderr, "allocated memory: L = U = %ldMB\n", - fillin * (sizeof(real) + sizeof(unsigned int)) / (1024 * 1024) ); -#endif - if ( workspace->L == NULL ) { if ( Allocate_Matrix( &(workspace->L), Hptr->n, fillin ) == FAILURE || @@ -816,10 +825,6 @@ static void Setup_Preconditioner_QEq( const reax_system * const system, case ILUT_PAR_PC: Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); -#if defined(DEBUG_FOCUS) - fprintf( stderr, "drop tolerances calculated\n" ); -#endif - if ( workspace->L == NULL ) { /* TODO: safest storage estimate is ILU(0) (same as lower triangular portion of H), could improve later */ @@ -896,11 +901,12 @@ static void Setup_Preconditioner_EE( const reax_system * const system, } data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); - Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; - -#if defined(DEBUG) - fprintf( stderr, "H matrix sorted\n" ); -#endif + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILU_PAR_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; + } switch ( control->cm_solver_pre_comp_type ) { @@ -918,18 +924,8 @@ static void Setup_Preconditioner_EE( const reax_system * const system, case ICHOLT_PC: Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); -#if defined(DEBUG_FOCUS) - fprintf( stderr, "drop tolerances calculated\n" ); -#endif - fillin = Estimate_LU_Fill( Hptr, workspace->droptol ); -#if defined(DEBUG) - fprintf( stderr, "fillin = %d\n", fillin ); - fprintf( stderr, "allocated memory: L = U = %ldMB\n", - fillin * (sizeof(real) + sizeof(unsigned int)) / (1024 * 1024) ); -#endif - if ( workspace->L == NULL ) { if ( Allocate_Matrix( &(workspace->L), system->N_cm, fillin + system->N_cm ) == FAILURE || @@ -966,10 +962,6 @@ static void Setup_Preconditioner_EE( const reax_system * const system, case ILUT_PAR_PC: Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); -#if defined(DEBUG_FOCUS) - fprintf( stderr, "drop tolerances calculated\n" ); -#endif - if ( workspace->L == NULL ) { /* TODO: safest storage estimate is ILU(0) (same as lower triangular portion of H), could improve later */ @@ -1015,7 +1007,12 @@ static void Setup_Preconditioner_EE( const reax_system * const system, break; } - Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILU_PAR_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; + } } @@ -1048,12 +1045,13 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, } data->timing.cm_sort_mat_rows += Get_Timing_Info( time ); - Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; - Hptr->val[Hptr->start[system->N_cm] - 1] = 1.0; - -#if defined(DEBUG) - fprintf( stderr, "H matrix sorted\n" ); -#endif + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILU_PAR_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0; + Hptr->val[Hptr->start[system->N_cm] - 1] = 1.0; + } switch ( control->cm_solver_pre_comp_type ) { @@ -1071,18 +1069,8 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, case ICHOLT_PC: Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); -#if defined(DEBUG_FOCUS) - fprintf( stderr, "drop tolerances calculated\n" ); -#endif - fillin = Estimate_LU_Fill( Hptr, workspace->droptol ); -#if defined(DEBUG) - fprintf( stderr, "fillin = %d\n", fillin ); - fprintf( stderr, "allocated memory: L = U = %ldMB\n", - fillin * (sizeof(real) + sizeof(unsigned int)) / (1024 * 1024) ); -#endif - if ( workspace->L == NULL ) { if ( Allocate_Matrix( &(workspace->L), Hptr->n, fillin ) == FAILURE || @@ -1118,10 +1106,6 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, case ILUT_PAR_PC: Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol ); -#if defined(DEBUG_FOCUS) - fprintf( stderr, "drop tolerances calculated\n" ); -#endif - if ( workspace->L == NULL ) { /* TODO: safest storage estimate is ILU(0) (same as lower triangular portion of H), could improve later */ @@ -1167,8 +1151,13 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, break; } - Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; - Hptr->val[Hptr->start[system->N_cm] - 1] = 0.0; + if ( control->cm_solver_pre_comp_type == ICHOLT_PC || + control->cm_solver_pre_comp_type == ILU_PAR_PC || + control->cm_solver_pre_comp_type == ILUT_PAR_PC ) + { + Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0; + Hptr->val[Hptr->start[system->N_cm] - 1] = 0.0; + } } @@ -1292,10 +1281,6 @@ static void QEq( reax_system * const system, control_params * const control, data->timing.cm_solver_iters += iters; -#if defined(DEBUG_FOCUS) - fprintf( stderr, "linsolve-" ); -#endif - Calculate_Charges_QEq( system, workspace ); #if defined(DEBUG_FOCUS) @@ -1372,10 +1357,6 @@ static void EE( reax_system * const system, control_params * const control, data->timing.cm_solver_iters += iters; -#if defined(DEBUG_FOCUS) - fprintf( stderr, "linsolve-" ); -#endif - Calculate_Charges_EE( system, workspace ); // if( data->step == control->nsteps ) @@ -1446,10 +1427,6 @@ static void ACKS2( reax_system * const system, control_params * const control, data->timing.cm_solver_iters += iters; -#if defined(DEBUG_FOCUS) - fprintf( stderr, "linsolve-" ); -#endif - Calculate_Charges_EE( system, workspace ); } diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c index 86b6ebce8310e4a8c044bae6aed709c863e565f8..24e5c2914a3e76b365a7dbf10acab4ee677f05b3 100644 --- a/sPuReMD/src/forces.c +++ b/sPuReMD/src/forces.c @@ -150,7 +150,7 @@ void Compute_NonBonded_Forces( reax_system *system, control_params *control, fprintf( stderr, "qeq - " ); #endif - if ( control->tabulate == 0) + if ( control->tabulate <= 0 ) { vdW_Coulomb_Energy( system, control, data, workspace, lists, out_control ); } @@ -179,7 +179,7 @@ void Compute_Total_Force( reax_system *system, control_params *control, int i; reax_list *bonds; - bonds = (*lists) + BONDS; + bonds = &(*lists)[BONDS]; #ifdef _OPENMP #pragma omp parallel default(shared) @@ -233,8 +233,8 @@ void Validate_Lists( static_storage *workspace, reax_list **lists, int step, int int i, flag; reax_list *bonds, *hbonds; - bonds = *lists + BONDS; - hbonds = *lists + HBONDS; + bonds = &(*lists)[BONDS]; + hbonds = &(*lists)[HBONDS]; /* far neighbors */ if ( Htop > Hmax * DANGER_ZONE ) @@ -625,23 +625,24 @@ void Init_Forces( reax_system *system, control_params *control, bond_data *ibond, *jbond; bond_order_data *bo_ij, *bo_ji; - far_nbrs = *lists + FAR_NBRS; - bonds = *lists + BONDS; - hbonds = *lists + HBONDS; + far_nbrs = &(*lists)[FAR_NBRS]; + bonds = &(*lists)[BONDS]; + hbonds = &(*lists)[HBONDS]; H = workspace->H; H_sp = workspace->H_sp; Htop = 0; H_sp_top = 0; num_bonds = 0; num_hbonds = 0; - btop_i = btop_j = 0; + btop_i = 0; + btop_j = 0; for ( i = 0; i < system->N; ++i ) { atom_i = &(system->atoms[i]); type_i = atom_i->type; - start_i = Start_Index(i, far_nbrs); - end_i = End_Index(i, far_nbrs); + start_i = Start_Index( i, far_nbrs ); + end_i = End_Index( i, far_nbrs ); H->start[i] = Htop; H_sp->start[i] = H_sp_top; btop_i = End_Index( i, bonds ); @@ -661,7 +662,7 @@ void Init_Forces( reax_system *system, control_params *control, flag = 0; flag_sp = 0; - if ((data->step - data->prev_steps) % control->reneighbor == 0) + if ( (data->step - data->prev_steps) % control->reneighbor == 0 ) { if ( nbr_pj->d <= control->r_cut ) { @@ -678,7 +679,7 @@ void Init_Forces( reax_system *system, control_params *control, } } else if ((nbr_pj->d = Sq_Distance_on_T3(atom_i->x, atom_j->x, &(system->box), - nbr_pj->dvec)) <= SQR(control->r_cut)) + nbr_pj->dvec)) <= SQR(control->r_cut)) { if ( nbr_pj->d <= SQR(control->r_sp_cut)) { @@ -738,7 +739,7 @@ void Init_Forces( reax_system *system, control_params *control, { r2 = SQR( r_ij ); - if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0) + if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 ) { C12 = twbp->p_bo1 * POW( r_ij / twbp->r_s, twbp->p_bo2 ); BO_s = (1.0 + control->bo_cut) * EXP( C12 ); @@ -749,7 +750,7 @@ void Init_Forces( reax_system *system, control_params *control, C12 = 0.0; } - if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0) + if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 ) { C34 = twbp->p_bo3 * POW( r_ij / twbp->r_p, twbp->p_bo4 ); BO_pi = EXP( C34 ); @@ -760,7 +761,7 @@ void Init_Forces( reax_system *system, control_params *control, C34 = 0.0; } - if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0) + if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 ) { C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 ); BO_pi2 = EXP( C56 ); @@ -937,31 +938,34 @@ void Init_Forces_Tab( reax_system *system, control_params *control, bond_data *ibond, *jbond; bond_order_data *bo_ij, *bo_ji; - far_nbrs = *lists + FAR_NBRS; - bonds = *lists + BONDS; - hbonds = *lists + HBONDS; - + far_nbrs = &(*lists)[FAR_NBRS]; + bonds = &(*lists)[BONDS]; + hbonds = &(*lists)[HBONDS]; H = workspace->H; H_sp = workspace->H_sp; Htop = 0; H_sp_top = 0; num_bonds = 0; num_hbonds = 0; - btop_i = btop_j = 0; + btop_i = 0; + btop_j = 0; for ( i = 0; i < system->N; ++i ) { atom_i = &(system->atoms[i]); type_i = atom_i->type; - start_i = Start_Index(i, far_nbrs); - end_i = End_Index(i, far_nbrs); + start_i = Start_Index( i, far_nbrs ); + end_i = End_Index( i, far_nbrs ); H->start[i] = Htop; H_sp->start[i] = H_sp_top; btop_i = End_Index( i, bonds ); sbp_i = &(system->reaxprm.sbp[type_i]); ihb = ihb_top = -1; + if ( control->hb_cut > 0 && (ihb = sbp_i->p_hbond) == 1 ) + { ihb_top = End_Index( workspace->hbond_index[i], hbonds ); + } for ( pj = start_i; pj < end_i; ++pj ) { @@ -971,7 +975,7 @@ void Init_Forces_Tab( reax_system *system, control_params *control, flag = 0; flag_sp = 0; - if ((data->step - data->prev_steps) % control->reneighbor == 0) + if ( (data->step - data->prev_steps) % control->reneighbor == 0 ) { if (nbr_pj->d <= control->r_cut) { @@ -988,7 +992,7 @@ void Init_Forces_Tab( reax_system *system, control_params *control, } } else if ((nbr_pj->d = Sq_Distance_on_T3(atom_i->x, atom_j->x, &(system->box), - nbr_pj->dvec)) <= SQR(control->r_cut)) + nbr_pj->dvec)) <= SQR(control->r_cut)) { if ( nbr_pj->d <= SQR(control->r_sp_cut)) { @@ -1006,8 +1010,8 @@ void Init_Forces_Tab( reax_system *system, control_params *control, twbp = &(system->reaxprm.tbp[type_i][type_j]); H->j[Htop] = j; - H->val[Htop] = Init_Charge_Matrix_Entry_Tab( system, control, - workspace->LR, i, j, r_ij, OFF_DIAGONAL ); + H->val[Htop] = Init_Charge_Matrix_Entry( system, control, i, j, + r_ij, OFF_DIAGONAL ); ++Htop; /* H_sp matrix entry */ @@ -1048,7 +1052,7 @@ void Init_Forces_Tab( reax_system *system, control_params *control, { r2 = SQR( r_ij ); - if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0) + if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 ) { C12 = twbp->p_bo1 * POW( r_ij / twbp->r_s, twbp->p_bo2 ); BO_s = (1.0 + control->bo_cut) * EXP( C12 ); @@ -1059,7 +1063,7 @@ void Init_Forces_Tab( reax_system *system, control_params *control, C12 = 0.0; } - if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0) + if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 ) { C34 = twbp->p_bo3 * POW( r_ij / twbp->r_p, twbp->p_bo4 ); BO_pi = EXP( C34 ); @@ -1070,7 +1074,7 @@ void Init_Forces_Tab( reax_system *system, control_params *control, C34 = 0.0; } - if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0) + if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 ) { C56 = twbp->p_bo5 * POW( r_ij / twbp->r_pp, twbp->p_bo6 ); BO_pi2 = EXP( C56 ); @@ -1097,7 +1101,6 @@ void Init_Forces_Tab( reax_system *system, control_params *control, ibond->d = r_ij; jbond->d = r_ij; rvec_Copy( ibond->dvec, nbr_pj->dvec ); - //fprintf (stderr, " %f - %f - %f \n", nbr_pj->dvec[0], nbr_pj->dvec[1], nbr_pj->dvec[2]); rvec_Scale( jbond->dvec, -1, nbr_pj->dvec ); ivec_Copy( ibond->rel_box, nbr_pj->rel_box ); ivec_Scale( jbond->rel_box, -1, nbr_pj->rel_box ); @@ -1162,8 +1165,8 @@ void Init_Forces_Tab( reax_system *system, control_params *control, /* diagonal entry */ H->j[Htop] = i; - H->val[Htop] = Init_Charge_Matrix_Entry_Tab( system, control, workspace->LR, - i, j, r_ij, DIAGONAL ); + H->val[Htop] = Init_Charge_Matrix_Entry( system, control, i, i, + r_ij, DIAGONAL ); ++Htop; H_sp->j[H_sp_top] = i; @@ -1177,12 +1180,16 @@ void Init_Forces_Tab( reax_system *system, control_params *control, } } + Init_Charge_Matrix_Remaining_Entries( system, control, far_nbrs, + H, H_sp, &Htop, &H_sp_top ); + // mark the end of j list - H->start[i] = Htop; - H_sp->start[i] = H_sp_top; + H->start[system->N_cm] = Htop; + H_sp->start[system->N_cm] = H_sp_top; + /* validate lists - decide if reallocation is required! */ - Validate_Lists( workspace, lists, data->step, system->N, - H->m, Htop, num_bonds, num_hbonds ); + Validate_Lists( workspace, lists, + data->step, system->N, H->m, Htop, num_bonds, num_hbonds ); #if defined(DEBUG_FOCUS) fprintf( stderr, "step%d: Htop = %d, num_bonds = %d, num_hbonds = %d\n", @@ -1304,7 +1311,7 @@ void Compute_Forces( reax_system *system, control_params *control, real t_start, t_elapsed; t_start = Get_Time( ); - if ( !control->tabulate ) + if ( control->tabulate <= 0 ) { Init_Forces( system, control, data, workspace, lists, out_control ); } diff --git a/sPuReMD/src/four_body_interactions.c b/sPuReMD/src/four_body_interactions.c index 26a15b7499a0da34f01651e2b7b36822008aa477..22d104763a7aaaca6c97dfed1619ff96e11672a3 100644 --- a/sPuReMD/src/four_body_interactions.c +++ b/sPuReMD/src/four_body_interactions.c @@ -164,8 +164,8 @@ void Four_Body_Interactions( reax_system *system, control_params *control, p_tor3 = system->reaxprm.gp.l[24]; p_tor4 = system->reaxprm.gp.l[25]; p_cot2 = system->reaxprm.gp.l[27]; - bonds = (*lists) + BONDS; - thb_intrs = (*lists) + THREE_BODIES; + bonds = &(*lists)[BONDS]; + thb_intrs = &(*lists)[THREE_BODIES]; e_tor_total = 0.0; e_con_total = 0.0; diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c index 01b0fc5593bbdb562b374b86227764edbfbc553e..c809655fa4b9796f09122388657b05229f5f96ad 100644 --- a/sPuReMD/src/init_md.c +++ b/sPuReMD/src/init_md.c @@ -335,11 +335,14 @@ void Init_Workspace( reax_system *system, control_params *control, break; } + /* sparse matrices */ workspace->H = NULL; + workspace->H_full = NULL; workspace->H_sp = NULL; - workspace->L = NULL; + workspace->H_p = NULL; workspace->H_spar_patt = NULL; workspace->H_app_inv = NULL; + workspace->L = NULL; workspace->U = NULL; workspace->Hdia_inv = NULL; if ( control->cm_solver_pre_comp_type == ICHOLT_PC || @@ -348,6 +351,7 @@ void Init_Workspace( reax_system *system, control_params *control, workspace->droptol = (real *) scalloc( system->N_cm, sizeof( real ), "Init_Workspace::workspace->droptol" ); } + //TODO: check if unused //workspace->w = (real *) scalloc( cm_lin_sys_size, sizeof( real ), //"Init_Workspace::workspace->droptol" ); @@ -497,6 +501,106 @@ void Init_Workspace( reax_system *system, control_params *control, break; } + /* SpMV related */ +#ifdef _OPENMP + workspace->b_local = (real*) smalloc( control->num_threads * system->N_cm * sizeof(real), + "Init_Workspace::b_local" ); +#endif + + /* level scheduling related */ + workspace->levels_L = 1; + workspace->levels_U = 1; + if ( control->cm_solver_pre_app_type == TRI_SOLVE_LEVEL_SCHED_PA || + control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) + { + workspace->row_levels_L = (unsigned int*) smalloc( system->N_cm * sizeof(unsigned int), + "Init_Workspace::row_levels_L" ); + workspace->level_rows_L = (unsigned int*) smalloc( system->N_cm * sizeof(unsigned int), + "Init_Workspace::level_rows_L" ); + workspace->level_rows_cnt_L = (unsigned int*) smalloc( (system->N_cm + 1) * sizeof(unsigned int), + "Init_Workspace::level_rows_cnt_L" ); + workspace->row_levels_U = (unsigned int*) smalloc( system->N_cm * sizeof(unsigned int), + "Init_Workspace::row_levels_U" ); + workspace->level_rows_U = (unsigned int*) smalloc( system->N_cm * sizeof(unsigned int), + "Init_Workspace::level_rows_U" ); + workspace->level_rows_cnt_U = (unsigned int*) smalloc( (system->N_cm + 1) * sizeof(unsigned int), + "Init_Workspace::level_rows_cnt_U" ); + workspace->top = (unsigned int*) smalloc( (system->N_cm + 1) * sizeof(unsigned int), + "Init_Workspace::top" ); + } + else + { + workspace->row_levels_L = NULL; + workspace->level_rows_L = NULL; + workspace->level_rows_cnt_L = NULL; + workspace->row_levels_U = NULL; + workspace->level_rows_U = NULL; + workspace->level_rows_cnt_U = NULL; + workspace->top = NULL; + } + + /* graph coloring related */ + workspace->recolor_cnt = 0; + if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) + { + workspace->color = (unsigned int*) smalloc( sizeof(unsigned int) * system->N_cm, + "Init_Workspace::color" ); + workspace->to_color = (unsigned int*) smalloc( sizeof(unsigned int) * system->N_cm, + "Init_Workspace::to_color" ); + workspace->conflict = (unsigned int*) smalloc( sizeof(unsigned int) * system->N_cm, + "setup_graph_coloring::conflict" ); + workspace->conflict_cnt = (unsigned int*) smalloc( sizeof(unsigned int) * (control->num_threads + 1), + "Init_Workspace::conflict_cnt" ); + workspace->recolor = (unsigned int*) smalloc( sizeof(unsigned int) * system->N_cm, + "Init_Workspace::recolor" ); + workspace->color_top = (unsigned int*) smalloc( sizeof(unsigned int) * (system->N_cm + 1), + "Init_Workspace::color_top" ); + workspace->permuted_row_col = (unsigned int*) smalloc( sizeof(unsigned int) * system->N_cm, + "Init_Workspace::premuted_row_col" ); + workspace->permuted_row_col_inv = (unsigned int*) smalloc( sizeof(unsigned int) * system->N_cm, + "Init_Workspace::premuted_row_col_inv" ); + workspace->y_p = (real*) smalloc( sizeof(real) * system->N_cm, + "Init_Workspace::y_p" ); + workspace->x_p = (real*) smalloc( sizeof(real) * system->N_cm, + "Init_Workspace::x_p" ); + } + else + { + workspace->color = NULL; + workspace->to_color = NULL; + workspace->conflict = NULL; + workspace->conflict_cnt = NULL; + workspace->recolor = NULL; + workspace->color_top = NULL; + workspace->permuted_row_col = NULL; + workspace->permuted_row_col_inv = NULL; + workspace->y_p = NULL; + workspace->x_p = NULL; + } + + /* Jacobi iteration related */ + if ( control->cm_solver_pre_app_type == JACOBI_ITER_PA ) + { + workspace->Dinv_L = (real*) smalloc( sizeof(real) * system->N_cm, + "Init_Workspace::Dinv_L" ); + workspace->Dinv_U = (real*) smalloc( sizeof(real) * system->N_cm, + "Init_Workspace::Dinv_U" ); + workspace->Dinv_b = (real*) smalloc( sizeof(real) * system->N_cm, + "Init_Workspace::Dinv_b" ); + workspace->rp = (real*) smalloc( sizeof(real) * system->N_cm, + "Init_Workspace::rp" ); + workspace->rp2 = (real*) smalloc( sizeof(real) * system->N_cm, + "Init_Workspace::rp2" ); + } + else + { + workspace->Dinv_L = NULL; + workspace->Dinv_U = NULL; + workspace->Dinv_b = NULL; + workspace->rp = NULL; + workspace->rp2 = NULL; + } + /* integrator storage */ workspace->a = (rvec *) smalloc( system->N * sizeof( rvec ), "Init_Workspace::workspace->a" ); @@ -590,7 +694,7 @@ void Init_Lists( reax_system *system, control_params *control, num_nbrs = Estimate_NumNeighbors( system, control, workspace, lists ); - Make_List( system->N, num_nbrs, TYP_FAR_NEIGHBOR, (*lists) + FAR_NBRS ); + Make_List( system->N, num_nbrs, TYP_FAR_NEIGHBOR, &(*lists)[FAR_NBRS] ); #if defined(DEBUG_FOCUS) fprintf( stderr, "memory allocated: far_nbrs = %ldMB\n", @@ -669,7 +773,7 @@ void Init_Lists( reax_system *system, control_params *control, else { Allocate_HBond_List( system->N, workspace->num_H, workspace->hbond_index, - hb_top, (*lists) + HBONDS ); + hb_top, &(*lists)[HBONDS] ); } #if defined(DEBUG_FOCUS) @@ -681,7 +785,7 @@ void Init_Lists( reax_system *system, control_params *control, } /* bonds list */ - Allocate_Bond_List( system->N, bond_top, (*lists) + BONDS ); + Allocate_Bond_List( system->N, bond_top, &(*lists)[BONDS] ); num_bonds = bond_top[system->N - 1]; #if defined(DEBUG_FOCUS) @@ -691,7 +795,7 @@ void Init_Lists( reax_system *system, control_params *control, #endif /* 3bodies list */ - Make_List( num_bonds, num_3body, TYP_THREE_BODY, (*lists) + THREE_BODIES ); + Make_List( num_bonds, num_3body, TYP_THREE_BODY, &(*lists)[THREE_BODIES] ); #if defined(DEBUG_FOCUS) fprintf( stderr, "estimated storage - num_3body: %d\n", num_3body ); @@ -700,9 +804,9 @@ void Init_Lists( reax_system *system, control_params *control, #endif #ifdef TEST_FORCES - Make_List( system->N, num_bonds * 8, TYP_DDELTA, (*lists) + DDELTA ); + Make_List( system->N, num_bonds * 8, TYP_DDELTA, &(*lists)[DDELTA] ); - Make_List( num_bonds, num_bonds * MAX_BONDS * 3, TYP_DBO, (*lists) + DBO ); + Make_List( num_bonds, num_bonds * MAX_BONDS * 3, TYP_DBO, &(*lists)[DBO] ); #endif sfree( hb_top, "Init_Lists::hb_top" ); @@ -949,11 +1053,13 @@ void Initialize( reax_system *system, control_params *control, #endif #ifdef _OPENMP - #pragma omp parallel default(shared) + #pragma omp parallel default(none) shared(control) { #pragma omp single control->num_threads = omp_get_num_threads( ); } +#else + control->num_threads = 1; #endif Randomize( ); @@ -1086,6 +1192,11 @@ void Finalize_Workspace( reax_system *system, control_params *control, Deallocate_Matrix( workspace->H_spar_patt_full ); Deallocate_Matrix( workspace->H_app_inv ); } + if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) + { + Deallocate_Matrix( workspace->H_full ); + Deallocate_Matrix( workspace->H_p ); + } for ( i = 0; i < 5; ++i ) { @@ -1160,6 +1271,49 @@ void Finalize_Workspace( reax_system *system, control_params *control, break; } + /* SpMV related */ +#ifdef _OPENMP + sfree( workspace->b_local, "Finalize_Workspace::b_local" ); +#endif + + /* level scheduling related */ + if ( control->cm_solver_pre_app_type == TRI_SOLVE_LEVEL_SCHED_PA || + control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) + { + sfree( workspace->row_levels_L, "Finalize_Workspace::row_levels_L" ); + sfree( workspace->level_rows_L, "Finalize_Workspace::level_rows_L" ); + sfree( workspace->level_rows_cnt_L, "Finalize_Workspace::level_rows_cnt_L" ); + sfree( workspace->row_levels_U, "Finalize_Workspace::row_levels_U" ); + sfree( workspace->level_rows_U, "Finalize_Workspace::level_rows_U" ); + sfree( workspace->level_rows_cnt_U, "Finalize_Workspace::level_rows_cnt_U" ); + sfree( workspace->top, "Finalize_Workspace::top" ); + } + + /* graph coloring related */ + if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA ) + { + sfree( workspace->color, "Finalize_Workspace::workspace->color" ); + sfree( workspace->to_color, "Finalize_Workspace::workspace->to_color" ); + sfree( workspace->conflict, "Finalize_Workspace::workspace->conflict" ); + sfree( workspace->conflict_cnt, "Finalize_Workspace::workspace->conflict_cnt" ); + sfree( workspace->recolor, "Finalize_Workspace::workspace->recolor" ); + sfree( workspace->color_top, "Finalize_Workspace::workspace->color_top" ); + sfree( workspace->permuted_row_col, "Finalize_Workspace::workspace->permuted_row_col" ); + sfree( workspace->permuted_row_col_inv, "Finalize_Workspace::workspace->permuted_row_col_inv" ); + sfree( workspace->y_p, "Finalize_Workspace::workspace->y_p" ); + sfree( workspace->x_p, "Finalize_Workspace::workspace->x_p" ); + } + + /* Jacobi iteration related */ + if ( control->cm_solver_pre_app_type == JACOBI_ITER_PA ) + { + sfree( workspace->Dinv_L, "Finalize_Workspace::Dinv_L" ); + sfree( workspace->Dinv_U, "Finalize_Workspace::Dinv_U" ); + sfree( workspace->Dinv_b, "Finalize_Workspace::Dinv_b" ); + sfree( workspace->rp, "Finalize_Workspace::rp" ); + sfree( workspace->rp2, "Finalize_Workspace::rp2" ); + } + /* integrator storage */ sfree( workspace->a, "Finalize_Workspace::workspace->a" ); sfree( workspace->f_old, "Finalize_Workspace::workspace->f_old" ); @@ -1217,17 +1371,17 @@ void Finalize_Workspace( reax_system *system, control_params *control, void Finalize_Lists( control_params *control, reax_list **lists ) { - Delete_List( TYP_FAR_NEIGHBOR, (*lists) + FAR_NBRS ); + Delete_List( TYP_FAR_NEIGHBOR, &(*lists)[FAR_NBRS] ); if ( control->hb_cut > 0.0 ) { - Delete_List( TYP_HBOND, (*lists) + HBONDS ); + Delete_List( TYP_HBOND, &(*lists)[HBONDS] ); } - Delete_List( TYP_BOND, (*lists) + BONDS ); - Delete_List( TYP_THREE_BODY, (*lists) + THREE_BODIES ); + Delete_List( TYP_BOND, &(*lists)[BONDS] ); + Delete_List( TYP_THREE_BODY, &(*lists)[THREE_BODIES] ); #ifdef TEST_FORCES - Delete_List( TYP_DDELTA, (*lists) + DDELTA ); - Delete_List( TYP_DBO, (*lists) + DBO ); + Delete_List( TYP_DDELTA, &(*lists)[DDELTA] ); + Delete_List( TYP_DBO, &(*lists)[DBO] ); #endif } diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c index c90f742ad9269d192ea55fb98e705b89bb767117..11b7d9801d5867f42080116b7322e650ed0604aa 100644 --- a/sPuReMD/src/lin_alg.c +++ b/sPuReMD/src/lin_alg.c @@ -50,52 +50,6 @@ enum preconditioner_type }; -/* global to make OpenMP shared (Sparse_MatVec) */ -#ifdef _OPENMP -static real *b_local = NULL; -#endif -/* global to make OpenMP shared (apply_preconditioner) */ -static real *Dinv_L = NULL; -static real *Dinv_U = NULL; -/* global to make OpenMP shared (tri_solve_level_sched) */ -static int levels = 1; -static int levels_L = 1; -static int levels_U = 1; -static unsigned int *row_levels_L = NULL; -static unsigned int *level_rows_L = NULL; -static unsigned int *level_rows_cnt_L = NULL; -static unsigned int *row_levels_U = NULL; -static unsigned int *level_rows_U = NULL; -static unsigned int *level_rows_cnt_U = NULL; -static unsigned int *row_levels; -static unsigned int *level_rows; -static unsigned int *level_rows_cnt; -static unsigned int *top = NULL; -/* global to make OpenMP shared (graph_coloring) */ -static unsigned int *color = NULL; -static unsigned int *to_color = NULL; -static unsigned int *conflict = NULL; -static unsigned int *conflict_cnt = NULL; -static unsigned int *temp_ptr; -static unsigned int *recolor = NULL; -static unsigned int recolor_cnt; -static unsigned int *color_top = NULL; -/* global to make OpenMP shared (sort_colors) */ -static unsigned int *permuted_row_col = NULL; -static unsigned int *permuted_row_col_inv = NULL; -static real *y_p = NULL; -/* global to make OpenMP shared (permute_vector) */ -static real *x_p = NULL; -static unsigned int *mapping = NULL; -static sparse_matrix *H_full; -static sparse_matrix *H_p; -/* global to make OpenMP shared (jacobi_iter) */ -static real *Dinv_b = NULL; -static real *rp = NULL; -static real *rp2 = NULL; -static real *rp3 = NULL; - - #if defined(TEST_MAT) static sparse_matrix * create_test_mat( void ) { @@ -225,7 +179,15 @@ static void compute_full_sparse_matrix( const sparse_matrix * const A, exit( INSUFFICIENT_MEMORY ); } } - + else if ( (*A_full)->m < 2 * A->m - A->n ) + { + Deallocate_Matrix( *A_full ); + if ( Allocate_Matrix( A_full, A->n, 2 * A->m - A->n ) == FAILURE ) + { + fprintf( stderr, "not enough memory for full A. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } if ( Allocate_Matrix( &A_t, A->n, A->m ) == FAILURE ) { @@ -277,14 +239,14 @@ static void compute_full_sparse_matrix( const sparse_matrix * const A, * A has non-zero diagonals * Each row of A has at least one non-zero (i.e., no rows with all zeros) */ void setup_sparse_approx_inverse( const sparse_matrix * const A, sparse_matrix ** A_full, - sparse_matrix ** A_spar_patt, sparse_matrix **A_spar_patt_full, - sparse_matrix ** A_app_inv, const real filter ) + sparse_matrix ** A_spar_patt, sparse_matrix **A_spar_patt_full, + sparse_matrix ** A_app_inv, const real filter ) { int i, pj, size; - real min, max, threshold, val; - - min = 0.0; - max = 0.0; + int left, right, k, p, turn; + real pivot, tmp; + real threshold; + real *list; if ( *A_spar_patt == NULL ) { @@ -304,34 +266,87 @@ void setup_sparse_approx_inverse( const sparse_matrix * const A, sparse_matrix * } } - /* find min and max elements of matrix */ - for ( i = 0; i < A->n; ++i ) + + /* quick-select algorithm for finding the kth greatest element in the matrix*/ + /* list: values from the matrix*/ + /* left-right: search space of the quick-select */ + + list = (real *) smalloc( sizeof(real) * (A->start[A->n]),"Sparse_Approx_Inverse::list" ); + + left = 0; + right = A->start[A->n] - 1; + k = (int)( (A->start[A->n])*filter ); + threshold = 0.0; + + for( i = left; i <= right ; ++i ) { - for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) + list[i] = A->val[i]; + if(list[i] < 0.0) { - val = A->val[pj]; - if ( pj == 0 ) - { - min = val; - max = val; - } - else + list[i] = -list[i]; + } + } + + turn = 0; + while( k ) { + + p = left; + turn = 1 - turn; + if( turn == 1) + { + pivot = list[right]; + } + else + { + pivot = list[left]; + } + for( i = left + 1 - turn; i <= right-turn; ++i ) + { + if( list[i] > pivot ) { - if ( min > val ) - { - min = val; - } - if ( max < val ) - { - max = val; - } + tmp = list[i]; + list[i] = list[p]; + list[p] = tmp; + p++; } } + if(turn == 1) + { + tmp = list[p]; + list[p] = list[right]; + list[right] = tmp; + } + else + { + tmp = list[p]; + list[p] = list[left]; + list[left] = tmp; + } + + if( p == k - 1) + { + threshold = list[p]; + break; + } + else if( p > k - 1 ) + { + right = p - 1; + } + else + { + left = p + 1; + } + } + + if(threshold < 1.000000) + { + threshold = 1.000001; } - threshold = min + (max - min) * (1.0 - filter); + sfree( list, "setup_sparse_approx_inverse::list" ); /* fill sparsity pattern */ + /* diagonal entries are always included */ for ( size = 0, i = 0; i < A->n; ++i ) { (*A_spar_patt)->start[i] = size; @@ -352,7 +367,7 @@ void setup_sparse_approx_inverse( const sparse_matrix * const A, sparse_matrix * compute_full_sparse_matrix( *A_spar_patt, A_spar_patt_full ); /* A_app_inv has the same sparsity pattern - * as A_spar_patt_full (omit non-zero values) */ + * * as A_spar_patt_full (omit non-zero values) */ if ( Allocate_Matrix( A_app_inv, (*A_spar_patt_full)->n, (*A_spar_patt_full)->m ) == FAILURE ) { fprintf( stderr, "not enough memory for approximate inverse matrix. terminating.\n" ); @@ -360,6 +375,7 @@ void setup_sparse_approx_inverse( const sparse_matrix * const A, sparse_matrix * } } + void Calculate_Droptol( const sparse_matrix * const A, real * const droptol, const real dtol ) { @@ -379,8 +395,6 @@ void Calculate_Droptol( const sparse_matrix * const A, #pragma omp master { - /* keep b_local for program duration to avoid allocate/free - * overhead per Sparse_MatVec call*/ if ( droptol_local == NULL ) { droptol_local = (real*) smalloc( omp_get_num_threads() * A->n * sizeof(real), @@ -1622,11 +1636,23 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol, #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) /* Compute M^{1} \approx A which minimizes + * \sum_{j=1}^{N} ||e_j - Am_j||_2^2 + * where: e_j is the j-th column of the NxN identify matrix, + * m_j is the j-th column of the NxN approximate sparse matrix M + * + * Internally, use LAPACKE to solve the least-squares problems * * A: symmetric, sparse matrix, stored in full CSR format * A_spar_patt: sparse matrix used as template sparsity pattern * for approximating the inverse, stored in full CSR format - * A_app_inv: approximate inverse to A, stored in full CSR format (result) */ + * A_app_inv: approximate inverse to A, stored in full CSR format (result) + * + * Reference: + * Michele Benzi et al. + * A Comparative Study of Sparse Approximate Inverse + * Preconditioners + * Applied Numerical Mathematics 30, 1999 + * */ real sparse_approx_inverse( const sparse_matrix * const A, const sparse_matrix * const A_spar_patt, sparse_matrix ** A_app_inv ) @@ -1719,7 +1745,7 @@ real sparse_approx_inverse( const sparse_matrix * const A, } // change the value if any of the column indices is seen for ( d_j = A->start[pos_x[d_i]]; - d_j < A->start[pos_x[d_i + 1]]; ++d_j ) + d_j < A->start[pos_x[d_i] + 1]; ++d_j ) { if ( Y[A->j[d_j]] == 1 ) { @@ -1794,12 +1820,13 @@ real sparse_approx_inverse( const sparse_matrix * const A, /* sparse matrix-vector product Ax = b - * where: - * A: lower triangular matrix, stored in CSR format - * x: vector - * b: vector (result) */ -static void Sparse_MatVec( const sparse_matrix * const A, - const real * const x, real * const b ) + * + * workspace: storage container for workspace structures + * A: lower triangular matrix, stored in CSR format + * x: vector + * b (output): vector */ +static void Sparse_MatVec( const static_storage * const workspace, + const sparse_matrix * const A, const real * const x, real * const b ) { int i, j, k, n, si, ei; real H; @@ -1813,18 +1840,7 @@ static void Sparse_MatVec( const sparse_matrix * const A, #ifdef _OPENMP tid = omp_get_thread_num( ); - #pragma omp single - { - /* keep b_local for program duration to avoid allocate/free - * overhead per Sparse_MatVec call*/ - if ( b_local == NULL ) - { - b_local = (real*) smalloc( omp_get_num_threads() * n * sizeof(real), - "Sparse_MatVec::b_local" ); - } - } - - Vector_MakeZero( (real * const)b_local, omp_get_num_threads() * n ); + Vector_MakeZero( workspace->b_local, omp_get_num_threads() * n ); #pragma omp for schedule(static) #endif @@ -1838,8 +1854,8 @@ static void Sparse_MatVec( const sparse_matrix * const A, j = A->j[k]; H = A->val[k]; #ifdef _OPENMP - b_local[tid * n + j] += H * x[i]; - b_local[tid * n + i] += H * x[j]; + workspace->b_local[tid * n + j] += H * x[i]; + workspace->b_local[tid * n + i] += H * x[j]; #else b[j] += H * x[i]; b[i] += H * x[j]; @@ -1848,7 +1864,7 @@ static void Sparse_MatVec( const sparse_matrix * const A, // the diagonal entry is the last one in #ifdef _OPENMP - b_local[tid * n + i] += A->val[k] * x[i]; + workspace->b_local[tid * n + i] += A->val[k] * x[i]; #else b[i] += A->val[k] * x[i]; #endif @@ -1860,7 +1876,7 @@ static void Sparse_MatVec( const sparse_matrix * const A, { for ( j = 0; j < omp_get_num_threads(); ++j ) { - b[i] += b_local[j * n + i]; + b[i] += workspace->b_local[j * n + i]; } } #endif @@ -2042,6 +2058,7 @@ void tri_solve( const sparse_matrix * const LU, const real * const y, /* Solve triangular system LU*x = y using level scheduling * + * workspace: storage container for workspace structures * LU: lower/upper triangular, stored in CSR * y: constants in linear system (RHS) * x: solution @@ -2052,53 +2069,38 @@ void tri_solve( const sparse_matrix * const LU, const real * const y, * Assumptions: * LU has non-zero diagonals * Each row of LU has at least one non-zero (i.e., no rows with all zeros) */ -void tri_solve_level_sched( const sparse_matrix * const LU, - const real * const y, real * const x, const int N, - const TRIANGULARITY tri, int find_levels ) +void tri_solve_level_sched( static_storage * workspace, + const sparse_matrix * const LU, + const real * const y, real * const x, const int N, + const TRIANGULARITY tri, int find_levels ) { int i, j, pj, local_row, local_level; + unsigned int *row_levels, *level_rows, *level_rows_cnt; + int levels; + + if ( tri == LOWER ) + { + row_levels = workspace->row_levels_L; + level_rows = workspace->level_rows_L; + level_rows_cnt = workspace->level_rows_cnt_L; + } + else + { + row_levels = workspace->row_levels_U; + level_rows = workspace->level_rows_U; + level_rows_cnt = workspace->level_rows_cnt_U; + } #ifdef _OPENMP #pragma omp single #endif { - if ( tri == LOWER ) - { - row_levels = row_levels_L; - level_rows = level_rows_L; - level_rows_cnt = level_rows_cnt_L; - levels = levels_L; - } - else - { - row_levels = row_levels_U; - level_rows = level_rows_U; - level_rows_cnt = level_rows_cnt_U; - levels = levels_U; - } - - if ( row_levels == NULL || level_rows == NULL || level_rows_cnt == NULL ) - { - row_levels = (unsigned int*) smalloc( (size_t)N * sizeof(unsigned int), - "tri_solve_level_sched::row_levels" ); - level_rows = (unsigned int*) smalloc( (size_t)N * sizeof(unsigned int), - "tri_solve_level_sched::level_rows" ); - level_rows_cnt = (unsigned int*) smalloc( (size_t)(N + 1) * sizeof(unsigned int), - "tri_solve_level_sched::level_rows_cnt" ); - } - - if ( top == NULL ) - { - top = (unsigned int*) smalloc( (size_t)(N + 1) * sizeof(unsigned int), - "tri_solve_level_sched::top" ); - } - /* find levels (row dependencies in substitutions) */ if ( find_levels == TRUE ) { memset( row_levels, 0, N * sizeof(unsigned int) ); memset( level_rows_cnt, 0, N * sizeof(unsigned int) ); - memset( top, 0, N * sizeof(unsigned int) ); + memset( workspace->top, 0, N * sizeof(unsigned int) ); levels = 1; if ( tri == LOWER ) @@ -2116,6 +2118,8 @@ void tri_solve_level_sched( const sparse_matrix * const LU, ++level_rows_cnt[local_level]; } + workspace->levels_L = levels; + //#if defined(DEBUG) fprintf(stderr, "levels(L): %d\n", levels); fprintf(stderr, "NNZ(L): %d\n", LU->start[N]); @@ -2136,6 +2140,8 @@ void tri_solve_level_sched( const sparse_matrix * const LU, ++level_rows_cnt[local_level]; } + workspace->levels_U = levels; + //#if defined(DEBUG) fprintf(stderr, "levels(U): %d\n", levels); fprintf(stderr, "NNZ(U): %d\n", LU->start[N]); @@ -2145,17 +2151,26 @@ void tri_solve_level_sched( const sparse_matrix * const LU, for ( i = 1; i < levels + 1; ++i ) { level_rows_cnt[i] += level_rows_cnt[i - 1]; - top[i] = level_rows_cnt[i]; + workspace->top[i] = level_rows_cnt[i]; } for ( i = 0; i < N; ++i ) { - level_rows[top[row_levels[i] - 1]] = i; - ++top[row_levels[i] - 1]; + level_rows[workspace->top[row_levels[i] - 1]] = i; + ++workspace->top[row_levels[i] - 1]; } } } + if ( tri == LOWER ) + { + levels = workspace->levels_L; + } + else + { + levels = workspace->levels_U; + } + /* perform substitutions by level */ if ( tri == LOWER ) { @@ -2171,7 +2186,6 @@ void tri_solve_level_sched( const sparse_matrix * const LU, for ( pj = LU->start[local_row]; pj < LU->start[local_row + 1] - 1; ++pj ) { x[local_row] -= LU->val[pj] * x[LU->j[pj]]; - } x[local_row] /= LU->val[pj]; } @@ -2191,79 +2205,18 @@ void tri_solve_level_sched( const sparse_matrix * const LU, for ( pj = LU->start[local_row] + 1; pj < LU->start[local_row + 1]; ++pj ) { x[local_row] -= LU->val[pj] * x[LU->j[pj]]; - } x[local_row] /= LU->val[LU->start[local_row]]; } } } - -#ifdef _OPENMP - #pragma omp single -#endif - { - /* save level info for re-use if performing repeated triangular solves via preconditioning */ - if ( tri == LOWER ) - { - row_levels_L = row_levels; - level_rows_L = level_rows; - level_rows_cnt_L = level_rows_cnt; - levels_L = levels; - } - else - { - row_levels_U = row_levels; - level_rows_U = level_rows; - level_rows_cnt_U = level_rows_cnt; - levels_U = levels; - } - } -} - - -static void compute_H_full( const sparse_matrix * const H ) -{ - int count, i, pj; - sparse_matrix *H_t; - - if ( Allocate_Matrix( &H_t, H->n, H->m ) == FAILURE ) - { - fprintf( stderr, "not enough memory for full H. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } - - /* Set up the sparse matrix data structure for A. */ - Transpose( H, H_t ); - - count = 0; - for ( i = 0; i < H->n; ++i ) - { - H_full->start[i] = count; - - /* H: symmetric, lower triangular portion only stored */ - for ( pj = H->start[i]; pj < H->start[i + 1]; ++pj ) - { - H_full->val[count] = H->val[pj]; - H_full->j[count] = H->j[pj]; - ++count; - } - /* H^T: symmetric, upper triangular portion only stored; - * skip diagonal from H^T, as included from H above */ - for ( pj = H_t->start[i] + 1; pj < H_t->start[i + 1]; ++pj ) - { - H_full->val[count] = H_t->val[pj]; - H_full->j[count] = H_t->j[pj]; - ++count; - } - } - H_full->start[i] = count; - - Deallocate_Matrix( H_t ); } /* Iterative greedy shared-memory parallel graph coloring * + * control: container for control info + * workspace: storage container for workspace structures * A: matrix to use for coloring, stored in CSR format; * rows represent vertices, columns of entries within a row represent adjacent vertices * (i.e., dependent rows for elimination during LU factorization) @@ -2276,31 +2229,40 @@ static void compute_H_full( const sparse_matrix * const H ) * and Massively Threaded Architectures * Parallel Computing, 2012 */ -void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) +void graph_coloring( const control_params * const control, + static_storage * workspace, + const sparse_matrix * const A, const TRIANGULARITY tri ) { #ifdef _OPENMP #pragma omp parallel #endif { -#define MAX_COLOR (500) int i, pj, v; unsigned int temp, recolor_cnt_local, *conflict_local; - int tid, num_thread, *fb_color; + int tid, *fb_color; + unsigned int *p_to_color, *p_conflict, *p_temp; #ifdef _OPENMP - tid = omp_get_thread_num(); - num_thread = omp_get_num_threads(); + tid = omp_get_thread_num( ); #else tid = 0; - num_thread = 1; #endif + p_to_color = workspace->to_color; + p_conflict = workspace->conflict; + +#ifdef _OPENMP + #pragma omp for schedule(static) +#endif + for ( i = 0; i < A->n; ++i ) + { + workspace->color[i] = 0; + } #ifdef _OPENMP #pragma omp single #endif { - memset( color, 0, sizeof(unsigned int) * A->n ); - recolor_cnt = A->n; + workspace->recolor_cnt = A->n; } /* ordering of vertices to color depends on triangularity of factor @@ -2312,7 +2274,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) #endif for ( i = 0; i < A->n; ++i ) { - to_color[i] = i; + p_to_color[i] = i; } } else @@ -2322,57 +2284,56 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) #endif for ( i = 0; i < A->n; ++i ) { - to_color[i] = A->n - 1 - i; + p_to_color[i] = A->n - 1 - i; } } - fb_color = (int*) smalloc( sizeof(int) * MAX_COLOR, - "graph_coloring::fb_color" ); + fb_color = (int*) smalloc( sizeof(int) * A->n, + "graph_coloring::fb_color" ); conflict_local = (unsigned int*) smalloc( sizeof(unsigned int) * A->n, - "graph_coloring::fb_color" ); - -#ifdef _OPENMP - #pragma omp barrier -#endif + "graph_coloring::fb_color" ); - while ( recolor_cnt > 0 ) + while ( workspace->recolor_cnt > 0 ) { - memset( fb_color, -1, sizeof(int) * MAX_COLOR ); + memset( fb_color, -1, sizeof(int) * A->n ); /* color vertices */ #ifdef _OPENMP #pragma omp for schedule(static) #endif - for ( i = 0; i < recolor_cnt; ++i ) + for ( i = 0; i < workspace->recolor_cnt; ++i ) { - v = to_color[i]; + v = p_to_color[i]; /* colors of adjacent vertices are forbidden */ for ( pj = A->start[v]; pj < A->start[v + 1]; ++pj ) { if ( v != A->j[pj] ) { - fb_color[color[A->j[pj]]] = v; + fb_color[workspace->color[A->j[pj]]] = v; } } /* search for min. color which is not in conflict with adjacent vertices; * start at 1 since 0 is default (invalid) color for all vertices */ - for ( pj = 1; fb_color[pj] == v; ++pj ); + for ( pj = 1; fb_color[pj] == v; ++pj ) + ; /* assign discovered color (no conflict in neighborhood of adjacent vertices) */ - color[v] = pj; + workspace->color[v] = pj; } /* determine if recoloring required */ - temp = recolor_cnt; + temp = workspace->recolor_cnt; recolor_cnt_local = 0; #ifdef _OPENMP + #pragma omp barrier + #pragma omp single #endif { - recolor_cnt = 0; + workspace->recolor_cnt = 0; } #ifdef _OPENMP @@ -2380,12 +2341,12 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) #endif for ( i = 0; i < temp; ++i ) { - v = to_color[i]; + v = p_to_color[i]; /* search for color conflicts with adjacent vertices */ for ( pj = A->start[v]; pj < A->start[v + 1]; ++pj ) { - if ( color[v] == color[A->j[pj]] && v > A->j[pj] ) + if ( workspace->color[v] == workspace->color[A->j[pj]] && v > A->j[pj] ) { conflict_local[recolor_cnt_local] = v; ++recolor_cnt_local; @@ -2395,134 +2356,107 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) } /* count thread-local conflicts and compute offsets for copying into shared buffer */ - conflict_cnt[tid + 1] = recolor_cnt_local; + workspace->conflict_cnt[tid + 1] = recolor_cnt_local; #ifdef _OPENMP #pragma omp barrier - #pragma omp master + #pragma omp single #endif { - conflict_cnt[0] = 0; - for ( i = 1; i < num_thread + 1; ++i ) + workspace->conflict_cnt[0] = 0; + for ( i = 1; i < control->num_threads + 1; ++i ) { - conflict_cnt[i] += conflict_cnt[i - 1]; + workspace->conflict_cnt[i] += workspace->conflict_cnt[i - 1]; } - recolor_cnt = conflict_cnt[num_thread]; + workspace->recolor_cnt = workspace->conflict_cnt[control->num_threads]; } -#ifdef _OPENMP - #pragma omp barrier -#endif - /* copy thread-local conflicts into shared buffer */ for ( i = 0; i < recolor_cnt_local; ++i ) { - conflict[conflict_cnt[tid] + i] = conflict_local[i]; - color[conflict_local[i]] = 0; + p_conflict[workspace->conflict_cnt[tid] + i] = conflict_local[i]; + workspace->color[conflict_local[i]] = 0; } #ifdef _OPENMP #pragma omp barrier - - #pragma omp single #endif - { - temp_ptr = to_color; - to_color = conflict; - conflict = temp_ptr; - } + p_temp = p_to_color; + p_to_color = p_conflict; + p_conflict = p_temp; } sfree( conflict_local, "graph_coloring::conflict_local" ); sfree( fb_color, "graph_coloring::fb_color" ); - - //#if defined(DEBUG) - //#ifdef _OPENMP - // #pragma omp master - //#endif - // { - // for ( i = 0; i < A->n; ++i ) - // printf("Vertex: %5d, Color: %5d\n", i, color[i] ); - // } - //#endif - -#ifdef _OPENMP - #pragma omp barrier -#endif } } -/* Sort coloring +/* Sort rows by coloring * + * workspace: storage container for workspace structures * n: number of entries in coloring * tri: coloring to triangular factor to use (lower/upper) */ -void sort_colors( const unsigned int n, const TRIANGULARITY tri ) +void sort_rows_by_colors( const static_storage * const workspace, + const unsigned int n, const TRIANGULARITY tri ) { unsigned int i; - memset( color_top, 0, sizeof(unsigned int) * (n + 1) ); + memset( workspace->color_top, 0, sizeof(unsigned int) * (n + 1) ); /* sort vertices by color (ascending within a color) * 1) count colors * 2) determine offsets of color ranges - * 3) sort by color + * 3) sort rows by color * * note: color is 1-based */ for ( i = 0; i < n; ++i ) { - ++color_top[color[i]]; + ++workspace->color_top[workspace->color[i]]; } for ( i = 1; i < n + 1; ++i ) { - color_top[i] += color_top[i - 1]; + workspace->color_top[i] += workspace->color_top[i - 1]; } for ( i = 0; i < n; ++i ) { - permuted_row_col[color_top[color[i] - 1]] = i; - ++color_top[color[i] - 1]; + workspace->permuted_row_col[workspace->color_top[workspace->color[i] - 1]] = i; + ++workspace->color_top[workspace->color[i] - 1]; } /* invert mapping to get map from current row/column to permuted (new) row/column */ for ( i = 0; i < n; ++i ) { - permuted_row_col_inv[permuted_row_col[i]] = i; + workspace->permuted_row_col_inv[workspace->permuted_row_col[i]] = i; } } /* Apply permutation Q^T*x or Q*x based on graph coloring * + * workspace: storage container for workspace structures * color: vertex color (1-based); vertices represent matrix rows/columns * x: vector to permute (in-place) * n: number of entries in x * invert_map: if TRUE, use Q^T, otherwise use Q * tri: coloring to triangular factor to use (lower/upper) */ -static void permute_vector( real * const x, const unsigned int n, const int invert_map, - const TRIANGULARITY tri ) +static void permute_vector( const static_storage * const workspace, + real * const x, const unsigned int n, const int invert_map, + const TRIANGULARITY tri ) { unsigned int i; + unsigned int *mapping; -#ifdef _OPENMP - #pragma omp single -#endif + if ( invert_map == TRUE ) { - if ( x_p == NULL ) - { - x_p = (real*) smalloc( sizeof(real) * n, "permute_vector::x_p" ); - } - - if ( invert_map == TRUE ) - { - mapping = permuted_row_col_inv; - } - else - { - mapping = permuted_row_col; - } + mapping = workspace->permuted_row_col_inv; + } + else + { + mapping = workspace->permuted_row_col; } #ifdef _OPENMP @@ -2530,25 +2464,28 @@ static void permute_vector( real * const x, const unsigned int n, const int inve #endif for ( i = 0; i < n; ++i ) { - x_p[i] = x[mapping[i]]; + workspace->x_p[i] = x[mapping[i]]; } #ifdef _OPENMP - #pragma omp single + #pragma omp for schedule(static) #endif + for ( i = 0; i < n; ++i ) { - memcpy( x, x_p, sizeof(real) * n ); + x[i] = workspace->x_p[i]; } } /* Apply permutation Q^T*(LU)*Q based on graph coloring * + * workspace: storage container for workspace structures * color: vertex color (1-based); vertices represent matrix rows/columns * LU: matrix to permute, stored in CSR format * tri: triangularity of LU (lower/upper) */ -void permute_matrix( sparse_matrix * const LU, const TRIANGULARITY tri ) +void permute_matrix( const static_storage * const workspace, + sparse_matrix * const LU, const TRIANGULARITY tri ) { int i, pj, nr, nc; sparse_matrix *LUtemp; @@ -2560,26 +2497,26 @@ void permute_matrix( sparse_matrix * const LU, const TRIANGULARITY tri ) } /* count nonzeros in each row of permuted factor (re-use color_top for counting) */ - memset( color_top, 0, sizeof(unsigned int) * (LU->n + 1) ); + memset( workspace->color_top, 0, sizeof(unsigned int) * (LU->n + 1) ); if ( tri == LOWER ) { for ( i = 0; i < LU->n; ++i ) { - nr = permuted_row_col_inv[i]; + nr = workspace->permuted_row_col_inv[i]; for ( pj = LU->start[i]; pj < LU->start[i + 1]; ++pj ) { - nc = permuted_row_col_inv[LU->j[pj]]; + nc = workspace->permuted_row_col_inv[LU->j[pj]]; if ( nc <= nr ) { - ++color_top[nr + 1]; + ++workspace->color_top[nr + 1]; } /* correct entries to maintain triangularity (lower) */ else { - ++color_top[nc + 1]; + ++workspace->color_top[nc + 1]; } } } @@ -2588,20 +2525,20 @@ void permute_matrix( sparse_matrix * const LU, const TRIANGULARITY tri ) { for ( i = LU->n - 1; i >= 0; --i ) { - nr = permuted_row_col_inv[i]; + nr = workspace->permuted_row_col_inv[i]; for ( pj = LU->start[i]; pj < LU->start[i + 1]; ++pj ) { - nc = permuted_row_col_inv[LU->j[pj]]; + nc = workspace->permuted_row_col_inv[LU->j[pj]]; if ( nc >= nr ) { - ++color_top[nr + 1]; + ++workspace->color_top[nr + 1]; } /* correct entries to maintain triangularity (upper) */ else { - ++color_top[nc + 1]; + ++workspace->color_top[nc + 1]; } } } @@ -2609,34 +2546,34 @@ void permute_matrix( sparse_matrix * const LU, const TRIANGULARITY tri ) for ( i = 1; i < LU->n + 1; ++i ) { - color_top[i] += color_top[i - 1]; + workspace->color_top[i] += workspace->color_top[i - 1]; } - memcpy( LUtemp->start, color_top, sizeof(unsigned int) * (LU->n + 1) ); + memcpy( LUtemp->start, workspace->color_top, sizeof(unsigned int) * (LU->n + 1) ); /* permute factor */ if ( tri == LOWER ) { for ( i = 0; i < LU->n; ++i ) { - nr = permuted_row_col_inv[i]; + nr = workspace->permuted_row_col_inv[i]; for ( pj = LU->start[i]; pj < LU->start[i + 1]; ++pj ) { - nc = permuted_row_col_inv[LU->j[pj]]; + nc = workspace->permuted_row_col_inv[LU->j[pj]]; if ( nc <= nr ) { - LUtemp->j[color_top[nr]] = nc; - LUtemp->val[color_top[nr]] = LU->val[pj]; - ++color_top[nr]; + LUtemp->j[workspace->color_top[nr]] = nc; + LUtemp->val[workspace->color_top[nr]] = LU->val[pj]; + ++workspace->color_top[nr]; } /* correct entries to maintain triangularity (lower) */ else { - LUtemp->j[color_top[nc]] = nr; - LUtemp->val[color_top[nc]] = LU->val[pj]; - ++color_top[nc]; + LUtemp->j[workspace->color_top[nc]] = nr; + LUtemp->val[workspace->color_top[nc]] = LU->val[pj]; + ++workspace->color_top[nc]; } } } @@ -2645,24 +2582,24 @@ void permute_matrix( sparse_matrix * const LU, const TRIANGULARITY tri ) { for ( i = LU->n - 1; i >= 0; --i ) { - nr = permuted_row_col_inv[i]; + nr = workspace->permuted_row_col_inv[i]; for ( pj = LU->start[i]; pj < LU->start[i + 1]; ++pj ) { - nc = permuted_row_col_inv[LU->j[pj]]; + nc = workspace->permuted_row_col_inv[LU->j[pj]]; if ( nc >= nr ) { - LUtemp->j[color_top[nr]] = nc; - LUtemp->val[color_top[nr]] = LU->val[pj]; - ++color_top[nr]; + LUtemp->j[workspace->color_top[nr]] = nc; + LUtemp->val[workspace->color_top[nr]] = LU->val[pj]; + ++workspace->color_top[nr]; } /* correct entries to maintain triangularity (upper) */ else { - LUtemp->j[color_top[nc]] = nr; - LUtemp->val[color_top[nc]] = LU->val[pj]; - ++color_top[nc]; + LUtemp->j[workspace->color_top[nc]] = nr; + LUtemp->val[workspace->color_top[nc]] = LU->val[pj]; + ++workspace->color_top[nc]; } } } @@ -2680,62 +2617,43 @@ void permute_matrix( sparse_matrix * const LU, const TRIANGULARITY tri ) * used for preconditioning (incomplete factorizations computed based on * permuted H) * + * control: container for control info + * workspace: storage container for workspace structures * H: symmetric, lower triangular portion only, stored in CSR format; - * H is permuted in-place + * H_full: symmetric, stored in CSR format; + * H_p (output): permuted copy of H based on coloring, lower half stored, CSR format */ -sparse_matrix * setup_graph_coloring( sparse_matrix * const H ) +void setup_graph_coloring( const control_params * const control, + const static_storage * const workspace, const sparse_matrix * const H, + sparse_matrix ** H_full, sparse_matrix ** H_p ) { - int num_thread; - - if ( color == NULL ) + if ( *H_p == NULL ) { -#ifdef _OPENMP - #pragma omp parallel + if ( Allocate_Matrix( H_p, H->n, H->m ) == FAILURE ) { - num_thread = omp_get_num_threads(); + fprintf( stderr, "[ERROR] not enough memory for graph coloring. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); } -#else - num_thread = 1; -#endif - - /* internal storage for graph coloring (global to facilitate simultaneous access to OpenMP threads) */ - color = (unsigned int*) smalloc( sizeof(unsigned int) * H->n, - "setup_graph_coloring::color" ); - to_color = (unsigned int*) smalloc( sizeof(unsigned int) * H->n, - "setup_graph_coloring::to_color" ); - conflict = (unsigned int*) smalloc( sizeof(unsigned int) * H->n, - "setup_graph_coloring::conflict" ); - conflict_cnt = (unsigned int*) smalloc( sizeof(unsigned int) * (num_thread + 1), - "setup_graph_coloring::conflict_cnt" ); - recolor = (unsigned int*) smalloc( sizeof(unsigned int) * H->n, - "setup_graph_coloring::recolor" ); - color_top = (unsigned int*) smalloc( sizeof(unsigned int) * (H->n + 1), - "setup_graph_coloring::color_top" ); - permuted_row_col = (unsigned int*) smalloc( sizeof(unsigned int) * H->n, - "setup_graph_coloring::premuted_row_col" ); - permuted_row_col_inv = (unsigned int*) smalloc( sizeof(unsigned int) * H->n, - "setup_graph_coloring::premuted_row_col_inv" ); - y_p = (real*) smalloc( sizeof(real) * H->n, - "setup_graph_coloring::y_p" ); - if ( (Allocate_Matrix( &H_p, H->n, H->m ) == FAILURE ) || - (Allocate_Matrix( &H_full, H->n, 2 * H->m - H->n ) == FAILURE ) ) - { - fprintf( stderr, "not enough memory for graph coloring. terminating.\n" ); + } + else if ( (*H_p)->m < H->m ) + { + Deallocate_Matrix( *H_p ); + if ( Allocate_Matrix( H_p, H->n, H->m ) == FAILURE ) + { + fprintf( stderr, "[ERROR] not enough memory for graph coloring. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); } } - compute_H_full( H ); + compute_full_sparse_matrix( H, H_full ); - graph_coloring( H_full, LOWER ); - sort_colors( H_full->n, LOWER ); + graph_coloring( control, (static_storage *) workspace, *H_full, LOWER ); + sort_rows_by_colors( workspace, (*H_full)->n, LOWER ); - memcpy( H_p->start, H->start, sizeof(int) * (H->n + 1) ); - memcpy( H_p->j, H->j, sizeof(int) * (H->start[H->n]) ); - memcpy( H_p->val, H->val, sizeof(real) * (H->start[H->n]) ); - permute_matrix( H_p, LOWER ); - - return H_p; + memcpy( (*H_p)->start, H->start, sizeof(int) * (H->n + 1) ); + memcpy( (*H_p)->j, H->j, sizeof(int) * (H->start[H->n]) ); + memcpy( (*H_p)->val, H->val, sizeof(real) * (H->start[H->n]) ); + permute_matrix( workspace, (*H_p), LOWER ); } @@ -2749,36 +2667,31 @@ sparse_matrix * setup_graph_coloring( sparse_matrix * const H ) * triangular factors in iterative linear solvers * * Note: Newmann series arises from series expansion of the inverse of - * the coefficient matrix in the triangular system */ -void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, - const real * const b, real * const x, const TRIANGULARITY tri, const - unsigned int maxiter ) + * the coefficient matrix in the triangular system + * + * workspace: storage container for workspace structures + * R: + * Dinv: + * b: + * x (output): + * tri: + * maxiter: + * */ +void jacobi_iter( const static_storage * const workspace, + const sparse_matrix * const R, const real * const Dinv, + const real * const b, real * const x, const TRIANGULARITY tri, + const unsigned int maxiter ) { unsigned int i, k, si, ei, iter; + real *p1, *p2, *p3; si = 0; ei = 0; iter = 0; + p1 = workspace->rp; + p2 = workspace->rp2; -#ifdef _OPENMP - #pragma omp single -#endif - { - if ( Dinv_b == NULL ) - { - Dinv_b = (real*) smalloc( sizeof(real) * R->n, "jacobi_iter::Dinv_b" ); - } - if ( rp == NULL ) - { - rp = (real*) smalloc( sizeof(real) * R->n, "jacobi_iter::rp" ); - } - if ( rp2 == NULL ) - { - rp2 = (real*) smalloc( sizeof(real) * R->n, "jacobi_iter::rp2" ); - } - } - - Vector_MakeZero( rp, R->n ); + Vector_MakeZero( p1, R->n ); /* precompute and cache, as invariant in loop below */ #ifdef _OPENMP @@ -2786,12 +2699,12 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, #endif for ( i = 0; i < R->n; ++i ) { - Dinv_b[i] = Dinv[i] * b[i]; + workspace->Dinv_b[i] = Dinv[i] * b[i]; } do { - // x_{k+1} = G*x_{k} + Dinv*b; + /* x_{k+1} = G*x_{k} + Dinv*b */ #ifdef _OPENMP #pragma omp for schedule(guided) #endif @@ -2809,31 +2722,26 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, ei = R->start[i + 1]; } - rp2[i] = 0.; + p2[i] = 0.; for ( k = si; k < ei; ++k ) { - rp2[i] += R->val[k] * rp[R->j[k]]; + p2[i] += R->val[k] * p1[R->j[k]]; } - rp2[i] *= -Dinv[i]; - rp2[i] += Dinv_b[i]; + p2[i] *= -Dinv[i]; + p2[i] += workspace->Dinv_b[i]; } -#ifdef _OPENMP - #pragma omp single -#endif - { - rp3 = rp; - rp = rp2; - rp2 = rp3; - } + p3 = p1; + p1 = p2; + p2 = p3; ++iter; } while ( iter < maxiter ); - Vector_Copy( x, rp, R->n ); + Vector_Copy( x, p1, R->n ); } @@ -2901,7 +2809,8 @@ static void apply_preconditioner( const static_storage * const workspace, const case ICHOLT_PC: case ILU_PAR_PC: case ILUT_PAR_PC: - tri_solve_level_sched( workspace->L, y, x, workspace->L->n, LOWER, fresh_pre ); + tri_solve_level_sched( (static_storage *) workspace, + workspace->L, y, x, workspace->L->n, LOWER, fresh_pre ); break; case SAI_PC: Sparse_MatVec_full( workspace->H_app_inv, y, x ); @@ -2924,15 +2833,17 @@ static void apply_preconditioner( const static_storage * const workspace, const case ILU_PAR_PC: case ILUT_PAR_PC: #ifdef _OPENMP - #pragma omp single + #pragma omp for schedule(static) #endif - { - memcpy( y_p, y, sizeof(real) * workspace->H->n ); - } + for ( i = 0; i < workspace->H->n; ++i ) + { + workspace->y_p[i] = y[i]; + } - permute_vector( y_p, workspace->H->n, FALSE, LOWER ); - tri_solve_level_sched( workspace->L, y_p, x, workspace->L->n, LOWER, fresh_pre ); - break; + permute_vector( workspace, workspace->y_p, workspace->H->n, FALSE, LOWER ); + tri_solve_level_sched( (static_storage *) workspace, + workspace->L, workspace->y_p, x, workspace->L->n, LOWER, fresh_pre ); + break; default: fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); @@ -2950,18 +2861,7 @@ static void apply_preconditioner( const static_storage * const workspace, const case ICHOLT_PC: case ILU_PAR_PC: case ILUT_PAR_PC: -#ifdef _OPENMP - #pragma omp single -#endif - { - if ( Dinv_L == NULL ) - { - Dinv_L = (real*) smalloc( sizeof(real) * workspace->L->n, - "apply_preconditioner::Dinv_L" ); - } - } - - /* construct D^{-1}_L */ + /* construct D^{-1}_L */ if ( fresh_pre == TRUE ) { #ifdef _OPENMP @@ -2970,12 +2870,13 @@ static void apply_preconditioner( const static_storage * const workspace, const for ( i = 0; i < workspace->L->n; ++i ) { si = workspace->L->start[i + 1] - 1; - Dinv_L[i] = 1.0 / workspace->L->val[si]; + workspace->Dinv_L[i] = 1.0 / workspace->L->val[si]; } } - jacobi_iter( workspace->L, Dinv_L, y, x, LOWER, control->cm_solver_pre_app_jacobi_iters ); - break; + jacobi_iter( workspace, workspace->L, workspace->Dinv_L, + y, x, LOWER, control->cm_solver_pre_app_jacobi_iters ); + break; default: fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); @@ -3027,7 +2928,8 @@ static void apply_preconditioner( const static_storage * const workspace, const case ICHOLT_PC: case ILU_PAR_PC: case ILUT_PAR_PC: - tri_solve_level_sched( workspace->U, y, x, workspace->U->n, UPPER, fresh_pre ); + tri_solve_level_sched( (static_storage *) workspace, + workspace->U, y, x, workspace->U->n, UPPER, fresh_pre ); break; default: fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); @@ -3046,9 +2948,10 @@ static void apply_preconditioner( const static_storage * const workspace, const case ICHOLT_PC: case ILU_PAR_PC: case ILUT_PAR_PC: - tri_solve_level_sched( workspace->U, y, x, workspace->U->n, UPPER, fresh_pre ); - permute_vector( x, workspace->H->n, TRUE, UPPER ); - break; + tri_solve_level_sched( (static_storage *) workspace, + workspace->U, y, x, workspace->U->n, UPPER, fresh_pre ); + permute_vector( workspace, x, workspace->H->n, TRUE, UPPER ); + break; default: fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); @@ -3066,32 +2969,22 @@ static void apply_preconditioner( const static_storage * const workspace, const case ICHOLT_PC: case ILU_PAR_PC: case ILUT_PAR_PC: -#ifdef _OPENMP - #pragma omp single -#endif - { - if ( Dinv_U == NULL ) - { - Dinv_U = (real*) smalloc( sizeof(real) * workspace->U->n, - "apply_preconditioner::Dinv_U" ); - } - } - /* construct D^{-1}_U */ - if ( fresh_pre == TRUE ) - { + if ( fresh_pre == TRUE ) + { #ifdef _OPENMP - #pragma omp for schedule(static) + #pragma omp for schedule(static) #endif - for ( i = 0; i < workspace->U->n; ++i ) - { - si = workspace->U->start[i]; - Dinv_U[i] = 1. / workspace->U->val[si]; + for ( i = 0; i < workspace->U->n; ++i ) + { + si = workspace->U->start[i]; + workspace->Dinv_U[i] = 1.0 / workspace->U->val[si]; + } } - } - jacobi_iter( workspace->U, Dinv_U, y, x, UPPER, control->cm_solver_pre_app_jacobi_iters ); - break; + jacobi_iter( workspace, workspace->U, workspace->Dinv_U, + y, x, UPPER, control->cm_solver_pre_app_jacobi_iters ); + break; default: fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); @@ -3152,7 +3045,7 @@ int GMRES( const static_storage * const workspace, const control_params * const { /* calculate r0 */ t_start = Get_Time( ); - Sparse_MatVec( H, x, workspace->b_prm ); + Sparse_MatVec( workspace, H, x, workspace->b_prm ); t_spmv += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3182,7 +3075,7 @@ int GMRES( const static_storage * const workspace, const control_params * const { /* matvec */ t_start = Get_Time( ); - Sparse_MatVec( H, workspace->v[j], workspace->b_prc ); + Sparse_MatVec( workspace, H, workspace->v[j], workspace->b_prc ); t_spmv += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3300,27 +3193,19 @@ int GMRES( const static_storage * const workspace, const control_params * const } } -#ifdef _OPENMP data->timing.cm_solver_orthog += t_ortho / control->num_threads; data->timing.cm_solver_pre_app += t_pa / control->num_threads; data->timing.cm_solver_spmv += t_spmv / control->num_threads; data->timing.cm_solver_tri_solve += t_ts / control->num_threads; data->timing.cm_solver_vector_ops += t_vops / control->num_threads; -#else - data->timing.cm_solver_orthog += t_ortho; - data->timing.cm_solver_pre_app += t_pa; - data->timing.cm_solver_spmv += t_spmv; - data->timing.cm_solver_tri_solve += t_ts; - data->timing.cm_solver_vector_ops += t_vops; -#endif if ( g_itr >= control->cm_solver_max_iters ) { fprintf( stderr, "[WARNING] GMRES convergence failed (%d outer iters)\n", g_itr ); - return g_itr * control->cm_solver_restart + g_j; + return g_itr * (control->cm_solver_restart + 1) + g_j + 1; } - return g_itr * control->cm_solver_restart + g_j + 1; + return g_itr * (control->cm_solver_restart + 1) + g_j + 1; } @@ -3368,7 +3253,7 @@ int GMRES_HouseHolder( const static_storage * const workspace, { /* compute z = r0 */ t_start = Get_Time( ); - Sparse_MatVec( H, x, workspace->b_prm ); + Sparse_MatVec( workspace, H, x, workspace->b_prm ); t_spmv += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3409,7 +3294,7 @@ int GMRES_HouseHolder( const static_storage * const workspace, /* matvec */ t_start = Get_Time( ); - Sparse_MatVec( H, z[j], workspace->b_prc ); + Sparse_MatVec( workspace, H, z[j], workspace->b_prc ); t_spmv += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3546,19 +3431,11 @@ int GMRES_HouseHolder( const static_storage * const workspace, } } -#ifdef _OPENMP data->timing.cm_solver_orthog += t_ortho / control->num_threads; data->timing.cm_solver_pre_app += t_pa / control->num_threads; data->timing.cm_solver_spmv += t_spmv / control->num_threads; data->timing.cm_solver_tri_solve += t_ts / control->num_threads; data->timing.cm_solver_vector_ops += t_vops / control->num_threads; -#else - data->timing.cm_solver_orthog += t_ortho; - data->timing.cm_solver_pre_app += t_pa; - data->timing.cm_solver_spmv += t_spmv; - data->timing.cm_solver_tri_solve += t_ts; - data->timing.cm_solver_vector_ops += t_vops; -#endif if ( g_itr >= control->cm_solver_max_iters ) { @@ -3606,7 +3483,7 @@ int CG( const static_storage * const workspace, const control_params * const con t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); - Sparse_MatVec( H, x, d ); + Sparse_MatVec( workspace, H, x, d ); t_spmv += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3627,7 +3504,7 @@ int CG( const static_storage * const workspace, const control_params * const con for ( i = 0; i < control->cm_solver_max_iters && r_norm / b_norm > tol; ++i ) { t_start = Get_Time( ); - Sparse_MatVec( H, p, d ); + Sparse_MatVec( workspace, H, p, d ); t_spmv += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3657,15 +3534,9 @@ int CG( const static_storage * const workspace, const control_params * const con g_itr = i; } -#ifdef _OPENMP data->timing.cm_solver_pre_app += t_pa / control->num_threads; data->timing.cm_solver_spmv += t_spmv / control->num_threads; data->timing.cm_solver_vector_ops += t_vops / control->num_threads; -#else - data->timing.cm_solver_pre_app += t_pa; - data->timing.cm_solver_spmv += t_spmv; - data->timing.cm_solver_vector_ops += t_vops; -#endif if ( g_itr >= control->cm_solver_max_iters ) { @@ -3708,7 +3579,7 @@ int SDM( const static_storage * const workspace, const control_params * const co t_vops += Get_Timing_Info( t_start ); t_start = Get_Time( ); - Sparse_MatVec( H, x, workspace->q ); + Sparse_MatVec( workspace, H, x, workspace->q ); t_spmv += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3727,7 +3598,7 @@ int SDM( const static_storage * const workspace, const control_params * const co for ( i = 0; i < control->cm_solver_max_iters && SQRT(sig) / b_norm > tol; ++i ) { t_start = Get_Time( ); - Sparse_MatVec( H, workspace->d, workspace->q ); + Sparse_MatVec( workspace, H, workspace->d, workspace->q ); t_spmv += Get_Timing_Info( t_start ); t_start = Get_Time( ); @@ -3760,15 +3631,9 @@ int SDM( const static_storage * const workspace, const control_params * const co g_itr = i; } -#ifdef _OPENMP data->timing.cm_solver_pre_app += t_pa / control->num_threads; data->timing.cm_solver_spmv += t_spmv / control->num_threads; data->timing.cm_solver_vector_ops += t_vops / control->num_threads; -#else - data->timing.cm_solver_pre_app += t_pa; - data->timing.cm_solver_spmv += t_spmv; - data->timing.cm_solver_vector_ops += t_vops; -#endif if ( g_itr >= control->cm_solver_max_iters ) { diff --git a/sPuReMD/src/lin_alg.h b/sPuReMD/src/lin_alg.h index 814701fe06c81a4fcd33530f9a16f70c12abdf5d..ab0237c14281eb31ddabcd41087b00a475d0ccd0 100644 --- a/sPuReMD/src/lin_alg.h +++ b/sPuReMD/src/lin_alg.h @@ -73,15 +73,19 @@ void Transpose_I( sparse_matrix * const ); void tri_solve( const sparse_matrix * const, const real * const, real * const, const int, const TRIANGULARITY ); -void tri_solve_level_sched( const sparse_matrix * const, +void tri_solve_level_sched( static_storage *, + const sparse_matrix * const, const real * const, real * const, const int, const TRIANGULARITY, int ); -void jacobi_iter( const sparse_matrix * const, const real * const, +void jacobi_iter( const static_storage * const, + const sparse_matrix * const, const real * const, const real * const, real * const, const TRIANGULARITY, const unsigned int ); -sparse_matrix * setup_graph_coloring( sparse_matrix * const ); +void setup_graph_coloring( const control_params * const, + const static_storage * const, const sparse_matrix * const, + sparse_matrix **, sparse_matrix ** ); int GMRES( const static_storage * const, const control_params * const, simulation_data * const, const sparse_matrix * const, diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h index e46d217c766685ed6c6d6153f83e2679e100b6b0..61de424a6363a79a6ce51f993ebade2ca7d7abbd 100644 --- a/sPuReMD/src/mytypes.h +++ b/sPuReMD/src/mytypes.h @@ -617,9 +617,7 @@ typedef struct int num_ignored; int ignore[MAX_ATOM_TYPES]; -#ifdef _OPENMP int num_threads; -#endif } control_params; @@ -947,6 +945,7 @@ typedef struct sparse_matrix *H; sparse_matrix *H_full; sparse_matrix *H_sp; + sparse_matrix *H_p; sparse_matrix *H_spar_patt; sparse_matrix *H_spar_patt_full; sparse_matrix *H_app_inv; @@ -971,12 +970,52 @@ typedef struct real **h; real **rn; real **v; + /* CG related storage */ real *r; real *d; real *q; real *p; + /* SpMV related storage */ +#ifdef _OPENMP + real *b_local; +#endif + + /* Level scheduling related storage for applying, e.g. ICHOLT and ILU(T), + * preconditioners */ + int levels_L; + int levels_U; + unsigned int *row_levels_L; + unsigned int *level_rows_L; + unsigned int *level_rows_cnt_L; + unsigned int *row_levels_U; + unsigned int *level_rows_U; + unsigned int *level_rows_cnt_U; + unsigned int *top; + + /* Graph coloring related storage for applying, e.g. ICHOLT and ILU(T), + * preconditioners */ + unsigned int *color; + unsigned int *to_color; + unsigned int *conflict; + unsigned int *conflict_cnt; + unsigned int *recolor; + unsigned int recolor_cnt; + unsigned int *color_top; + unsigned int *permuted_row_col; + unsigned int *permuted_row_col_inv; + real *y_p; + real *x_p; + + /* Jacobi iteration related storage for applying, e.g. ICHOLT and ILU(T), + * preconditioners */ + real *Dinv_L; + real *Dinv_U; + real *Dinv_b; + real *rp; + real *rp2; + int num_H; int *hbond_index; // for hydrogen bonds diff --git a/sPuReMD/src/neighbors.c b/sPuReMD/src/neighbors.c index c0351c857bc4709d729311922f3ecbef8192675a..b63a7fca09a8249c2ab2cab71ffc7741f9df9200 100644 --- a/sPuReMD/src/neighbors.c +++ b/sPuReMD/src/neighbors.c @@ -71,9 +71,11 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, t_start = Get_Time( ); // fprintf( stderr, "\n\tentered nbrs - " ); g = &( system->g ); - far_nbrs = (*lists) + FAR_NBRS; + far_nbrs = &(*lists)[FAR_NBRS]; + Bin_Atoms( system, workspace ); // fprintf( stderr, "atoms sorted - " ); + num_far = 0; /* first pick up a cell in the grid */ @@ -365,25 +367,29 @@ static inline int is_Near_Neighbor( reax_list *near_nbrs, int atom1, int atom2 ) return TRUE; } + void Generate_Neighbor_Lists( reax_system *system, control_params *control, simulation_data *data, static_storage *workspace, reax_list **lists, output_controls *out_control ) { - int i, j, k; - int x, y, z; - int *nbr_atoms; - int atom1, atom2, max; - int num_far; - int c, count; - int grid_top; - grid *g = &( system->g ); - reax_list *far_nbrs = (*lists) + FAR_NBRS; + int i, j, k; + int x, y, z; + int *nbr_atoms; + int atom1, atom2, max; + int num_far; + int c, count; + int grid_top; + grid *g; + reax_list *far_nbrs; //int hb_type1, hb_type2; - //reax_list *hbonds = (*lists) + HBOND; + //reax_list *hbonds = &(*lists)[HBOND]; //int top_hbond1, top_hbond2; get_far_neighbors_function Get_Far_Neighbors; far_neighbor_data new_nbrs[125]; + g = &( system->g ); + far_nbrs = &(*lists)[FAR_NBRS]; + // fprintf( stderr, "\n\tentered nbrs - " ); if ( control->ensemble == iNPT || control->ensemble == sNPT || control->ensemble == NPT ) @@ -632,7 +638,7 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, far_neighbor_data new_nbrs[125]; g = &( system->g ); - far_nbrs = (*lists) + FAR_NBRS; + far_nbrs = &(*lists)[FAR_NBRS]; // fprintf( stderr, "\n\tentered nbrs - " ); if ( control->ensemble == iNPT || diff --git a/sPuReMD/src/print_utils.c b/sPuReMD/src/print_utils.c index 30c071251bdfcfb9a55f147dbeab16b50994132b..8c3ea3095354baddad4c763e69357de35132c5a6 100644 --- a/sPuReMD/src/print_utils.c +++ b/sPuReMD/src/print_utils.c @@ -42,8 +42,8 @@ void Print_Bond_Orders( reax_system *system, control_params *control, { int i, pj, pk; bond_order_data *bo_ij; - reax_list *bonds = (*lists) + BONDS; - reax_list *dBOs = (*lists) + DBO; + reax_list *bonds = &(*lists)[BONDS]; + reax_list *dBOs = &(*lists)[DBO]; dbond_data *dbo_k; /* bond orders */ @@ -473,7 +473,7 @@ void Print_Far_Neighbors( reax_system *system, control_params *control, int i, j, id_i, id_j; char fname[MAX_STR]; FILE *fout; - reax_list *far_nbrs = &((*lists)[FAR_NBRS]); + reax_list *far_nbrs = &(*lists)[FAR_NBRS]; snprintf( fname, MAX_STR, "%.*s.far_nbrs", MAX_STR - 10, control->sim_name ); fout = fopen( fname, "w" ); @@ -518,7 +518,7 @@ void Print_Far_Neighbors2( reax_system *system, control_params *control, int i, j, id_i, id_j; char fname[MAX_STR]; FILE *fout; - reax_list *far_nbrs = &((*lists)[FAR_NBRS]); + reax_list *far_nbrs = &(*lists)[FAR_NBRS]; snprintf( fname, MAX_STR, "%.*s.far_nbrs_lgj", MAX_STR - 14, control->sim_name ); fout = fopen( fname, "w" ); @@ -720,7 +720,7 @@ void Output_Results( reax_system *system, control_params *control, out_control->append_traj_frame( system, control, data, workspace, lists, out_control ); - //Write_PDB( system, *lists+BONDS, data, control, workspace, out_control ); + //Write_PDB( system, &(*lists)[BONDS], data, control, workspace, out_control ); //t_elapsed = Get_Timing_Info( t_start ); //fprintf(stdout, "append_frame took %.6f seconds\n", t_elapsed ); } diff --git a/sPuReMD/src/reset_utils.c b/sPuReMD/src/reset_utils.c index 0aed2a953efe94d904eccc3d17c8725ee214f96e..3edfe9072c0b635f7d3cfb97e555dd37d19d4a37 100644 --- a/sPuReMD/src/reset_utils.c +++ b/sPuReMD/src/reset_utils.c @@ -118,8 +118,11 @@ void Reset_Neighbor_Lists( reax_system *system, control_params *control, static_storage *workspace, reax_list **lists ) { int i, tmp; - reax_list *bonds = (*lists) + BONDS; - reax_list *hbonds = (*lists) + HBONDS; + reax_list *bonds; + reax_list *hbonds; + + bonds = &(*lists)[BONDS]; + hbonds = &(*lists)[HBONDS]; for ( i = 0; i < system->N; ++i ) { diff --git a/sPuReMD/src/single_body_interactions.c b/sPuReMD/src/single_body_interactions.c index b6edce1db6b7becfd311334fec83568625276261..4a9ff3102d5d86e33db27d71c19ed101a465c54f 100644 --- a/sPuReMD/src/single_body_interactions.c +++ b/sPuReMD/src/single_body_interactions.c @@ -20,6 +20,7 @@ ----------------------------------------------------------------------*/ #include "single_body_interactions.h" + #include "bond_orders.h" #include "list.h" #include "lookup.h" @@ -45,8 +46,9 @@ void LonePair_OverUnder_Coordination_Energy( reax_system *system, control_params two_body_parameters *twbp; bond_data *pbond; bond_order_data *bo_ij; - reax_list *bonds = (*lists) + BONDS; + reax_list *bonds; + bonds = &(*lists)[BONDS]; /* Initialize parameters */ p_lp1 = system->reaxprm.gp.l[15]; p_lp3 = system->reaxprm.gp.l[5]; diff --git a/sPuReMD/src/spuremd.h b/sPuReMD/src/spuremd.h index ed2b98d3cc21ab0a68d5195d4e33ffe422685dfd..3a78242bd768e2674c727ac882f74b69b2a5e336 100644 --- a/sPuReMD/src/spuremd.h +++ b/sPuReMD/src/spuremd.h @@ -29,6 +29,10 @@ #include "mytypes.h" +#ifdef __cplusplus +extern "C" { +#endif + void* setup( const char * const, const char * const, const char * const ); @@ -40,5 +44,9 @@ int cleanup( const void * const ); reax_atom* get_atoms( const void * const ); +#ifdef __cplusplus +} +#endif + #endif diff --git a/sPuReMD/src/three_body_interactions.c b/sPuReMD/src/three_body_interactions.c index 159e0cc17f354c94172c01729a17f473f941b51b..485be5fdf934f5e0d113a35569a44ed44a5fb14f 100644 --- a/sPuReMD/src/three_body_interactions.c +++ b/sPuReMD/src/three_body_interactions.c @@ -93,9 +93,9 @@ void Three_Body_Interactions( reax_system *system, control_params *control, real e_ang_total, e_pen_total, e_coa_total; total_bo = workspace->total_bond_order; - bonds = (*lists) + BONDS; + bonds = &(*lists)[BONDS]; bond_list = bonds->select.bond_list; - thb_intrs = (*lists) + THREE_BODIES; + thb_intrs = &(*lists)[THREE_BODIES]; thb_list = thb_intrs->select.three_body_list; /* global parameters used in these calculations */ p_pen2 = system->reaxprm.gp.l[19]; @@ -705,9 +705,9 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, #ifdef TEST_FORCES num_hb_intrs = 0; #endif - bonds = (*lists) + BONDS; + bonds = &(*lists)[BONDS]; bond_list = bonds->select.bond_list; - hbonds = (*lists) + HBONDS; + hbonds = &(*lists)[HBONDS]; hbond_list = hbonds->select.hbond_list; /* loops below discover the Hydrogen bonds between i-j-k triplets. diff --git a/sPuReMD/src/traj.c b/sPuReMD/src/traj.c index 7972d0868c47fc992f909d9be457c3d454743e44..03cc5d74e1a644fdc10d84e8c6b57d9331c1a644 100644 --- a/sPuReMD/src/traj.c +++ b/sPuReMD/src/traj.c @@ -178,8 +178,8 @@ int Append_Custom_Frame( reax_system *system, control_params *control, int frame_globals_len, num_bonds, num_thb_intrs; real P; char buffer[SIZE]; - reax_list *bonds = (*lists) + BONDS; - reax_list *thb_intrs = (*lists) + THREE_BODIES; + reax_list *bonds = &(*lists)[BONDS]; + reax_list *thb_intrs = &(*lists)[THREE_BODIES]; bond_data *bo_ij; /* IMPORTANT: This whole part will go to init_trj after finalized! */ diff --git a/sPuReMD/src/two_body_interactions.c b/sPuReMD/src/two_body_interactions.c index 664d7c0f3088d1b11beceb6e6a2647c49fb46cb6..d83db947b82936725eb37ad9ac9deb83029abbed 100644 --- a/sPuReMD/src/two_body_interactions.c +++ b/sPuReMD/src/two_body_interactions.c @@ -35,7 +35,7 @@ void Bond_Energy( reax_system *system, control_params *control, real gp3, gp4, gp7, gp10, gp37, ebond_total; reax_list *bonds; - bonds = (*lists) + BONDS; + bonds = &(*lists)[BONDS]; gp3 = system->reaxprm.gp.l[3]; gp4 = system->reaxprm.gp.l[4]; gp7 = system->reaxprm.gp.l[7]; @@ -177,7 +177,7 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, p_vdW1 = system->reaxprm.gp.l[28]; p_vdW1i = 1.0 / p_vdW1; - far_nbrs = (*lists) + FAR_NBRS; + far_nbrs = &(*lists)[FAR_NBRS]; e_vdW_total = 0.0; e_ele_total = 0.0; @@ -527,7 +527,7 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control, reax_list *far_nbrs; real e_vdW_total, e_ele_total; - far_nbrs = (*lists) + FAR_NBRS; + far_nbrs = &(*lists)[FAR_NBRS]; steps = data->step - data->prev_steps; update_freq = out_control->energy_update_freq; update_energies = update_freq > 0 && steps % update_freq == 0; diff --git a/sPuReMD/tests/test_lin_alg.cpp b/sPuReMD/tests/test_lin_alg.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a475f8c3dacb419d63b95983e3078b3eb89022e7 --- /dev/null +++ b/sPuReMD/tests/test_lin_alg.cpp @@ -0,0 +1,92 @@ +#include <gtest/gtest.h> + +#include "mytypes.h" +#include "allocate.c" +#include "lin_alg.c" +#include "list.c" +#include "tool_box.c" +#include "vector.c" + +#define VEC_SIZE (100) + + +namespace +{ + class LinAlgTest : public ::testing::Test + { + protected: + real *a, *res; + sparse_matrix *sm; + + LinAlgTest( ) + { + if ( !Allocate_Matrix( &sm, VEC_SIZE, VEC_SIZE ) || + (a = (real *) malloc( VEC_SIZE * sizeof(real))) == NULL || + (res = (real *) malloc( VEC_SIZE * sizeof(real))) == NULL ) + { + throw new std::bad_alloc( ); + } + } + + virtual ~LinAlgTest( ) + { + if ( a != NULL ) + { + free( a ); + } + if ( res != NULL ) + { + free( res ); + } + if ( sm != NULL ) + { + Deallocate_Matrix( sm ); + } + + } + + virtual void SetUp( ) + { + for ( int i = 0; i < VEC_SIZE; ++i ) + { + a[i] = 1.0; + } + + // set up sparse matrix which is an identity matrix in our case + for ( int i = 0; i < sm->n + 1; i++ ) + { + sm->start[i] = i; + } + + for ( int i = 0; i < sm->start[sm->n]; i++ ) + { + sm->j[i] = i; + sm->val[i] = 1.0; + } + } + + + virtual void TearDown( ) + { + + } + }; + + + TEST_F(LinAlgTest, Sparse_MatVec) + { + Sparse_MatVec( sm, a, res ); + + for ( int i = 0; i < VEC_SIZE; ++i ) + { + ASSERT_EQ( res[i], a[i] ); + } + } +} + + +int main( int argc, char **argv ) +{ + ::testing::InitGoogleTest( &argc, argv ); + return RUN_ALL_TESTS( ); +} diff --git a/sPuReMD/tests/test_spuremd.cpp b/sPuReMD/tests/test_spuremd.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4ac83d54eec527454cafc3c529e4fc79afc299f3 --- /dev/null +++ b/sPuReMD/tests/test_spuremd.cpp @@ -0,0 +1,64 @@ +#include <gtest/gtest.h> + +#include "spuremd.h" + + +namespace +{ + class SPuReMDTest : public ::testing::Test + { + protected: + void *handle; + + SPuReMDTest ( ) + { + } + + virtual ~SPuReMDTest ( ) + { + } + + virtual void SetUp( ) + { + } + + virtual void TearDown( ) + { + if ( handle != NULL ) + { + cleanup( handle ); + } + } + }; + + + TEST_F(SPuReMDTest, water_6540) + { + handle = setup( "../data/benchmarks/water/water_6540.pdb", + "../data/benchmarks/water/ffield.water", + "../environ/param.gpu.water" ); + + ASSERT_EQ( simulate( handle ), SPUREMD_SUCCESS ); + + //TODO: check energy after evolving system, e.g., 100 steps + } + + + TEST_F(SPuReMDTest, silica_6000) + { + handle = setup( "../data/benchmarks/silica/silica_6000.pdb", + "../data/benchmarks/silica/ffield-bio", + "../environ/param.gpu.water" ); + + ASSERT_EQ( simulate( handle ), SPUREMD_SUCCESS ); + + //TODO: check energy after evolving system, e.g., 100 steps + } +} + + +int main( int argc, char **argv ) +{ + ::testing::InitGoogleTest( &argc, argv ); + return RUN_ALL_TESTS( ); +}