diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c index 0080f73c7c28ee6452d37aad5b92ac8f6749b91b..61c5c8a702cacfc14123761756e81ab8717227aa 100644 --- a/sPuReMD/src/charges.c +++ b/sPuReMD/src/charges.c @@ -205,7 +205,7 @@ static void Compute_Preconditioner_QEq( const reax_system * const system, case SAI_PC: #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) data->timing.cm_solver_pre_comp += - Sparse_Approx_Inverse( Hptr, workspace->H_spar_patt, + sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full, &workspace->H_app_inv ); #else fprintf( stderr, "LAPACKE support disabled. Re-compile before enabling. Terminating...\n" ); @@ -259,13 +259,9 @@ static void Compute_Preconditioner_QEq( const reax_system * const system, // // if ( ones == NULL ) // { -// if ( ( ones = (real*) malloc( system->N * sizeof(real)) ) == NULL || -// ( x = (real*) malloc( system->N * sizeof(real)) ) == NULL || -// ( y = (real*) malloc( system->N * sizeof(real)) ) == NULL ) -// { -// fprintf( stderr, "Not enough space for preconditioner computation. Terminating...\n" ); -// exit( INSUFFICIENT_MEMORY ); -// } +// ones = (real*) smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::ones" ); +// x = (real*) smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::x" ); +// y = (real*) smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::y" ); // // for ( i = 0; i < system->N; ++i ) // { @@ -568,7 +564,7 @@ static void Compute_Preconditioner_EE( const reax_system * const system, case SAI_PC: #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) data->timing.cm_solver_pre_comp += - Sparse_Approx_Inverse( Hptr, workspace->H_spar_patt, + sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full, &workspace->H_app_inv ); #else fprintf( stderr, "LAPACKE support disabled. Re-compile before enabling. Terminating...\n" ); @@ -686,7 +682,7 @@ static void Compute_Preconditioner_ACKS2( const reax_system * const system, case SAI_PC: #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) data->timing.cm_solver_pre_comp += - Sparse_Approx_Inverse( Hptr, workspace->H_spar_patt, + sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full, &workspace->H_app_inv ); #else fprintf( stderr, "LAPACKE support disabled. Re-compile before enabling. Terminating...\n" ); @@ -764,11 +760,8 @@ static void Setup_Preconditioner_QEq( const reax_system * const system, case DIAG_PC: if ( workspace->Hdia_inv == NULL ) { - if ( ( workspace->Hdia_inv = (real *) calloc( Hptr->n, sizeof( real ) ) ) == NULL ) - { - fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + workspace->Hdia_inv = (real *) scalloc( Hptr->n, sizeof( real ), + "Setup_Preconditioner_QEq::workspace->Hdiv_inv" ); } break; @@ -861,8 +854,9 @@ static void Setup_Preconditioner_QEq( const reax_system * const system, break; case SAI_PC: - Setup_Sparsity_Pattern( Hptr, control->cm_solver_pre_comp_sai_thres, - &workspace->H_spar_patt ); + setup_sparse_approx_inverse( Hptr, &workspace->H_full, &workspace->H_spar_patt, + &workspace->H_spar_patt_full, &workspace->H_app_inv, + control->cm_solver_pre_comp_sai_thres ); break; default: @@ -916,11 +910,8 @@ static void Setup_Preconditioner_EE( const reax_system * const system, case DIAG_PC: if ( workspace->Hdia_inv == NULL ) { - if ( ( workspace->Hdia_inv = (real *) calloc( system->N_cm, sizeof( real ) ) ) == NULL ) - { - fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + workspace->Hdia_inv = (real *) scalloc( system->N_cm, sizeof( real ), + "Setup_Preconditioner_QEq::workspace->Hdiv_inv" ); } break; @@ -1013,8 +1004,9 @@ static void Setup_Preconditioner_EE( const reax_system * const system, break; case SAI_PC: - Setup_Sparsity_Pattern( Hptr, control->cm_solver_pre_comp_sai_thres, - &workspace->H_spar_patt ); + setup_sparse_approx_inverse( Hptr, &workspace->H_full, &workspace->H_spar_patt, + &workspace->H_spar_patt_full, &workspace->H_app_inv, + control->cm_solver_pre_comp_sai_thres ); break; default: @@ -1071,11 +1063,8 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, case DIAG_PC: if ( workspace->Hdia_inv == NULL ) { - if ( ( workspace->Hdia_inv = (real *) calloc( Hptr->n, sizeof( real ) ) ) == NULL ) - { - fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + workspace->Hdia_inv = (real *) scalloc( Hptr->n, sizeof( real ), + "Setup_Preconditioner_QEq::workspace->Hdiv_inv" ); } break; @@ -1167,8 +1156,9 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, break; case SAI_PC: - Setup_Sparsity_Pattern( Hptr, control->cm_solver_pre_comp_sai_thres, - &workspace->H_spar_patt ); + setup_sparse_approx_inverse( Hptr, &workspace->H_full, &workspace->H_spar_patt, + &workspace->H_spar_patt_full, &workspace->H_app_inv, + control->cm_solver_pre_comp_sai_thres ); break; default: @@ -1279,18 +1269,18 @@ static void QEq( reax_system * const system, control_params * const control, break; case CG_S: - iters = CG( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, + iters = CG( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; - iters += CG( workspace, control, workspace->H, workspace->b_t, control->cm_solver_q_err, + iters += CG( workspace, control, data, workspace->H, workspace->b_t, control->cm_solver_q_err, workspace->t[0], FALSE ) + 1; break; case SDM_S: - iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, + iters = SDM( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; - iters += SDM( workspace,control, workspace->H, workspace->b_t, control->cm_solver_q_err, + iters += SDM( workspace, control, data, workspace->H, workspace->b_t, control->cm_solver_q_err, workspace->t[0], FALSE ) + 1; break; @@ -1363,13 +1353,13 @@ static void EE( reax_system * const system, control_params * const control, break; case CG_S: - iters = CG( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, + iters = CG( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; break; case SDM_S: - iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, + iters = SDM( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; break; @@ -1437,13 +1427,13 @@ static void ACKS2( reax_system * const system, control_params * const control, break; case CG_S: - iters = CG( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, + iters = CG( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; break; case SDM_S: - iters = SDM( workspace, control, workspace->H, workspace->b_s, control->cm_solver_q_err, + iters = SDM( workspace, control, data, workspace->H, workspace->b_s, control->cm_solver_q_err, workspace->s[0], (control->cm_solver_pre_comp_refactor > 0 && (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0) ? TRUE : FALSE ) + 1; break; diff --git a/sPuReMD/src/ffield.c b/sPuReMD/src/ffield.c index 56d8a310439b94aaace6c11c2ab55d3f64bc09cd..7aab5c5235d7e6e221c91f9d90c66949ec861551 100644 --- a/sPuReMD/src/ffield.c +++ b/sPuReMD/src/ffield.c @@ -33,11 +33,14 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) int i, j, k, l, m, n, o, p, cnt; real val; - s = (char*) malloc( sizeof(char) * MAX_LINE ); - tmp = (char**) malloc( sizeof(char*) * MAX_TOKENS ); + s = (char*) smalloc( sizeof(char) * MAX_LINE, + "Read_Force_Field::s" ); + tmp = (char**) smalloc( sizeof(char*) * MAX_TOKENS, + "Read_Force_Field::tmp" ); for (i = 0; i < MAX_TOKENS; i++) { - tmp[i] = (char*) malloc( sizeof(char) * MAX_TOKEN_LEN ); + tmp[i] = (char*) smalloc( sizeof(char) * MAX_TOKEN_LEN, + "Read_Force_Field::tmp[i]" ); } /* reading first header comment */ @@ -57,7 +60,8 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) } reax->gp.n_global = n; - reax->gp.l = (real*) malloc( sizeof(real) * n ); + reax->gp.l = (real*) smalloc( sizeof(real) * n, + "Read_Force_Field::reax->gp-l" ); /* see mytypes.h for mapping between l[i] and the lambdas used in ff */ for (i = 0; i < n; i++) @@ -85,48 +89,65 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) /* Allocating structures in reax_interaction */ reax->sbp = (single_body_parameters*) - calloc( reax->num_atom_types, sizeof(single_body_parameters) ); + scalloc( reax->num_atom_types, sizeof(single_body_parameters), + "Read_Force_Field::reax->sbp" ); reax->tbp = (two_body_parameters**) - calloc( reax->num_atom_types, sizeof(two_body_parameters*) ); + scalloc( reax->num_atom_types, sizeof(two_body_parameters*), + "Read_Force_Field::reax->tbp" ); reax->thbp = (three_body_header***) - calloc( reax->num_atom_types, sizeof(three_body_header**) ); + scalloc( reax->num_atom_types, sizeof(three_body_header**), + "Read_Force_Field::reax->thbp" ); reax->hbp = (hbond_parameters***) - calloc( reax->num_atom_types, sizeof(hbond_parameters**) ); + scalloc( reax->num_atom_types, sizeof(hbond_parameters**), + "Read_Force_Field::reax->hbp" ); reax->fbp = (four_body_header****) - calloc( reax->num_atom_types, sizeof(four_body_header***) ); + scalloc( reax->num_atom_types, sizeof(four_body_header***), + "Read_Force_Field::reax->fbp" ); tor_flag = (char****) - calloc( reax->num_atom_types, sizeof(char***) ); + scalloc( reax->num_atom_types, sizeof(char***), + "Read_Force_Field::tor_flag" ); for ( i = 0; i < reax->num_atom_types; i++ ) { reax->tbp[i] = (two_body_parameters*) - calloc( reax->num_atom_types, sizeof(two_body_parameters) ); + scalloc( reax->num_atom_types, sizeof(two_body_parameters), + "Read_Force_Field::reax->tbp[i]" ); reax->thbp[i] = (three_body_header**) - calloc( reax->num_atom_types, sizeof(three_body_header*) ); + scalloc( reax->num_atom_types, sizeof(three_body_header*), + "Read_Force_Field::reax->thbp[i]" ); reax->hbp[i] = (hbond_parameters**) - calloc( reax->num_atom_types, sizeof(hbond_parameters*) ); + scalloc( reax->num_atom_types, sizeof(hbond_parameters*), + "Read_Force_Field::reax->hbp[i]" ); reax->fbp[i] = (four_body_header***) - calloc( reax->num_atom_types, sizeof(four_body_header**) ); + scalloc( reax->num_atom_types, sizeof(four_body_header**), + "Read_Force_Field::reax->fbp[i]" ); tor_flag[i] = (char***) - calloc( reax->num_atom_types, sizeof(char**) ); + scalloc( reax->num_atom_types, sizeof(char**), + "Read_Force_Field::tor_flag[i]" ); for ( j = 0; j < reax->num_atom_types; j++ ) { reax->thbp[i][j] = (three_body_header*) - calloc( reax->num_atom_types, sizeof(three_body_header) ); + scalloc( reax->num_atom_types, sizeof(three_body_header), + "Read_Force_Field::reax->thbp[i][j]" ); reax->hbp[i][j] = (hbond_parameters*) - calloc( reax->num_atom_types, sizeof(hbond_parameters) ); + scalloc( reax->num_atom_types, sizeof(hbond_parameters), + "Read_Force_Field::reax->hbp[i][j]" ); reax->fbp[i][j] = (four_body_header**) - calloc( reax->num_atom_types, sizeof(four_body_header*) ); + scalloc( reax->num_atom_types, sizeof(four_body_header*), + "Read_Force_Field::reax->fbp[i][j]" ); tor_flag[i][j] = (char**) - calloc( reax->num_atom_types, sizeof(char*) ); + scalloc( reax->num_atom_types, sizeof(char*), + "Read_Force_Field::tor_flag[i][j]" ); for (k = 0; k < reax->num_atom_types; k++) { reax->fbp[i][j][k] = (four_body_header*) - calloc( reax->num_atom_types, sizeof(four_body_header) ); + scalloc( reax->num_atom_types, sizeof(four_body_header), + "Read_Force_Field::reax->fbp[i][j][k]" ); tor_flag[i][j][k] = (char*) - calloc( reax->num_atom_types, sizeof(char) ); + scalloc( reax->num_atom_types, sizeof(char), + "Read_Force_Field::tor_flag[i][j][k]" ); } } } diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c index 10e31fdc625428ac3fd6e3df1a521418cdaed18f..4ad3069f78e7fa0158f92c7872eaad6fdc81a139 100644 --- a/sPuReMD/src/forces.c +++ b/sPuReMD/src/forces.c @@ -476,11 +476,8 @@ static void Init_Charge_Matrix_Remaining_Entries( reax_system *system, break; case ACKS2_CM: - if ( (X_diag = (real*) calloc(system->N, sizeof(real))) == NULL ) - { - fprintf( stderr, "not enough memory for charge matrix. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + X_diag = (real*) scalloc( system->N, sizeof(real), + "Init_Charge_Matrix_Remaining_Entries::X_diag" ); H->start[system->N] = *Htop; H_sp->start[system->N] = *H_sp_top; diff --git a/sPuReMD/src/geo_tools.c b/sPuReMD/src/geo_tools.c index fcde21be8b93a72299b3b7134978d05cef42d83c..6967564503dcf3baeb3a1a2862be0f365e3dd7a0 100644 --- a/sPuReMD/src/geo_tools.c +++ b/sPuReMD/src/geo_tools.c @@ -247,12 +247,7 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control, } /* allocate memory for tokenizing pdb lines */ - if ( Allocate_Tokenizer_Space( &s, &s1, &tmp ) == FAILURE ) - { - fprintf( stderr, "Allocate_Tokenizer_Space: not enough memory!" ); - fprintf( stderr, "terminating...\n" ); - exit( INSUFFICIENT_MEMORY ); - } + Allocate_Tokenizer_Space( &s, &s1, &tmp ); /* read box information */ if ( Read_Box_Info( system, pdb, PDB ) == FAILURE ) @@ -567,12 +562,16 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control, } /* allocate memory for tokenizing biograf file lines */ - line = (char*) malloc( sizeof(char) * MAX_LINE ); - backup = (char*) malloc( sizeof(char) * MAX_LINE ); - tokens = (char**) malloc( sizeof(char*) * MAX_TOKENS ); + line = (char*) smalloc( sizeof(char) * MAX_LINE, + "Read_BGF::line" ); + backup = (char*) smalloc( sizeof(char) * MAX_LINE, + "Read_BGF::backup" ); + tokens = (char**) smalloc( sizeof(char*) * MAX_TOKENS, + "Read_BGF::tokens" ); for ( i = 0; i < MAX_TOKENS; i++ ) { - tokens[i] = (char*) malloc( sizeof(char) * MAX_TOKEN_LEN ); + tokens[i] = (char*) smalloc( sizeof(char) * MAX_TOKEN_LEN, + "Read_BGF::tokens[i]" ); } /* count number of atoms in the pdb file */ @@ -599,20 +598,26 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control, fclose( bgf ); /* memory allocations for atoms, atom maps, bond restrictions */ - system->atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) ); + system->atoms = (reax_atom*) scalloc( system->N, sizeof(reax_atom), + "Read_BGF::system->atoms" ); - workspace->map_serials = (int*) calloc( MAX_ATOM_ID, sizeof(int) ); + workspace->map_serials = (int*) scalloc( MAX_ATOM_ID, sizeof(int), + "Read_BGF::workspace->map_serials" ); for ( i = 0; i < MAX_ATOM_ID; ++i ) { workspace->map_serials[i] = -1; } - workspace->orig_id = (int*) calloc( system->N, sizeof(int) ); - workspace->restricted = (int*) calloc( system->N, sizeof(int) ); - workspace->restricted_list = (int**) calloc( system->N, sizeof(int*) ); + workspace->orig_id = (int*) scalloc( system->N, sizeof(int), + "Read_BGF::workspace->orig_id" ); + workspace->restricted = (int*) scalloc( system->N, sizeof(int), + "Read_BGF::workspace->restricted" ); + workspace->restricted_list = (int**) scalloc( system->N, sizeof(int*), + "Read_BGF::workspace->restricted_list" ); for ( i = 0; i < system->N; ++i ) { - workspace->restricted_list[i] = (int*) calloc( MAX_RESTRICT, sizeof(int) ); + workspace->restricted_list[i] = (int*) scalloc( MAX_RESTRICT, sizeof(int), + "Read_BGF::workspace->restricted_list[i]" ); } /* start reading and processing bgf file */ diff --git a/sPuReMD/src/grid.c b/sPuReMD/src/grid.c index 3b7a45ce3c09cb7a7341a8dd78170a6fbf9da73d..9c04c3f287a5558fd82b230694aa5a6a9ef2e86a 100644 --- a/sPuReMD/src/grid.c +++ b/sPuReMD/src/grid.c @@ -76,33 +76,54 @@ void Allocate_Space_for_Grid( reax_system *system ) * (2 * g->spread[1] + 1) * (2 * g->spread[2] + 1) + 3; /* allocate space for the new grid */ - g->atoms = (int****) calloc( g->ncell[0], sizeof( int*** )); - g->top = (int***) calloc( g->ncell[0], sizeof( int** )); - g->mark = (int***) calloc( g->ncell[0], sizeof( int** )); - g->start = (int***) calloc( g->ncell[0], sizeof( int** )); - g->end = (int***) calloc( g->ncell[0], sizeof( int** )); - g->nbrs = (ivec****) calloc( g->ncell[0], sizeof( ivec*** )); - g->nbrs_cp = (rvec****) calloc( g->ncell[0], sizeof( rvec*** )); + g->atoms = (int****) scalloc( g->ncell[0], sizeof( int*** ), + "Allocate_Space_for_Grid::g->atoms" ); + g->top = (int***) scalloc( g->ncell[0], sizeof( int** ), + "Allocate_Space_for_Grid::g->top" ); + g->mark = (int***) scalloc( g->ncell[0], sizeof( int** ), + "Allocate_Space_for_Grid::g->mark" ); + g->start = (int***) scalloc( g->ncell[0], sizeof( int** ), + "Allocate_Space_for_Grid::g->start" ); + g->end = (int***) scalloc( g->ncell[0], sizeof( int** ), + "Allocate_Space_for_Grid::g->end" ); + g->nbrs = (ivec****) scalloc( g->ncell[0], sizeof( ivec*** ), + "Allocate_Space_for_Grid::g->nbrs" ); + g->nbrs_cp = (rvec****) scalloc( g->ncell[0], sizeof( rvec*** ), + "Allocate_Space_for_Grid::g->nbrs_cp" ); for ( i = 0; i < g->ncell[0]; i++ ) { - g->atoms[i] = (int***) calloc( g->ncell[1], sizeof( int** )); - g->top [i] = (int**) calloc( g->ncell[1], sizeof( int* )); - g->mark[i] = (int**) calloc( g->ncell[1], sizeof( int* )); - g->start[i] = (int**) calloc( g->ncell[1], sizeof( int* )); - g->end[i] = (int**) calloc( g->ncell[1], sizeof( int* )); - g->nbrs[i] = (ivec***) calloc( g->ncell[1], sizeof( ivec** )); - g->nbrs_cp[i] = (rvec***) calloc( g->ncell[1], sizeof( rvec** )); + g->atoms[i] = (int***) scalloc( g->ncell[1], sizeof( int** ), + "Allocate_Space_for_Grid::g->atoms[i]" ); + g->top [i] = (int**) scalloc( g->ncell[1], sizeof( int* ), + "Allocate_Space_for_Grid::g->top[i]" ); + g->mark[i] = (int**) scalloc( g->ncell[1], sizeof( int* ), + "Allocate_Space_for_Grid::g->mark[i]" ); + g->start[i] = (int**) scalloc( g->ncell[1], sizeof( int* ), + "Allocate_Space_for_Grid::g->start[i]" ); + g->end[i] = (int**) scalloc( g->ncell[1], sizeof( int* ), + "Allocate_Space_for_Grid::g->end[i]" ); + g->nbrs[i] = (ivec***) scalloc( g->ncell[1], sizeof( ivec** ), + "Allocate_Space_for_Grid::g->nbrs[i]" ); + g->nbrs_cp[i] = (rvec***) scalloc( g->ncell[1], sizeof( rvec** ), + "Allocate_Space_for_Grid::g->nbrs_cp[i]" ); for ( j = 0; j < g->ncell[1]; j++ ) { - g->atoms[i][j] = (int**) calloc( g->ncell[2], sizeof( int* )); - g->top[i][j] = (int*) calloc( g->ncell[2], sizeof( int )); - g->mark[i][j] = (int*) calloc( g->ncell[2], sizeof( int )); - g->start[i][j] = (int*) calloc( g->ncell[2], sizeof( int )); - g->end[i][j] = (int*) calloc( g->ncell[2], sizeof( int )); - g->nbrs[i][j] = (ivec**) calloc( g->ncell[2], sizeof( ivec* )); - g->nbrs_cp[i][j] = (rvec**) calloc( g->ncell[2], sizeof( rvec* )); + g->atoms[i][j] = (int**) scalloc( g->ncell[2], sizeof( int* ), + "Allocate_Space_for_Grid::g->atoms[i][j]" ); + g->top[i][j] = (int*) scalloc( g->ncell[2], sizeof( int ), + "Allocate_Space_for_Grid::g->top[i][j]" ); + g->mark[i][j] = (int*) scalloc( g->ncell[2], sizeof( int ), + "Allocate_Space_for_Grid::g->mark[i][j]" ); + g->start[i][j] = (int*) scalloc( g->ncell[2], sizeof( int ), + "Allocate_Space_for_Grid::g->start[i][j]" ); + g->end[i][j] = (int*) scalloc( g->ncell[2], sizeof( int ), + "Allocate_Space_for_Grid::g->end[i][j]" ); + g->nbrs[i][j] = (ivec**) scalloc( g->ncell[2], sizeof( ivec* ), + "Allocate_Space_for_Grid::g->nbrs[i][j]" ); + g->nbrs_cp[i][j] = (rvec**) scalloc( g->ncell[2], sizeof( rvec* ), + "Allocate_Space_for_Grid::g->nbrs_cp[i][j]" ); for ( k = 0; k < g->ncell[2]; k++ ) { @@ -110,8 +131,10 @@ void Allocate_Space_for_Grid( reax_system *system ) g->mark[i][j][k] = 0; g->start[i][j][k] = 0; g->end[i][j][k] = 0; - g->nbrs[i][j][k] = (ivec*) malloc( g->max_nbrs * sizeof( ivec ) ); - g->nbrs_cp[i][j][k] = (rvec*) malloc( g->max_nbrs * sizeof( rvec ) ); + g->nbrs[i][j][k] = (ivec*) smalloc( g->max_nbrs * sizeof( ivec ), + "Allocate_Space_for_Grid::g->nbrs[i][j]][k]" ); + g->nbrs_cp[i][j][k] = (rvec*) smalloc( g->max_nbrs * sizeof( rvec ), + "Allocate_Space_for_Grid::g->nbrs_cp[i][j]][k]" ); for ( l = 0; l < g->max_nbrs; ++l ) { @@ -135,7 +158,8 @@ void Allocate_Space_for_Grid( reax_system *system ) { for ( k = 0; k < g->ncell[2]; k++ ) { - g->atoms[i][j][k] = (int*) malloc( g->max_atoms * sizeof( int ) ); + g->atoms[i][j][k] = (int*) smalloc( g->max_atoms * sizeof( int ), + "Allocate_Space_for_Grid::g->atoms[i][j]][k]" ); for ( l = 0; l < g->max_atoms; ++l ) { @@ -601,22 +625,31 @@ void Cluster_Atoms( reax_system *system, static_storage *workspace, num_H = 0; g = &( system->g ); - new_atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) ); - orig_id = (int *) calloc( system->N, sizeof( int ) ); - f_old = (rvec*) calloc( system->N, sizeof(rvec) ); - - s = (real**) calloc( 3, sizeof( real* ) ); - t = (real**) calloc( 3, sizeof( real* ) ); + new_atoms = (reax_atom*) scalloc( system->N, sizeof(reax_atom), + "Cluster_Atoms::new_atoms" ); + orig_id = (int *) scalloc( system->N, sizeof( int ), + "Cluster_Atoms::orig_id" ); + f_old = (rvec*) scalloc( system->N, sizeof(rvec), + "Cluster_Atoms::f_old" ); + + s = (real**) scalloc( 3, sizeof( real* ), + "Cluster_Atoms::s" ); + t = (real**) scalloc( 3, sizeof( real* ), + "Cluster_Atoms::t" ); for ( i = 0; i < 3; ++i ) { - s[i] = (real *) calloc( system->N, sizeof( real ) ); - t[i] = (real *) calloc( system->N, sizeof( real ) ); + s[i] = (real *) scalloc( system->N, sizeof( real ), + "Cluster_Atoms::s[i]" ); + t[i] = (real *) scalloc( system->N, sizeof( real ), + "Cluster_Atoms::t[i]" ); } - v = (real**) calloc( control->cm_solver_restart + 1, sizeof( real* ) ); + v = (real**) scalloc( control->cm_solver_restart + 1, sizeof( real* ), + "Cluster_Atoms::v" ); for ( i = 0; i < control->cm_solver_restart + 1; ++i ) { - v[i] = (real *) calloc( system->N, sizeof( real ) ); + v[i] = (real *) scalloc( system->N, sizeof( real ), + "Cluster_Atoms::v[i]" ); } top = 0; diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c index c572882539ab3a75211d5d93a3cb274126f9e3be..93fed761a7f4233da33ee03fefce7e3320947c14 100644 --- a/sPuReMD/src/init_md.c +++ b/sPuReMD/src/init_md.c @@ -279,26 +279,43 @@ void Init_Workspace( reax_system *system, control_params *control, int i; /* Allocate space for hydrogen bond list */ - workspace->hbond_index = (int *) malloc( system->N * sizeof( int ) ); + workspace->hbond_index = (int *) smalloc( system->N * sizeof( int ), + "Init_Workspace::workspace->hbond_index" ); /* bond order related storage */ - workspace->total_bond_order = (real *) malloc( system->N * sizeof( real ) ); - workspace->Deltap = (real *) malloc( system->N * sizeof( real ) ); - workspace->Deltap_boc = (real *) malloc( system->N * sizeof( real ) ); - workspace->dDeltap_self = (rvec *) malloc( system->N * sizeof( rvec ) ); - - workspace->Delta = (real *) malloc( system->N * sizeof( real ) ); - workspace->Delta_lp = (real *) malloc( system->N * sizeof( real ) ); - workspace->Delta_lp_temp = (real *) malloc( system->N * sizeof( real ) ); - workspace->dDelta_lp = (real *) malloc( system->N * sizeof( real ) ); - workspace->dDelta_lp_temp = (real *) malloc( system->N * sizeof( real ) ); - workspace->Delta_e = (real *) malloc( system->N * sizeof( real ) ); - workspace->Delta_boc = (real *) malloc( system->N * sizeof( real ) ); - workspace->nlp = (real *) malloc( system->N * sizeof( real ) ); - workspace->nlp_temp = (real *) malloc( system->N * sizeof( real ) ); - workspace->Clp = (real *) malloc( system->N * sizeof( real ) ); - workspace->CdDelta = (real *) malloc( system->N * sizeof( real ) ); - workspace->vlpex = (real *) malloc( system->N * sizeof( real ) ); + workspace->total_bond_order = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->bond_order" ); + workspace->Deltap = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->Deltap" ); + workspace->Deltap_boc = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->Deltap_boc" ); + workspace->dDeltap_self = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->dDeltap_self" ); + + workspace->Delta = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->Delta" ); + workspace->Delta_lp = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->Delta_lp" ); + workspace->Delta_lp_temp = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->Delta_lp_temp" ); + workspace->dDelta_lp = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->dDelta_lp" ); + workspace->dDelta_lp_temp = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->dDelta_lp_temp" ); + workspace->Delta_e = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->Delta_e" ); + workspace->Delta_boc = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->Delta_boc" ); + workspace->nlp = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->nlp" ); + workspace->nlp_temp = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->nlp_temp" ); + workspace->Clp = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->Clp" ); + workspace->CdDelta = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->CdDelta" ); + workspace->vlpex = (real *) smalloc( system->N * sizeof( real ), + "Init_Workspace::workspace->vlpex" ); /* charge method storage */ switch ( control->charge_method ) @@ -328,22 +345,33 @@ void Init_Workspace( reax_system *system, control_params *control, if ( control->cm_solver_pre_comp_type == ICHOLT_PC || control->cm_solver_pre_comp_type == ILUT_PAR_PC ) { - workspace->droptol = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->droptol = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->droptol" ); } //TODO: check if unused - //workspace->w = (real *) calloc( cm_lin_sys_size, sizeof( real ) ); + //workspace->w = (real *) scalloc( cm_lin_sys_size, sizeof( real ), + //"Init_Workspace::workspace->droptol" ); //TODO: check if unused - workspace->b = (real *) calloc( system->N_cm * 2, sizeof( real ) ); - workspace->b_s = (real *) calloc( system->N_cm, sizeof( real ) ); - workspace->b_t = (real *) calloc( system->N_cm, sizeof( real ) ); - workspace->b_prc = (real *) calloc( system->N_cm * 2, sizeof( real ) ); - workspace->b_prm = (real *) calloc( system->N_cm * 2, sizeof( real ) ); - workspace->s = (real**) calloc( 5, sizeof( real* ) ); - workspace->t = (real**) calloc( 5, sizeof( real* ) ); + workspace->b = (real *) scalloc( system->N_cm * 2, sizeof( real ), + "Init_Workspace::workspace->b" ); + workspace->b_s = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->b_s" ); + workspace->b_t = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->b_t" ); + workspace->b_prc = (real *) scalloc( system->N_cm * 2, sizeof( real ), + "Init_Workspace::workspace->b_prc" ); + workspace->b_prm = (real *) scalloc( system->N_cm * 2, sizeof( real ), + "Init_Workspace::workspace->b_prm" ); + workspace->s = (real**) scalloc( 5, sizeof( real* ), + "Init_Workspace::workspace->s" ); + workspace->t = (real**) scalloc( 5, sizeof( real* ), + "Init_Workspace::workspace->t" ); for ( i = 0; i < 5; ++i ) { - workspace->s[i] = (real *) calloc( system->N_cm, sizeof( real ) ); - workspace->t[i] = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->s[i] = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->s[i]" ); + workspace->t[i] = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->t[i]" ); } switch ( control->charge_method ) @@ -405,40 +433,62 @@ void Init_Workspace( reax_system *system, control_params *control, /* GMRES storage */ case GMRES_S: case GMRES_H_S: - workspace->y = (real *) calloc( control->cm_solver_restart + 1, sizeof( real ) ); - workspace->z = (real *) calloc( control->cm_solver_restart + 1, sizeof( real ) ); - workspace->g = (real *) calloc( control->cm_solver_restart + 1, sizeof( real ) ); - workspace->h = (real **) calloc( control->cm_solver_restart + 1, sizeof( real*) ); - workspace->hs = (real *) calloc( control->cm_solver_restart + 1, sizeof( real ) ); - workspace->hc = (real *) calloc( control->cm_solver_restart + 1, sizeof( real ) ); - workspace->rn = (real **) calloc( control->cm_solver_restart + 1, sizeof( real*) ); - workspace->v = (real **) calloc( control->cm_solver_restart + 1, sizeof( real*) ); + workspace->y = (real *) scalloc( control->cm_solver_restart + 1, sizeof( real ), + "Init_Workspace::workspace->y" ); + workspace->z = (real *) scalloc( control->cm_solver_restart + 1, sizeof( real ), + "Init_Workspace::workspace->z" ); + workspace->g = (real *) scalloc( control->cm_solver_restart + 1, sizeof( real ), + "Init_Workspace::workspace->g" ); + workspace->h = (real **) scalloc( control->cm_solver_restart + 1, sizeof( real*), + "Init_Workspace::workspace->h" ); + workspace->hs = (real *) scalloc( control->cm_solver_restart + 1, sizeof( real ), + "Init_Workspace::workspace->hs" ); + workspace->hc = (real *) scalloc( control->cm_solver_restart + 1, sizeof( real ), + "Init_Workspace::workspace->hc" ); + workspace->rn = (real **) scalloc( control->cm_solver_restart + 1, sizeof( real*), + "Init_Workspace::workspace->rn" ); + workspace->v = (real **) scalloc( control->cm_solver_restart + 1, sizeof( real*), + "Init_Workspace::workspace->v" ); for ( i = 0; i < control->cm_solver_restart + 1; ++i ) { - workspace->h[i] = (real *) calloc( control->cm_solver_restart + 1, sizeof( real ) ); - workspace->rn[i] = (real *) calloc( system->N_cm * 2, sizeof( real ) ); - workspace->v[i] = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->h[i] = (real *) scalloc( control->cm_solver_restart + 1, sizeof( real ), + "Init_Workspace::workspace->h[i]" ); + workspace->rn[i] = (real *) scalloc( system->N_cm * 2, sizeof( real ), + "Init_Workspace::workspace->rn[i]" ); + workspace->v[i] = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->v[i]" ); } - workspace->r = (real *) calloc( system->N_cm, sizeof( real ) ); - workspace->d = (real *) calloc( system->N_cm, sizeof( real ) ); - workspace->q = (real *) calloc( system->N_cm, sizeof( real ) ); - workspace->p = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->r = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->r" ); + workspace->d = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->d" ); + workspace->q = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->q" ); + workspace->p = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->p" ); break; /* CG storage */ case CG_S: - workspace->r = (real *) calloc( system->N_cm, sizeof( real ) ); - workspace->d = (real *) calloc( system->N_cm, sizeof( real ) ); - workspace->q = (real *) calloc( system->N_cm, sizeof( real ) ); - workspace->p = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->r = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->r" ); + workspace->d = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->d" ); + workspace->q = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->q" ); + workspace->p = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->p" ); break; case SDM_S: - workspace->r = (real *) calloc( system->N_cm, sizeof( real ) ); - workspace->d = (real *) calloc( system->N_cm, sizeof( real ) ); - workspace->q = (real *) calloc( system->N_cm, sizeof( real ) ); + workspace->r = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->r" ); + workspace->d = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->d" ); + workspace->q = (real *) scalloc( system->N_cm, sizeof( real ), + "Init_Workspace::workspace->q" ); break; default: @@ -448,19 +498,25 @@ void Init_Workspace( reax_system *system, control_params *control, } /* integrator storage */ - workspace->a = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_old = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->v_const = (rvec *) malloc( system->N * sizeof( rvec ) ); + workspace->a = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->a" ); + workspace->f_old = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_old" ); + workspace->v_const = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->v_const" ); #ifdef _OPENMP - workspace->f_local = (rvec *) malloc( control->num_threads * system->N * sizeof( rvec ) ); + workspace->f_local = (rvec *) smalloc( control->num_threads * system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_local" ); #endif /* storage for analysis */ if ( control->molec_anal || control->diffusion_coef ) { - workspace->mark = (int *) calloc( system->N, sizeof(int) ); - workspace->old_mark = (int *) calloc( system->N, sizeof(int) ); + workspace->mark = (int *) scalloc( system->N, sizeof(int), + "Init_Workspace::workspace->mark" ); + workspace->old_mark = (int *) scalloc( system->N, sizeof(int), + "Init_Workspace::workspace->old_mark" ); } else { @@ -469,7 +525,8 @@ void Init_Workspace( reax_system *system, control_params *control, if ( control->diffusion_coef ) { - workspace->x_old = (rvec *) calloc( system->N, sizeof( rvec ) ); + workspace->x_old = (rvec *) scalloc( system->N, sizeof( rvec ), + "Init_Workspace::workspace->x_old" ); } else { @@ -477,20 +534,34 @@ void Init_Workspace( reax_system *system, control_params *control, } #ifdef TEST_FORCES - workspace->dDelta = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_ele = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_vdw = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_bo = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_be = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_lp = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_ov = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_un = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_ang = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_coa = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_pen = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_hb = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_tor = (rvec *) malloc( system->N * sizeof( rvec ) ); - workspace->f_con = (rvec *) malloc( system->N * sizeof( rvec ) ); + workspace->dDelta = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->dDelta" ); + workspace->f_ele = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_ele" ); + workspace->f_vdw = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_vdw" ); + workspace->f_bo = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_bo" ); + workspace->f_be = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_be" ); + workspace->f_lp = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_lp" ); + workspace->f_ov = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_ov" ); + workspace->f_un = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_un" ); + workspace->f_ang = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_ang" ); + workspace->f_coa = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_coa" ); + workspace->f_pen = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_pen" ); + workspace->f_hb = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_hb" ); + workspace->f_tor = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_tor" ); + workspace->f_con = (rvec *) smalloc( system->N * sizeof( rvec ), + "Init_Workspace::workspace->f_con" ); #endif workspace->realloc.num_far = -1; @@ -528,8 +599,10 @@ void Init_Lists( reax_system *system, control_params *control, Generate_Neighbor_Lists(system, control, data, workspace, lists, out_control); Htop = 0; - hb_top = (int*) calloc( system->N, sizeof(int) ); - bond_top = (int*) calloc( system->N, sizeof(int) ); + hb_top = (int*) scalloc( system->N, sizeof(int), + "Init_Lists::hb_top" ); + bond_top = (int*) scalloc( system->N, sizeof(int), + "Init_Lists::bond_top" ); num_3body = 0; Estimate_Storage_Sizes( system, control, lists, &Htop, hb_top, bond_top, &num_3body ); @@ -1008,7 +1081,9 @@ void Finalize_Workspace( reax_system *system, control_params *control, } if ( control->cm_solver_pre_comp_type == SAI_PC ) { + Deallocate_Matrix( workspace->H_full ); Deallocate_Matrix( workspace->H_spar_patt ); + Deallocate_Matrix( workspace->H_spar_patt_full ); Deallocate_Matrix( workspace->H_app_inv ); } diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c index c6f56ed426d5095727c0833d5c2662a49fb568e3..110c43a3f747ac15e20bb0dc7958a9354963dbfd 100644 --- a/sPuReMD/src/lin_alg.c +++ b/sPuReMD/src/lin_alg.c @@ -148,11 +148,8 @@ void Sort_Matrix_Rows( sparse_matrix * const A ) // #pragma omp parallel default(none) private(i, j, si, ei, temp) shared(stderr) #endif { - if ( ( temp = (sparse_matrix_entry *) malloc( A->n * sizeof(sparse_matrix_entry)) ) == NULL ) - { - fprintf( stderr, "Not enough space for matrix row sort. Terminating...\n" ); - exit( INSUFFICIENT_MEMORY ); - } + temp = (sparse_matrix_entry *) smalloc( A->n * sizeof(sparse_matrix_entry), + "Sort_Matrix_Rows::temp" ); /* sort each row of A using column indices */ #ifdef _OPENMP @@ -259,8 +256,9 @@ static void compute_full_sparse_matrix( const sparse_matrix * const A, * Assumptions: * A has non-zero diagonals * Each row of A has at least one non-zero (i.e., no rows with all zeros) */ -void Setup_Sparsity_Pattern( const sparse_matrix * const A, - const real filter, sparse_matrix ** A_spar_patt ) +void setup_sparse_approx_inverse( const sparse_matrix * const A, sparse_matrix ** A_full, + sparse_matrix ** A_spar_patt, sparse_matrix **A_spar_patt_full, + sparse_matrix ** A_app_inv, const real filter ) { int i, pj, size; real min, max, threshold, val; @@ -286,7 +284,7 @@ void Setup_Sparsity_Pattern( const sparse_matrix * const A, } } - // find min and max element of the matrix + /* find min and max elements of matrix */ for ( i = 0; i < A->n; ++i ) { for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) @@ -312,25 +310,8 @@ void Setup_Sparsity_Pattern( const sparse_matrix * const A, } threshold = min + (max - min) * (1.0 - filter); - // calculate the nnz of the sparsity pattern - // for ( size = 0, i = 0; i < A->n; ++i ) - // { - // for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) - // { - // if ( threshold <= A->val[pj] ) - // size++; - // } - // } - // - // if ( Allocate_Matrix( A_spar_patt, A->n, size ) == NULL ) - // { - // fprintf( stderr, "[SAI] Not enough memory for preconditioning matrices. terminating.\n" ); - // exit( INSUFFICIENT_MEMORY ); - // } - //(*A_spar_patt)->start[(*A_spar_patt)->n] = size; - - // fill the sparsity pattern + /* fill sparsity pattern */ for ( size = 0, i = 0; i < A->n; ++i ) { (*A_spar_patt)->start[i] = size; @@ -346,6 +327,17 @@ void Setup_Sparsity_Pattern( const sparse_matrix * const A, } } (*A_spar_patt)->start[A->n] = size; + + compute_full_sparse_matrix( A, A_full ); + compute_full_sparse_matrix( *A_spar_patt, A_spar_patt_full ); + + /* A_app_inv has the same sparsity pattern + * as A_spar_patt_full (omit non-zero values) */ + if ( Allocate_Matrix( A_app_inv, (*A_spar_patt_full)->n, (*A_spar_patt_full)->m ) == FAILURE ) + { + fprintf( stderr, "not enough memory for approximate inverse matrix. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } } void Calculate_Droptol( const sparse_matrix * const A, @@ -371,11 +363,8 @@ void Calculate_Droptol( const sparse_matrix * const A, * overhead per Sparse_MatVec call*/ if ( droptol_local == NULL ) { - if ( (droptol_local = (real*) malloc( omp_get_num_threads() * A->n * sizeof(real))) == NULL ) - { - fprintf( stderr, "Not enough space for droptol. Terminating...\n" ); - exit( INSUFFICIENT_MEMORY ); - } + droptol_local = (real*) smalloc( omp_get_num_threads() * A->n * sizeof(real), + "Calculate_Droptol::droptol_local" ); } } @@ -569,15 +558,16 @@ real SuperLU_Factorize( const sparse_matrix * const A, { SUPERLU_ABORT("Malloc fails for part_super__h[]."); } - if ( ( (a = (real*) malloc( (2 * A->start[A->n] - A->n) * sizeof(real))) == NULL ) - || ( (asub = (int_t*) malloc( (2 * A->start[A->n] - A->n) * sizeof(int_t))) == NULL ) - || ( (xa = (int_t*) malloc( (A->n + 1) * sizeof(int_t))) == NULL ) - || ( (Ltop = (unsigned int*) malloc( (A->n + 1) * sizeof(unsigned int))) == NULL ) - || ( (Utop = (unsigned int*) malloc( (A->n + 1) * sizeof(unsigned int))) == NULL ) ) - { - fprintf( stderr, "Not enough space for SuperLU factorization. Terminating...\n" ); - exit( INSUFFICIENT_MEMORY ); - } + a = (real*) smalloc( (2 * A->start[A->n] - A->n) * sizeof(real), + "SuperLU_Factorize::a" ); + asub = (int_t*) smalloc( (2 * A->start[A->n] - A->n) * sizeof(int_t), + "SuperLU_Factorize::asub" ); + xa = (int_t*) smalloc( (A->n + 1) * sizeof(int_t), + "SuperLU_Factorize::xa" ); + Ltop = (unsigned int*) smalloc( (A->n + 1) * sizeof(unsigned int), + "SuperLU_Factorize::Ltop" ); + Utop = (unsigned int*) smalloc( (A->n + 1) * sizeof(unsigned int), + "SuperLU_Factorize::Utop" ); if ( Allocate_Matrix( &A_t, A->n, A->m ) == FAILURE ) { fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); @@ -824,13 +814,12 @@ real ICHOLT( const sparse_matrix * const A, const real * const droptol, start = Get_Time( ); - if ( ( Utop = (unsigned int*) malloc((A->n + 1) * sizeof(unsigned int)) ) == NULL || - ( tmp_j = (int*) malloc(A->n * sizeof(int)) ) == NULL || - ( tmp_val = (real*) malloc(A->n * sizeof(real)) ) == NULL ) - { - fprintf( stderr, "[ICHOLT] Not enough memory for preconditioning matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + Utop = (unsigned int*) smalloc( (A->n + 1) * sizeof(unsigned int), + "ICHOLT::Utop" ); + tmp_j = (int*) smalloc( A->n * sizeof(int), + "ICHOLT::Utop" ); + tmp_val = (real*) smalloc( A->n * sizeof(real), + "ICHOLT::Utop" ); // clear variables Ltop = 0; @@ -976,10 +965,10 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, start = Get_Time( ); - if ( Allocate_Matrix( &DAD, A->n, A->m ) == FAILURE || - ( D = (real*) malloc(A->n * sizeof(real)) ) == NULL || - ( D_inv = (real*) malloc(A->n * sizeof(real)) ) == NULL || - ( Utop = (int*) malloc((A->n + 1) * sizeof(int)) ) == NULL ) + D = (real*) smalloc( A->n * sizeof(real), "ICHOL_PAR::D" ); + D_inv = (real*) smalloc( A->n * sizeof(real), "ICHOL_PAR::D_inv" ); + Utop = (int*) smalloc( (A->n + 1) * sizeof(int), "ICHOL_PAR::Utop" ); + if ( Allocate_Matrix( &DAD, A->n, A->m ) == FAILURE ) { fprintf( stderr, "not enough memory for ICHOL_PAR preconditioning matrices. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); @@ -1154,9 +1143,9 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, start = Get_Time( ); - if ( Allocate_Matrix( &DAD, A->n, A->m ) == FAILURE || - ( D = (real*) malloc(A->n * sizeof(real)) ) == NULL || - ( D_inv = (real*) malloc(A->n * sizeof(real)) ) == NULL ) + D = (real*) smalloc( A->n * sizeof(real), "ILU_PAR::D" ); + D_inv = (real*) smalloc( A->n * sizeof(real), "ILU_PAR::D_inv" ); + if ( Allocate_Matrix( &DAD, A->n, A->m ) == FAILURE ) { fprintf( stderr, "[ILU_PAR] Not enough memory for preconditioning matrices. Terminating.\n" ); exit( INSUFFICIENT_MEMORY ); @@ -1377,12 +1366,8 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol, exit( INSUFFICIENT_MEMORY ); } - if ( ( D = (real*) malloc(A->n * sizeof(real)) ) == NULL || - ( D_inv = (real*) malloc(A->n * sizeof(real)) ) == NULL ) - { - fprintf( stderr, "not enough memory for ILUT_PAR preconditioning matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + D = (real*) smalloc( A->n * sizeof(real), "ILUT_PAR::D" ); + D_inv = (real*) smalloc( A->n * sizeof(real), "ILUT_PAR::D_inv" ); #ifdef _OPENMP #pragma omp parallel for schedule(static) \ @@ -1616,7 +1601,13 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol, #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) -real Sparse_Approx_Inverse( const sparse_matrix * const A, +/* Compute M^{1} \approx A which minimizes + * + * A: symmetric, sparse matrix, stored in full CSR format + * A_spar_patt: sparse matrix used as template sparsity pattern + * for approximating the inverse, stored in full CSR format + * A_app_inv: approximate inverse to A, stored in full CSR format (result) */ +real sparse_approx_inverse( const sparse_matrix * const A, const sparse_matrix * const A_spar_patt, sparse_matrix ** A_app_inv ) { @@ -1626,28 +1617,17 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A, int *pos_x, *pos_y; real start; real *e_j, *dense_matrix; - sparse_matrix *A_full = NULL, *A_spar_patt_full = NULL; char *X, *Y; start = Get_Time( ); - // Get A and A_spar_patt full matrices - compute_full_sparse_matrix( A, &A_full ); - compute_full_sparse_matrix( A_spar_patt, &A_spar_patt_full ); - - // A_app_inv will be the same as A_spar_patt_full except the val array - if ( Allocate_Matrix( A_app_inv, A_spar_patt_full->n, A_spar_patt_full->m ) == FAILURE ) - { - fprintf( stderr, "not enough memory for approximate inverse matrix. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } - - (*A_app_inv)->start[(*A_app_inv)->n] = A_spar_patt_full->start[A_spar_patt_full->n]; + (*A_app_inv)->start[(*A_app_inv)->n] = A_spar_patt->start[A_spar_patt->n]; #ifdef _OPENMP #pragma omp parallel default(none) \ - shared(A_full, A_spar_patt_full) private(i, k, pj, j_temp, identity_pos, \ - N, M, d_i, d_j, m, n, nrhs, lda, ldb, info, X, Y, pos_x, pos_y, e_j, dense_matrix) + private(i, k, pj, j_temp, identity_pos, N, M, d_i, d_j, m, n, \ + nrhs, lda, ldb, info, X, Y, pos_x, pos_y, e_j, dense_matrix) \ + shared(A_app_inv, stderr) #endif { X = (char *) smalloc( sizeof(char) * A->n, "Sparse_Approx_Inverse::X" ); @@ -1666,16 +1646,16 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A, #ifdef _OPENMP #pragma omp for schedule(static) #endif - for ( i = 0; i < A_spar_patt_full->n; ++i ) + for ( i = 0; i < A_spar_patt->n; ++i ) { N = 0; M = 0; // find column indices of nonzeros (which will be the columns indices of the dense matrix) - for ( pj = A_spar_patt_full->start[i]; pj < A_spar_patt_full->start[i + 1]; ++pj ) + for ( pj = A_spar_patt->start[i]; pj < A_spar_patt->start[i + 1]; ++pj ) { - j_temp = A_spar_patt_full->j[pj]; + j_temp = A_spar_patt->j[pj]; Y[j_temp] = 1; pos_y[j_temp] = N; @@ -1683,16 +1663,16 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A, // for each of those indices // search through the row of full A of that index - for ( k = A_full->start[j_temp]; k < A_full->start[j_temp + 1]; ++k ) + for ( k = A->start[j_temp]; k < A->start[j_temp + 1]; ++k ) { // and accumulate the nonzero column indices to serve as the row indices of the dense matrix - X[A_full->j[k]] = 1; + X[A->j[k]] = 1; } } // enumerate the row indices from 0 to (# of nonzero rows - 1) for the dense matrix identity_pos = M; - for ( k = 0; k < A_full->n; k++) + for ( k = 0; k < A->n; k++) { if ( X[k] != 0 ) { @@ -1718,12 +1698,12 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A, dense_matrix[d_i * N + d_j] = 0.0; } // change the value if any of the column indices is seen - for ( d_j = A_full->start[pos_x[d_i]]; - d_j < A_full->start[pos_x[d_i + 1]]; ++d_j ) + for ( d_j = A->start[pos_x[d_i]]; + d_j < A->start[pos_x[d_i + 1]]; ++d_j ) { - if ( Y[A_full->j[d_j]] == 1 ) + if ( Y[A->j[d_j]] == 1 ) { - dense_matrix[d_i * N + pos_y[A_full->j[d_j]]] = A_full->val[d_j]; + dense_matrix[d_i * N + pos_y[A->j[d_j]]] = A->val[d_j]; } } @@ -1763,11 +1743,11 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A, // print_matrix( "Least squares solution", n, nrhs, b, ldb ); // accumulate the resulting vector to build A_app_inv - (*A_app_inv)->start[i] = A_spar_patt_full->start[i]; - for ( k = A_spar_patt_full->start[i]; k < A_spar_patt_full->start[i + 1]; ++k) + (*A_app_inv)->start[i] = A_spar_patt->start[i]; + for ( k = A_spar_patt->start[i]; k < A_spar_patt->start[i + 1]; ++k) { - (*A_app_inv)->j[k] = A_spar_patt_full->j[k]; - (*A_app_inv)->val[k] = e_j[k - A_spar_patt_full->start[i]]; + (*A_app_inv)->j[k] = A_spar_patt->j[k]; + (*A_app_inv)->val[k] = e_j[k - A_spar_patt->start[i]]; } //empty variables that will be used next iteration @@ -1788,15 +1768,12 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A, sfree( X, "Sparse_Approx_Inverse::X" ); } - Deallocate_Matrix( A_full ); - Deallocate_Matrix( A_spar_patt_full ); - return Get_Timing_Info( start ); } #endif -/* sparse matrix-vector product Ax=b +/* sparse matrix-vector product Ax = b * where: * A: lower triangular matrix, stored in CSR format * x: vector @@ -1822,10 +1799,8 @@ static void Sparse_MatVec( const sparse_matrix * const A, * overhead per Sparse_MatVec call*/ if ( b_local == NULL ) { - if ( (b_local = (real*) malloc( omp_get_num_threads() * n * sizeof(real))) == NULL ) - { - exit( INSUFFICIENT_MEMORY ); - } + b_local = (real*) smalloc( omp_get_num_threads() * n * sizeof(real), + "Sparse_MatVec::b_local" ); } } @@ -1885,7 +1860,7 @@ static void Sparse_MatVec_full( const sparse_matrix * const A, Vector_MakeZero( b, A->n ); #ifdef _OPENMP - #pragma omp for schedule(static) default(none) private(i, j) + #pragma omp for schedule(static) #endif for ( i = 0; i < A->n; ++i ) { @@ -1906,11 +1881,8 @@ void Transpose( const sparse_matrix * const A, sparse_matrix * const A_t ) { unsigned int i, j, pj, *A_t_top; - if ( (A_t_top = (unsigned int*) calloc( A->n + 1, sizeof(unsigned int))) == NULL ) - { - fprintf( stderr, "Not enough space for matrix tranpose. Terminating...\n" ); - exit( INSUFFICIENT_MEMORY ); - } + A_t_top = (unsigned int*) scalloc( A->n + 1, sizeof(unsigned int), + "Transpose::A_t_top" ); memset( A_t->start, 0, (A->n + 1) * sizeof(unsigned int) ); @@ -2087,22 +2059,18 @@ void tri_solve_level_sched( const sparse_matrix * const LU, if ( row_levels == NULL || level_rows == NULL || level_rows_cnt == NULL ) { - if ( (row_levels = (unsigned int*) malloc((size_t)N * sizeof(unsigned int))) == NULL - || (level_rows = (unsigned int*) malloc((size_t)N * sizeof(unsigned int))) == NULL - || (level_rows_cnt = (unsigned int*) malloc((size_t)(N + 1) * sizeof(unsigned int))) == NULL ) - { - fprintf( stderr, "Not enough space for triangular solve via level scheduling. Terminating...\n" ); - exit( INSUFFICIENT_MEMORY ); - } + row_levels = (unsigned int*) smalloc( (size_t)N * sizeof(unsigned int), + "tri_solve_level_sched::row_levels" ); + level_rows = (unsigned int*) smalloc( (size_t)N * sizeof(unsigned int), + "tri_solve_level_sched::level_rows" ); + level_rows_cnt = (unsigned int*) smalloc( (size_t)(N + 1) * sizeof(unsigned int), + "tri_solve_level_sched::level_rows_cnt" ); } if ( top == NULL ) { - if ( (top = (unsigned int*) malloc((size_t)(N + 1) * sizeof(unsigned int))) == NULL ) - { - fprintf( stderr, "Not enough space for triangular solve via level scheduling. Terminating...\n" ); - exit( INSUFFICIENT_MEMORY ); - } + top = (unsigned int*) smalloc( (size_t)(N + 1) * sizeof(unsigned int), + "tri_solve_level_sched::top" ); } /* find levels (row dependencies in substitutions) */ @@ -2338,12 +2306,10 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) } } - if ( (fb_color = (int*) malloc(sizeof(int) * MAX_COLOR)) == NULL || - (conflict_local = (unsigned int*) malloc(sizeof(unsigned int) * A->n)) == NULL ) - { - fprintf( stderr, "not enough memory for graph coloring. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + fb_color = (int*) smalloc( sizeof(int) * MAX_COLOR, + "graph_coloring::fb_color" ); + conflict_local = (unsigned int*) smalloc( sizeof(unsigned int) * A->n, + "graph_coloring::fb_color" ); #ifdef _OPENMP #pragma omp barrier @@ -2526,11 +2492,7 @@ static void permute_vector( real * const x, const unsigned int n, const int inve { if ( x_p == NULL ) { - if ( (x_p = (real*) malloc(sizeof(real) * n)) == NULL ) - { - fprintf( stderr, "not enough memory for permuting vector. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + x_p = (real*) smalloc( sizeof(real) * n, "permute_vector::x_p" ); } if ( invert_map == TRUE ) @@ -2717,16 +2679,25 @@ sparse_matrix * setup_graph_coloring( sparse_matrix * const H ) #endif /* internal storage for graph coloring (global to facilitate simultaneous access to OpenMP threads) */ - if ( (color = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || - (to_color = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || - (conflict = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || - (conflict_cnt = (unsigned int*) malloc(sizeof(unsigned int) * (num_thread + 1))) == NULL || - (recolor = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || - (color_top = (unsigned int*) malloc(sizeof(unsigned int) * (H->n + 1))) == NULL || - (permuted_row_col = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || - (permuted_row_col_inv = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || - (y_p = (real*) malloc(sizeof(real) * H->n)) == NULL || - (Allocate_Matrix( &H_p, H->n, H->m ) == FAILURE ) || + color = (unsigned int*) smalloc( sizeof(unsigned int) * H->n, + "setup_graph_coloring::color" ); + to_color = (unsigned int*) smalloc( sizeof(unsigned int) * H->n, + "setup_graph_coloring::to_color" ); + conflict = (unsigned int*) smalloc( sizeof(unsigned int) * H->n, + "setup_graph_coloring::conflict" ); + conflict_cnt = (unsigned int*) smalloc( sizeof(unsigned int) * (num_thread + 1), + "setup_graph_coloring::conflict_cnt" ); + recolor = (unsigned int*) smalloc( sizeof(unsigned int) * H->n, + "setup_graph_coloring::recolor" ); + color_top = (unsigned int*) smalloc( sizeof(unsigned int) * (H->n + 1), + "setup_graph_coloring::color_top" ); + permuted_row_col = (unsigned int*) smalloc( sizeof(unsigned int) * H->n, + "setup_graph_coloring::premuted_row_col" ); + permuted_row_col_inv = (unsigned int*) smalloc( sizeof(unsigned int) * H->n, + "setup_graph_coloring::premuted_row_col_inv" ); + y_p = (real*) smalloc( sizeof(real) * H->n, + "setup_graph_coloring::y_p" ); + if ( (Allocate_Matrix( &H_p, H->n, H->m ) == FAILURE ) || (Allocate_Matrix( &H_full, H->n, 2 * H->m - H->n ) == FAILURE ) ) { fprintf( stderr, "not enough memory for graph coloring. terminating.\n" ); @@ -2773,27 +2744,15 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, { if ( Dinv_b == NULL ) { - if ( (Dinv_b = (real*) malloc(sizeof(real) * R->n)) == NULL ) - { - fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + Dinv_b = (real*) smalloc( sizeof(real) * R->n, "jacobi_iter::Dinv_b" ); } if ( rp == NULL ) { - if ( (rp = (real*) malloc(sizeof(real) * R->n)) == NULL ) - { - fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + rp = (real*) smalloc( sizeof(real) * R->n, "jacobi_iter::rp" ); } if ( rp2 == NULL ) { - if ( (rp2 = (real*) malloc(sizeof(real) * R->n)) == NULL ) - { - fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + rp2 = (real*) smalloc( sizeof(real) * R->n, "jacobi_iter::rp2" ); } } @@ -2969,11 +2928,8 @@ static void apply_preconditioner( const static_storage * const workspace, const { if ( Dinv_L == NULL ) { - if ( (Dinv_L = (real*) malloc(sizeof(real) * workspace->L->n)) == NULL ) - { - fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + Dinv_L = (real*) smalloc( sizeof(real) * workspace->L->n, + "apply_preconditioner::Dinv_L" ); } } @@ -2998,11 +2954,8 @@ static void apply_preconditioner( const static_storage * const workspace, const { if ( Dinv_U == NULL ) { - if ( (Dinv_U = (real*) malloc(sizeof(real) * workspace->U->n)) == NULL ) - { - fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + Dinv_U = (real*) smalloc( sizeof(real) * workspace->U->n, + "apply_preconditioner::Dinv_U" ); } } @@ -3043,42 +2996,50 @@ int GMRES( const static_storage * const workspace, const control_params * const const real tol, real * const x, const int fresh_pre ) { int i, j, k, itr, N, g_j, g_itr; - real cc, tmp1, tmp2, temp, ret_temp, bnorm, time_start; + real cc, tmp1, tmp2, temp, ret_temp, bnorm; + real t_start, t_ortho, t_pa, t_spmv, t_ts, t_vops; N = H->n; g_j = 0; g_itr = 0; #ifdef _OPENMP - #pragma omp parallel default(none) private(i, j, k, itr, bnorm, ret_temp) \ - shared(N, cc, tmp1, tmp2, temp, time_start, g_itr, g_j, stderr) + #pragma omp parallel default(none) \ + private(i, j, k, itr, bnorm, ret_temp, t_start) \ + shared(N, cc, tmp1, tmp2, temp, g_itr, g_j, stderr) \ + reduction(+: t_ortho, t_pa, t_spmv, t_ts, t_vops) #endif { j = 0; itr = 0; + t_ortho = 0.0; + t_pa = 0.0; + t_spmv = 0.0; + t_ts = 0.0; + t_vops = 0.0; - time_start = Get_Time( ); + t_start = Get_Time( ); bnorm = Norm( b, N ); - data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); + t_vops += Get_Timing_Info( t_start ); /* GMRES outer-loop */ for ( itr = 0; itr < control->cm_solver_max_iters; ++itr ) { /* calculate r0 */ - time_start = Get_Time( ); + t_start = Get_Time( ); Sparse_MatVec( H, x, workspace->b_prm ); - data->timing.cm_solver_spmv += Get_Timing_Info( time_start ); + t_spmv += Get_Timing_Info( t_start ); - time_start = Get_Time( ); + t_start = Get_Time( ); Vector_Sum( workspace->b_prc, 1., b, -1., workspace->b_prm, N ); - data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); + t_vops += Get_Timing_Info( t_start ); - time_start = Get_Time( ); + t_start = Get_Time( ); apply_preconditioner( workspace, control, workspace->b_prc, workspace->v[0], itr == 0 ? fresh_pre : FALSE ); - data->timing.cm_solver_pre_app += Get_Timing_Info( time_start ); + t_pa += Get_Timing_Info( t_start ); - time_start = Get_Time( ); + t_start = Get_Time( ); ret_temp = Norm( workspace->v[0], N ); #ifdef _OPENMP @@ -3089,24 +3050,24 @@ int GMRES( const static_storage * const workspace, const control_params * const } Vector_Scale( workspace->v[0], 1. / ret_temp, workspace->v[0], N ); - data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); + t_vops += Get_Timing_Info( t_start ); /* GMRES inner-loop */ for ( j = 0; j < control->cm_solver_restart && FABS(workspace->g[j]) / bnorm > tol; j++ ) { /* matvec */ - time_start = Get_Time( ); + t_start = Get_Time( ); Sparse_MatVec( H, workspace->v[j], workspace->b_prc ); - data->timing.cm_solver_spmv += Get_Timing_Info( time_start ); + t_spmv += Get_Timing_Info( t_start ); - time_start = Get_Time( ); + t_start = Get_Time( ); apply_preconditioner( workspace, control, workspace->b_prc, workspace->v[j + 1], FALSE ); - data->timing.cm_solver_pre_app += Get_Timing_Info( time_start ); + t_pa += Get_Timing_Info( t_start ); // if ( control->cm_solver_pre_comp_type == DIAG_PC ) // { /* apply modified Gram-Schmidt to orthogonalize the new residual */ - time_start = Get_Time( ); + t_start = Get_Time( ); for ( i = 0; i <= j; i++ ) { ret_temp = Dot( workspace->v[i], workspace->v[j + 1], N ); @@ -3121,7 +3082,7 @@ int GMRES( const static_storage * const workspace, const control_params * const Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N ); } - data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); + t_vops += Get_Timing_Info( t_start ); // } // else // { @@ -3131,7 +3092,7 @@ int GMRES( const static_storage * const workspace, const control_params * const // // // { -// time_start = Get_Time( ); +// t_start = Get_Time( ); // } // // #pragma omp single @@ -3159,11 +3120,11 @@ int GMRES( const static_storage * const workspace, const control_params * const // // // { -// data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); +// t_vops += Get_Timing_Info( t_start ); // } // } - time_start = Get_Time( ); + t_start = Get_Time( ); ret_temp = Norm( workspace->v[j + 1], N ); #ifdef _OPENMP @@ -3175,9 +3136,9 @@ int GMRES( const static_storage * const workspace, const control_params * const Vector_Scale( workspace->v[j + 1], 1.0 / workspace->h[j + 1][j], workspace->v[j + 1], N ); - data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); + t_vops += Get_Timing_Info( t_start ); - time_start = Get_Time( ); + t_start = Get_Time( ); // if ( control->cm_solver_pre_comp_type == NONE_PC || // control->cm_solver_pre_comp_type == DIAG_PC ) // { @@ -3234,7 +3195,7 @@ int GMRES( const static_storage * const workspace, const control_params * const workspace->g[j + 1] = tmp2; } - data->timing.cm_solver_orthog += Get_Timing_Info( time_start ); + t_ortho += Get_Timing_Info( t_start ); #ifdef _OPENMP #pragma omp barrier @@ -3242,7 +3203,7 @@ int GMRES( const static_storage * const workspace, const control_params * const } /* solve Hy = g: H is now upper-triangular, do back-substitution */ - time_start = Get_Time( ); + t_start = Get_Time( ); #ifdef _OPENMP #pragma omp single #endif @@ -3258,10 +3219,10 @@ int GMRES( const static_storage * const workspace, const control_params * const workspace->y[i] = temp / workspace->h[i][i]; } } - data->timing.cm_solver_tri_solve += Get_Timing_Info( time_start ); + t_ts += Get_Timing_Info( t_start ); /* update x = x_0 + Vy */ - time_start = Get_Time( ); + t_start = Get_Time( ); Vector_MakeZero( workspace->p, N ); for ( i = 0; i < j; i++ ) @@ -3270,7 +3231,7 @@ int GMRES( const static_storage * const workspace, const control_params * const } Vector_Add( x, 1., workspace->p, N ); - data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); + t_vops += Get_Timing_Info( t_start ); /* stopping condition */ if ( FABS(workspace->g[j]) / bnorm <= tol ) @@ -3288,9 +3249,23 @@ int GMRES( const static_storage * const workspace, const control_params * const } } +#ifdef _OPENMP + data->timing.cm_solver_orthog += t_ortho / control->num_threads; + data->timing.cm_solver_pre_app += t_pa / control->num_threads; + data->timing.cm_solver_spmv += t_spmv / control->num_threads; + data->timing.cm_solver_tri_solve += t_ts / control->num_threads; + data->timing.cm_solver_vector_ops += t_vops / control->num_threads; +#else + data->timing.cm_solver_orthog += t_ortho; + data->timing.cm_solver_pre_app += t_pa; + data->timing.cm_solver_spmv += t_spmv; + data->timing.cm_solver_tri_solve += t_ts; + data->timing.cm_solver_vector_ops += t_vops; +#endif + if ( g_itr >= control->cm_solver_max_iters ) { - fprintf( stderr, "GMRES convergence failed\n" ); + fprintf( stderr, "[WARNING] GMRES convergence failed (%d outer iters)\n", g_itr ); return g_itr * (control->cm_solver_restart + 1) + g_j + 1; } @@ -3303,203 +3278,239 @@ int GMRES_HouseHolder( const static_storage * const workspace, const sparse_matrix * const H, const real * const b, real tol, real * const x, const int fresh_pre ) { - int i, j, k, itr, N; - real cc, tmp1, tmp2, temp, bnorm; + int i, j, k, itr, N, g_j, g_itr; + real cc, tmp1, tmp2, temp, bnorm, ret_temp; real v[10000], z[control->cm_solver_restart + 2][10000], w[control->cm_solver_restart + 2]; real u[control->cm_solver_restart + 2][10000]; + real t_start, t_ortho, t_pa, t_spmv, t_ts, t_vops; j = 0; N = H->n; - bnorm = Norm( b, N ); - /* apply the diagonal pre-conditioner to rhs */ - for ( i = 0; i < N; ++i ) +#ifdef _OPENMP + #pragma omp parallel default(none) \ + private(i, j, k, itr, bnorm, ret_temp, t_start) \ + shared(v, z, w, u, tol, N, cc, tmp1, tmp2, temp, g_itr, g_j, stderr) \ + reduction(+: t_ortho, t_pa, t_spmv, t_ts, t_vops) +#endif { - workspace->b_prc[i] = b[i] * workspace->Hdia_inv[i]; - } + t_start = Get_Time( ); + bnorm = Norm( b, N ); + t_vops += Get_Timing_Info( t_start ); - // memset( x, 0, sizeof(real) * N ); + // memset( x, 0, sizeof(real) * N ); - /* GMRES outer-loop */ - for ( itr = 0; itr < control->cm_solver_max_iters; ++itr ) - { - /* compute z = r0 */ - Sparse_MatVec( H, x, workspace->b_prm ); - for ( i = 0; i < N; ++i ) + /* GMRES outer-loop */ + for ( itr = 0; itr < control->cm_solver_max_iters; ++itr ) { - workspace->b_prm[i] *= workspace->Hdia_inv[i]; /* pre-conditioner */ - } - Vector_Sum( z[0], 1., workspace->b_prc, -1., workspace->b_prm, N ); + /* compute z = r0 */ + t_start = Get_Time( ); + Sparse_MatVec( H, x, workspace->b_prm ); + t_spmv += Get_Timing_Info( t_start ); - Vector_MakeZero( w, control->cm_solver_restart + 1 ); - w[0] = Norm( z[0], N ); + t_start = Get_Time( ); + Vector_Sum( workspace->b_prc, 1., workspace->b, -1., workspace->b_prm, N ); + t_vops += Get_Timing_Info( t_start ); - Vector_Copy( u[0], z[0], N ); - u[0][0] += ( u[0][0] < 0.0 ? -1 : 1 ) * w[0]; - Vector_Scale( u[0], 1 / Norm( u[0], N ), u[0], N ); + t_start = Get_Time( ); + apply_preconditioner( workspace, control, workspace->b_prc, z[0], fresh_pre ); + t_pa += Get_Timing_Info( t_start ); - w[0] *= ( u[0][0] < 0.0 ? 1 : -1 ); - // fprintf( stderr, "\n\n%12.6f\n", w[0] ); + t_start = Get_Time( ); + Vector_MakeZero( w, control->cm_solver_restart + 1 ); + w[0] = Norm( z[0], N ); - /* GMRES inner-loop */ - for ( j = 0; j < control->cm_solver_restart && FABS( w[j] ) / bnorm > tol; j++ ) - { - /* compute v_j */ - Vector_Scale( z[j], -2 * u[j][j], u[j], N ); - z[j][j] += 1.; /* due to e_j */ + Vector_Copy( u[0], z[0], N ); + u[0][0] += ( u[0][0] < 0.0 ? -1 : 1 ) * w[0]; + Vector_Scale( u[0], 1 / Norm( u[0], N ), u[0], N ); + + w[0] *= ( u[0][0] < 0.0 ? 1 : -1 ); + t_vops += Get_Timing_Info( t_start ); - for ( i = j - 1; i >= 0; --i ) + /* GMRES inner-loop */ + for ( j = 0; j < control->cm_solver_restart && FABS( w[j] ) / bnorm > tol; j++ ) { - Vector_Add( z[j] + i, -2 * Dot( u[i] + i, z[j] + i, N - i ), u[i] + i, N - i ); - } + /* compute v_j */ + t_start = Get_Time( ); + Vector_Scale( z[j], -2 * u[j][j], u[j], N ); + z[j][j] += 1.; /* due to e_j */ - /* matvec */ - Sparse_MatVec( H, z[j], v ); + for ( i = j - 1; i >= 0; --i ) + { + Vector_Add( z[j] + i, -2 * Dot( u[i] + i, z[j] + i, N - i ), u[i] + i, N - i ); + } + t_vops += Get_Timing_Info( t_start ); - for ( k = 0; k < N; ++k ) - { - v[k] *= workspace->Hdia_inv[k]; /* pre-conditioner */ - } + /* matvec */ + t_start = Get_Time( ); + Sparse_MatVec( H, z[j], workspace->b_prc ); + t_spmv += Get_Timing_Info( t_start ); - for ( i = 0; i <= j; ++i ) - { - Vector_Add( v + i, -2 * Dot( u[i] + i, v + i, N - i ), u[i] + i, N - i ); - } + t_start = Get_Time( ); + apply_preconditioner( workspace, control, workspace->b_prc, v, fresh_pre ); + t_pa += Get_Timing_Info( t_start ); - if ( !Vector_isZero( v + (j + 1), N - (j + 1) ) ) - { - /* compute the HouseHolder unit vector u_j+1 */ + t_start = Get_Time( ); for ( i = 0; i <= j; ++i ) { - u[j + 1][i] = 0; + Vector_Add( v + i, -2 * Dot( u[i] + i, v + i, N - i ), u[i] + i, N - i ); } - Vector_Copy( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ); + if ( !Vector_isZero( v + (j + 1), N - (j + 1) ) ) + { + /* compute the HouseHolder unit vector u_j+1 */ + Vector_MakeZero( u[j + 1], j + 1 ); + Vector_Copy( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ); + ret_temp = Norm( v + (j + 1), N - (j + 1) ); +#ifdef _OPENMP + #pragma omp single +#endif + u[j + 1][j + 1] += ( v[j + 1] < 0.0 ? -1 : 1 ) * ret_temp; - u[j + 1][j + 1] += ( v[j + 1] < 0.0 ? -1 : 1 ) * Norm( v + (j + 1), N - (j + 1) ); +#ifdef _OPENMP + #pragma omp barrier +#endif - Vector_Scale( u[j + 1], 1 / Norm( u[j + 1], N ), u[j + 1], N ); + Vector_Scale( u[j + 1], 1 / Norm( u[j + 1], N ), u[j + 1], N ); - /* overwrite v with P_m+1 * v */ - v[j + 1] -= 2 * Dot( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ) * u[j + 1][j + 1]; - Vector_MakeZero( v + (j + 2), N - (j + 2) ); - // Vector_Add( v, -2 * Dot( u[j+1], v, N ), u[j+1], N ); - } + /* overwrite v with P_m+1 * v */ + ret_temp = 2 * Dot( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ) * u[j + 1][j + 1]; +#ifdef _OPENMP + #pragma omp single +#endif + v[j + 1] -= ret_temp; +#ifdef _OPENMP + #pragma omp barrier +#endif - /* prev Givens rots on the upper-Hessenberg matrix to make it U */ - for ( i = 0; i < j; i++ ) - { - tmp1 = workspace->hc[i] * v[i] + workspace->hs[i] * v[i + 1]; - tmp2 = -workspace->hs[i] * v[i] + workspace->hc[i] * v[i + 1]; + Vector_MakeZero( v + (j + 2), N - (j + 2) ); +// Vector_Add( v, -2 * Dot( u[j+1], v, N ), u[j+1], N ); + } + t_vops += Get_Timing_Info( t_start ); - v[i] = tmp1; - v[i + 1] = tmp2; - } + /* prev Givens rots on the upper-Hessenberg matrix to make it U */ + t_start = Get_Time( ); +#ifdef _OPENMP + #pragma omp single +#endif + { + for ( i = 0; i < j; i++ ) + { + tmp1 = workspace->hc[i] * v[i] + workspace->hs[i] * v[i + 1]; + tmp2 = -workspace->hs[i] * v[i] + workspace->hc[i] * v[i + 1]; - /* apply the new Givens rotation to H and right-hand side */ - if ( FABS(v[j + 1]) >= ALMOST_ZERO ) - { - cc = SQRT( SQR( v[j] ) + SQR( v[j + 1] ) ); - workspace->hc[j] = v[j] / cc; - workspace->hs[j] = v[j + 1] / cc; + v[i] = tmp1; + v[i + 1] = tmp2; + } - tmp1 = workspace->hc[j] * v[j] + workspace->hs[j] * v[j + 1]; - tmp2 = -workspace->hs[j] * v[j] + workspace->hc[j] * v[j + 1]; + /* apply the new Givens rotation to H and right-hand side */ + if ( FABS(v[j + 1]) >= ALMOST_ZERO ) + { + cc = SQRT( SQR( v[j] ) + SQR( v[j + 1] ) ); + workspace->hc[j] = v[j] / cc; + workspace->hs[j] = v[j + 1] / cc; - v[j] = tmp1; - v[j + 1] = tmp2; + tmp1 = workspace->hc[j] * v[j] + workspace->hs[j] * v[j + 1]; + tmp2 = -workspace->hs[j] * v[j] + workspace->hc[j] * v[j + 1]; - /* Givens rotations to rhs */ - tmp1 = workspace->hc[j] * w[j]; - tmp2 = -workspace->hs[j] * w[j]; - w[j] = tmp1; - w[j + 1] = tmp2; - } + v[j] = tmp1; + v[j + 1] = tmp2; - /* extend R */ - for ( i = 0; i <= j; ++i ) - { - workspace->h[i][j] = v[i]; + /* Givens rotations to rhs */ + tmp1 = workspace->hc[j] * w[j]; + tmp2 = -workspace->hs[j] * w[j]; + w[j] = tmp1; + w[j + 1] = tmp2; + } + + /* extend R */ + for ( i = 0; i <= j; ++i ) + { + workspace->h[i][j] = v[i]; + } + } + t_ortho += Get_Timing_Info( t_start ); } - // fprintf( stderr, "h:" ); - // for( i = 0; i <= j+1 ; ++i ) - // fprintf( stderr, "%.6f ", h[i][j] ); - // fprintf( stderr, "\n" ); - // fprintf( stderr, "%12.6f\n", w[j+1] ); - } + /* solve Hy = w. + H is now upper-triangular, do back-substitution */ + t_start = Get_Time( ); +#ifdef _OPENMP + #pragma omp single +#endif + { + for ( i = j - 1; i >= 0; i-- ) + { + temp = w[i]; + for ( k = j - 1; k > i; k-- ) + { + temp -= workspace->h[i][k] * workspace->y[k]; + } + workspace->y[i] = temp / workspace->h[i][i]; + } + } + t_ts += Get_Timing_Info( t_start ); - /* solve Hy = w. - H is now upper-triangular, do back-substitution */ - for ( i = j - 1; i >= 0; i-- ) - { - temp = w[i]; - for ( k = j - 1; k > i; k-- ) + t_start = Get_Time( ); + for ( i = j - 1; i >= 0; i-- ) { - temp -= workspace->h[i][k] * workspace->y[k]; + Vector_Add( x, workspace->y[i], z[i], N ); } + t_vops += Get_Timing_Info( t_start ); - workspace->y[i] = temp / workspace->h[i][i]; - } - - // fprintf( stderr, "y: " ); - // for( i = 0; i < control->cm_solver_restart+1; ++i ) - // fprintf( stderr, "%8.3f ", workspace->y[i] ); - - - /* update x = x_0 + Vy */ - // memset( z, 0, sizeof(real) * N ); - // for( i = j-1; i >= 0; i-- ) - // { - // Vector_Copy( v, z, N ); - // v[i] += workspace->y[i]; - // - // Vector_Sum( z, 1., v, -2 * Dot( u[i], v, N ), u[i], N ); - // } - // - // fprintf( stderr, "\nz: " ); - // for( k = 0; k < N; ++k ) - // fprintf( stderr, "%6.2f ", z[k] ); - - // fprintf( stderr, "\nx_bef: " ); - // for( i = 0; i < N; ++i ) - // fprintf( stderr, "%6.2f ", x[i] ); - - // Vector_Add( x, 1, z, N ); - for ( i = j - 1; i >= 0; i-- ) - { - Vector_Add( x, workspace->y[i], z[i], N ); + /* stopping condition */ + if ( FABS( w[j] ) / bnorm <= tol ) + { + break; + } } - /* stopping condition */ - if ( FABS( w[j] ) / bnorm <= tol ) +#ifdef _OPENMP + #pragma omp single +#endif { - break; + g_j = j; + g_itr = itr; } } - if ( itr >= control->cm_solver_max_iters ) +#ifdef _OPENMP + data->timing.cm_solver_orthog += t_ortho / control->num_threads; + data->timing.cm_solver_pre_app += t_pa / control->num_threads; + data->timing.cm_solver_spmv += t_spmv / control->num_threads; + data->timing.cm_solver_tri_solve += t_ts / control->num_threads; + data->timing.cm_solver_vector_ops += t_vops / control->num_threads; +#else + data->timing.cm_solver_orthog += t_ortho; + data->timing.cm_solver_pre_app += t_pa; + data->timing.cm_solver_spmv += t_spmv; + data->timing.cm_solver_tri_solve += t_ts; + data->timing.cm_solver_vector_ops += t_vops; +#endif + + if ( g_itr >= control->cm_solver_max_iters ) { - fprintf( stderr, "GMRES convergence failed\n" ); - return itr * (control->cm_solver_restart + 1) + j + 1; + fprintf( stderr, "[WARNING] GMRES convergence failed (%d outer iters)\n", g_itr ); + return g_itr * (control->cm_solver_restart + 1) + j + 1; } - return itr * (control->cm_solver_restart + 1) + j + 1; + return g_itr * (control->cm_solver_restart + 1) + g_j + 1; } /* Conjugate Gradient */ int CG( const static_storage * const workspace, const control_params * const control, - const sparse_matrix * const H, const real * const b, const real tol, - real * const x, const int fresh_pre ) + simulation_data * const data, const sparse_matrix * const H, const real * const b, + const real tol, real * const x, const int fresh_pre ) { - int i, itr, N; + int i, g_itr, N; real tmp, alpha, beta, b_norm, r_norm; real *d, *r, *p, *z; real sig_old, sig_new; + real t_start, t_pa, t_spmv, t_vops; N = H->n; d = workspace->d; @@ -3508,86 +3519,140 @@ int CG( const static_storage * const workspace, const control_params * const con z = workspace->p; #ifdef _OPENMP - #pragma omp parallel default(none) private(i, tmp, alpha, beta, b_norm, r_norm, sig_old, sig_new) \ - shared(itr, N, d, r, p, z) + #pragma omp parallel default(none) \ + private(i, tmp, alpha, beta, b_norm, r_norm, sig_old, sig_new, t_start) \ + reduction(+: t_pa, t_spmv, t_vops) \ + shared(g_itr, N, d, r, p, z) #endif { + t_pa = 0.0; + t_spmv = 0.0; + t_vops = 0.0; + + t_start = Get_Time( ); b_norm = Norm( b, N ); + t_vops += Get_Timing_Info( t_start ); + t_start = Get_Time( ); Sparse_MatVec( H, x, d ); + t_spmv += Get_Timing_Info( t_start ); + + t_start = Get_Time( ); Vector_Sum( r, 1.0, b, -1.0, d, N ); r_norm = Norm( r, N ); + t_vops += Get_Timing_Info( t_start ); + t_start = Get_Time( ); apply_preconditioner( workspace, control, r, z, fresh_pre ); - Vector_Copy( p, z, N ); + t_pa += Get_Timing_Info( t_start ); + t_start = Get_Time( ); + Vector_Copy( p, z, N ); sig_new = Dot( r, z, N ); + t_vops += Get_Timing_Info( t_start ); for ( i = 0; i < control->cm_solver_max_iters && r_norm / b_norm > tol; ++i ) { + t_start = Get_Time( ); Sparse_MatVec( H, p, d ); + t_spmv += Get_Timing_Info( t_start ); + t_start = Get_Time( ); tmp = Dot( d, p, N ); alpha = sig_new / tmp; Vector_Add( x, alpha, p, N ); - Vector_Add( r, -alpha, d, N ); r_norm = Norm( r, N ); + t_vops += Get_Timing_Info( t_start ); + t_start = Get_Time( ); apply_preconditioner( workspace, control, r, z, FALSE ); + t_pa += Get_Timing_Info( t_start ); + t_start = Get_Time( ); sig_old = sig_new; sig_new = Dot( r, z, N ); - beta = sig_new / sig_old; Vector_Sum( p, 1., z, beta, p, N ); + t_vops += Get_Timing_Info( t_start ); } #ifdef _OPENMP #pragma omp single #endif - itr = i; + g_itr = i; } - if ( itr >= control->cm_solver_max_iters ) +#ifdef _OPENMP + data->timing.cm_solver_pre_app += t_pa / control->num_threads; + data->timing.cm_solver_spmv += t_spmv / control->num_threads; + data->timing.cm_solver_vector_ops += t_vops / control->num_threads; +#else + data->timing.cm_solver_pre_app += t_pa; + data->timing.cm_solver_spmv += t_spmv; + data->timing.cm_solver_vector_ops += t_vops; +#endif + + if ( g_itr >= control->cm_solver_max_iters ) { - fprintf( stderr, "[WARNING] CG convergence failed (%d iters)\n", itr ); - return itr; + fprintf( stderr, "[WARNING] CG convergence failed (%d iters)\n", g_itr ); + return g_itr; } - return itr; + return g_itr; } /* Steepest Descent */ int SDM( const static_storage * const workspace, const control_params * const control, - const sparse_matrix * const H, const real * const b, const real tol, - real * const x, const int fresh_pre ) + simulation_data * const data, const sparse_matrix * const H, const real * const b, + const real tol, real * const x, const int fresh_pre ) { - int i, itr, N; + int i, g_itr, N; real tmp, alpha, b_norm; real sig; + real t_start, t_pa, t_spmv, t_vops; N = H->n; #ifdef _OPENMP - #pragma omp parallel default(none) private(i, tmp, alpha, b_norm, sig) \ - shared(itr, N) + #pragma omp parallel default(none) \ + private(i, tmp, alpha, b_norm, sig, t_start) \ + reduction(+: t_pa, t_spmv, t_vops) \ + shared(g_itr, N) #endif { + t_pa = 0.0; + t_spmv = 0.0; + t_vops = 0.0; + + t_start = Get_Time( ); b_norm = Norm( b, N ); + t_vops += Get_Timing_Info( t_start ); + t_start = Get_Time( ); Sparse_MatVec( H, x, workspace->q ); + t_spmv += Get_Timing_Info( t_start ); + + t_start = Get_Time( ); Vector_Sum( workspace->r, 1.0, b, -1.0, workspace->q, N ); + t_vops += Get_Timing_Info( t_start ); + t_start = Get_Time( ); apply_preconditioner( workspace, control, workspace->r, workspace->d, fresh_pre ); + t_pa += Get_Timing_Info( t_start ); + t_start = Get_Time( ); sig = Dot( workspace->r, workspace->d, N ); + t_vops += Get_Timing_Info( t_start ); for ( i = 0; i < control->cm_solver_max_iters && SQRT(sig) / b_norm > tol; ++i ) { + t_start = Get_Time( ); Sparse_MatVec( H, workspace->d, workspace->q ); + t_spmv += Get_Timing_Info( t_start ); + t_start = Get_Time( ); sig = Dot( workspace->r, workspace->d, N ); /* ensure each thread gets a local copy of @@ -3603,23 +3668,36 @@ int SDM( const static_storage * const workspace, const control_params * const co Vector_Add( x, alpha, workspace->d, N ); Vector_Add( workspace->r, -alpha, workspace->q, N ); + t_vops += Get_Timing_Info( t_start ); + t_start = Get_Time( ); apply_preconditioner( workspace, control, workspace->r, workspace->d, FALSE ); + t_pa += Get_Timing_Info( t_start ); } #ifdef _OPENMP #pragma omp single #endif - itr = i; + g_itr = i; } - if ( itr >= control->cm_solver_max_iters ) +#ifdef _OPENMP + data->timing.cm_solver_pre_app += t_pa / control->num_threads; + data->timing.cm_solver_spmv += t_spmv / control->num_threads; + data->timing.cm_solver_vector_ops += t_vops / control->num_threads; +#else + data->timing.cm_solver_pre_app += t_pa; + data->timing.cm_solver_spmv += t_spmv; + data->timing.cm_solver_vector_ops += t_vops; +#endif + + if ( g_itr >= control->cm_solver_max_iters ) { - fprintf( stderr, "[WARNING] SDM convergence failed (%d iters)\n", itr ); - return itr; + fprintf( stderr, "[WARNING] SDM convergence failed (%d iters)\n", g_itr ); + return g_itr; } - return itr; + return g_itr; } @@ -3639,11 +3717,7 @@ real condest( const sparse_matrix * const L, const sparse_matrix * const U ) N = L->n; - if ( (e = (real*) malloc(sizeof(real) * N)) == NULL ) - { - fprintf( stderr, "Not enough memory for condest. Terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } + e = (real*) smalloc( sizeof(real) * N, "condest::e" ); memset( e, 1., N * sizeof(real) ); diff --git a/sPuReMD/src/lin_alg.h b/sPuReMD/src/lin_alg.h index 6caf7a1a052decd8032eba19e56e8010b0939b42..814701fe06c81a4fcd33530f9a16f70c12abdf5d 100644 --- a/sPuReMD/src/lin_alg.h +++ b/sPuReMD/src/lin_alg.h @@ -32,8 +32,8 @@ typedef enum void Sort_Matrix_Rows( sparse_matrix * const ); -void Setup_Sparsity_Pattern( const sparse_matrix * const, - const real, sparse_matrix ** ); +void setup_sparse_approx_inverse( const sparse_matrix * const, sparse_matrix **, + sparse_matrix **, sparse_matrix **, sparse_matrix **, const real ); int Estimate_LU_Fill( const sparse_matrix * const, const real * const ); @@ -62,7 +62,7 @@ real ILUT_PAR( const sparse_matrix * const, const real *, const unsigned int, sparse_matrix * const, sparse_matrix * const ); #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) -real Sparse_Approx_Inverse( const sparse_matrix * const, const sparse_matrix * const, +real sparse_approx_inverse( const sparse_matrix * const, const sparse_matrix * const, sparse_matrix ** ); #endif @@ -94,11 +94,11 @@ int GMRES_HouseHolder( const static_storage * const, const control_params * cons const int ); int CG( const static_storage * const, const control_params * const, - const sparse_matrix * const, const real * const, const real, - real * const, const int ); + simulation_data * const, const sparse_matrix * const, const real * const, + const real, real * const, const int ); int SDM( const static_storage * const, const control_params * const, - const sparse_matrix * const, const real * const, const real, + simulation_data * const, const sparse_matrix * const, const real * const, const real, real * const, const int ); real condest( const sparse_matrix * const, const sparse_matrix * const ); diff --git a/sPuReMD/src/lookup.c b/sPuReMD/src/lookup.c index 7ae8ab2e3ac083febd400ae34e18d5ddb2c87b80..7ec372f4392fea03e81219eb2780c64ab45b7e4c 100644 --- a/sPuReMD/src/lookup.c +++ b/sPuReMD/src/lookup.c @@ -36,7 +36,8 @@ void Make_Lookup_Table( real xmin, real xmax, int n, t->dx = (xmax - xmin) / (n - 1); t->inv_dx = 1.0 / t->dx; t->a = (n - 1) / (xmax - xmin); - t->y = (real*) malloc(n * sizeof(real)); + t->y = (real*) smalloc( n * sizeof(real), + "Make_Lookup_Table::t->y" ); for (i = 0; i < n; i++) { @@ -83,11 +84,16 @@ void Natural_Cubic_Spline( const real *h, const real *f, real *a, *b, *c, *d, *v; /* allocate space for the linear system */ - a = (real*) malloc( n * sizeof(real) ); - b = (real*) malloc( n * sizeof(real) ); - c = (real*) malloc( n * sizeof(real) ); - d = (real*) malloc( n * sizeof(real) ); - v = (real*) malloc( n * sizeof(real) ); + a = (real*) smalloc( n * sizeof(real), + "Natural_Cubic_Spline::a" ); + b = (real*) smalloc( n * sizeof(real), + "Natural_Cubic_Spline::b" ); + c = (real*) smalloc( n * sizeof(real), + "Natural_Cubic_Spline::c" ); + d = (real*) smalloc( n * sizeof(real), + "Natural_Cubic_Spline::d" ); + v = (real*) smalloc( n * sizeof(real), + "Natural_Cubic_Spline::v" ); /* build the linear system */ a[0] = 0.0; @@ -153,11 +159,16 @@ void Complete_Cubic_Spline( const real *h, const real *f, real v0, real vlast, real *a, *b, *c, *d, *v; /* allocate space for the linear system */ - a = (real*) malloc( n * sizeof(real) ); - b = (real*) malloc( n * sizeof(real) ); - c = (real*) malloc( n * sizeof(real) ); - d = (real*) malloc( n * sizeof(real) ); - v = (real*) malloc( n * sizeof(real) ); + a = (real*) smalloc( n * sizeof(real), + "Complete_Cubic_Spline::a" ); + b = (real*) smalloc( n * sizeof(real), + "Complete_Cubic_Spline::b" ); + c = (real*) smalloc( n * sizeof(real), + "Complete_Cubic_Spline::c" ); + d = (real*) smalloc( n * sizeof(real), + "Complete_Cubic_Spline::d" ); + v = (real*) smalloc( n * sizeof(real), + "Complete_Cubic_Spline::v" ); /* build the linear system */ a[0] = 0.0; @@ -260,19 +271,27 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control ) num_atom_types = system->reaxprm.num_atom_types; dr = control->r_cut / control->tabulate; - h = (real*) calloc( (control->tabulate + 1), sizeof(real) ); - fh = (real*) calloc( (control->tabulate + 1), sizeof(real) ); - fvdw = (real*) calloc( (control->tabulate + 1), sizeof(real) ); - fCEvd = (real*) calloc( (control->tabulate + 1), sizeof(real) ); - fele = (real*) calloc( (control->tabulate + 1), sizeof(real) ); - fCEclmb = (real*) calloc( (control->tabulate + 1), sizeof(real) ); + h = (real*) scalloc( (control->tabulate + 1), sizeof(real), + "Make_LR_Lookup_Table::h" ); + fh = (real*) scalloc( (control->tabulate + 1), sizeof(real), + "Make_LR_Lookup_Table::fh" ); + fvdw = (real*) scalloc( (control->tabulate + 1), sizeof(real), + "Make_LR_Lookup_Table::fvdw" ); + fCEvd = (real*) scalloc( (control->tabulate + 1), sizeof(real), + "Make_LR_Lookup_Table::fCEvd" ); + fele = (real*) scalloc( (control->tabulate + 1), sizeof(real), + "Make_LR_Lookup_Table::fele" ); + fCEclmb = (real*) scalloc( (control->tabulate + 1), sizeof(real), + "Make_LR_Lookup_Table::fCEclmb" ); /* allocate Long-Range LookUp Table space based on number of atom types in the ffield file */ - LR = (LR_lookup_table**) malloc( num_atom_types * sizeof(LR_lookup_table*) ); + LR = (LR_lookup_table**) smalloc( num_atom_types * sizeof(LR_lookup_table*), + "Make_LR_Lookup_Table::LR" ); for ( i = 0; i < num_atom_types; ++i ) { - LR[i] = (LR_lookup_table*) malloc(num_atom_types * sizeof(LR_lookup_table)); + LR[i] = (LR_lookup_table*) smalloc( num_atom_types * sizeof(LR_lookup_table), + "Make_LR_Lookup_Table::LR[i]"); } /* most atom types in ffield file will not exist in the current @@ -303,17 +322,23 @@ void Make_LR_Lookup_Table( reax_system *system, control_params *control ) LR[i][j].dx = dr; LR[i][j].inv_dx = control->tabulate / control->r_cut; LR[i][j].y = (LR_data*) - malloc( LR[i][j].n * sizeof(LR_data) ); + smalloc( LR[i][j].n * sizeof(LR_data), + "Make_LR_Lookup_Table::LR[i][j].y" ); LR[i][j].H = (cubic_spline_coef*) - malloc( LR[i][j].n * sizeof(cubic_spline_coef) ); + smalloc( LR[i][j].n * sizeof(cubic_spline_coef), + "Make_LR_Lookup_Table::LR[i][j].H" ); LR[i][j].vdW = (cubic_spline_coef*) - malloc( LR[i][j].n * sizeof(cubic_spline_coef) ); + smalloc( LR[i][j].n * sizeof(cubic_spline_coef), + "Make_LR_Lookup_Table::LR[i][j].vdW" ); LR[i][j].CEvd = (cubic_spline_coef*) - malloc( LR[i][j].n * sizeof(cubic_spline_coef) ); + smalloc( LR[i][j].n * sizeof(cubic_spline_coef), + "Make_LR_Lookup_Table::LR[i][j].CEvd" ); LR[i][j].ele = (cubic_spline_coef*) - malloc( LR[i][j].n * sizeof(cubic_spline_coef) ); + smalloc( LR[i][j].n * sizeof(cubic_spline_coef), + "Make_LR_Lookup_Table::LR[i][j].ele" ); LR[i][j].CEclmb = (cubic_spline_coef*) - malloc( LR[i][j].n * sizeof(cubic_spline_coef) ); + smalloc( LR[i][j].n * sizeof(cubic_spline_coef), + "Make_LR_Lookup_Table::LR[i][j].CEclmb" ); for ( r = 1; r <= control->tabulate; ++r ) { diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h index 51f24d401b1337a9d8059df252d51864114c7106..023542b9408131135470014fd0770c180687e16a 100644 --- a/sPuReMD/src/mytypes.h +++ b/sPuReMD/src/mytypes.h @@ -890,8 +890,10 @@ typedef struct /* charge method storage */ sparse_matrix *H; + sparse_matrix *H_full; sparse_matrix *H_sp; sparse_matrix *H_spar_patt; + sparse_matrix *H_spar_patt_full; sparse_matrix *H_app_inv; sparse_matrix *L; sparse_matrix *U; diff --git a/sPuReMD/src/restart.c b/sPuReMD/src/restart.c index 0f7b1c5bdcbacdb134f6ab4acd944fb8f5ef109d..82474a8b96bbf39b0abf24f3e2f89f754f21e98e 100644 --- a/sPuReMD/src/restart.c +++ b/sPuReMD/src/restart.c @@ -20,7 +20,9 @@ ----------------------------------------------------------------------*/ #include "restart.h" + #include "box.h" +#include "tool_box.h" #include "vector.h" @@ -101,17 +103,25 @@ void Read_Binary_Restart( char *fname, reax_system *system, #endif /* memory allocations for atoms, atom maps, bond restrictions */ - system->atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) ); + system->atoms = (reax_atom*) scalloc( system->N, sizeof(reax_atom), + "Read_Binary_Restart::system->atoms" ); - workspace->map_serials = (int*) calloc( MAX_ATOM_ID, sizeof(int) ); + workspace->map_serials = (int*) scalloc( MAX_ATOM_ID, sizeof(int), + "Read_Binary_Restart::workspace->map_serials" ); for ( i = 0; i < MAX_ATOM_ID; ++i ) workspace->map_serials[i] = -1; - workspace->orig_id = (int*) calloc( system->N, sizeof(int) ); - workspace->restricted = (int*) calloc( system->N, sizeof(int) ); - workspace->restricted_list = (int**) calloc( system->N, sizeof(int*) ); + workspace->orig_id = (int*) scalloc( system->N, sizeof(int), + "Read_Binary_Restart::workspace->orig_id" ); + workspace->restricted = (int*) scalloc( system->N, sizeof(int), + "Read_Binary_Restart::workspace->restricted" ); + workspace->restricted_list = (int**) scalloc( system->N, sizeof(int*), + "Read_Binary_Restart::workspace->restricted_list" ); for ( i = 0; i < system->N; ++i ) - workspace->restricted_list[i] = (int*) calloc( MAX_RESTRICT, sizeof(int) ); + { + workspace->restricted_list[i] = (int*) scalloc( MAX_RESTRICT, sizeof(int), + "Read_Binary_Restart::workspace->restricted_list[i]" ); + } for ( i = 0; i < system->N; ++i ) { @@ -207,17 +217,27 @@ void Read_ASCII_Restart( char *fname, reax_system *system, #endif /* memory allocations for atoms, atom maps, bond restrictions */ - system->atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) ); + system->atoms = (reax_atom*) scalloc( system->N, sizeof(reax_atom), + "Read_ASCII_Restart::system->atoms" ); - workspace->map_serials = (int*) calloc( MAX_ATOM_ID, sizeof(int) ); + workspace->map_serials = (int*) scalloc( MAX_ATOM_ID, sizeof(int), + "Read_ASCII_Restart::workspace->map_serials" ); for ( i = 0; i < MAX_ATOM_ID; ++i ) + { workspace->map_serials[i] = -1; + } - workspace->orig_id = (int*) calloc( system->N, sizeof(int) ); - workspace->restricted = (int*) calloc( system->N, sizeof(int) ); - workspace->restricted_list = (int**) calloc( system->N, sizeof(int*) ); + workspace->orig_id = (int*) scalloc( system->N, sizeof(int), + "Read_ASCII_Restart::workspace->orig_id" ); + workspace->restricted = (int*) scalloc( system->N, sizeof(int), + "Read_ASCII_Restart::workspace->restricted" ); + workspace->restricted_list = (int**) scalloc( system->N, sizeof(int*), + "Read_ASCII_Restart::workspace->restricted_list" ); for ( i = 0; i < system->N; ++i ) - workspace->restricted_list[i] = (int*) calloc( MAX_RESTRICT, sizeof(int) ); + { + workspace->restricted_list[i] = (int*) scalloc( MAX_RESTRICT, sizeof(int), + "Read_ASCII_Restart::workspace->restricted_list[i]" ); + } for ( i = 0; i < system->N; ++i ) { diff --git a/sPuReMD/src/spuremd.c b/sPuReMD/src/spuremd.c index 2e791a306ed29c668573aa52e7bd078f9be181f4..82958abe94f9e1a7432e4752a1fee9b823b1aabc 100644 --- a/sPuReMD/src/spuremd.c +++ b/sPuReMD/src/spuremd.c @@ -154,7 +154,8 @@ void static Read_System( char * const geo_file, int Setup( char ** args, reax_system * const system, control_params * const control, simulation_data * const data ) { - lists = (reax_list*) malloc( sizeof(reax_list) * LIST_N ); + lists = (reax_list*) smalloc( sizeof(reax_list) * LIST_N, + "Setup::lists" ); Read_System( args[0], args[1], args[2], system, control, data, &workspace, &out_control ); diff --git a/sPuReMD/src/tool_box.c b/sPuReMD/src/tool_box.c index c37ccc3032d419e69035402fab82b8da2df16821..6b59d19446c9cbaaed604731d0cafdb3e163779e 100644 --- a/sPuReMD/src/tool_box.c +++ b/sPuReMD/src/tool_box.c @@ -352,34 +352,22 @@ char *Get_Atom_Name( reax_system *system, int i ) } -int Allocate_Tokenizer_Space( char **line, char **backup, char ***tokens ) +void Allocate_Tokenizer_Space( char **line, char **backup, char ***tokens ) { int i; - if ( (*line = (char*) malloc( sizeof(char) * MAX_LINE )) == NULL ) - { - return FAILURE; - } - - if ( (*backup = (char*) malloc( sizeof(char) * MAX_LINE )) == NULL ) - { - return FAILURE; - } - - if ( (*tokens = (char**) malloc( sizeof(char*) * MAX_TOKENS )) == NULL ) - { - return FAILURE; - } + *line = (char*) smalloc( sizeof(char) * MAX_LINE, + "Allocate_Tokenizer_Space::*line" ); + *backup = (char*) smalloc( sizeof(char) * MAX_LINE, + "Allocate_Tokenizer_Space::*backup" ); + *tokens = (char**) smalloc( sizeof(char*) * MAX_TOKENS, + "Allocate_Tokenizer_Space::*tokens" ); for ( i = 0; i < MAX_TOKENS; i++ ) { - if ( ((*tokens)[i] = (char*) malloc(sizeof(char) * MAX_TOKEN_LEN)) == NULL ) - { - return FAILURE; - } + (*tokens)[i] = (char*) smalloc(sizeof(char) * MAX_TOKEN_LEN, + "Allocate_Tokenizer_Space::(*tokens)[i]" ); } - - return SUCCESS; } diff --git a/sPuReMD/src/tool_box.h b/sPuReMD/src/tool_box.h index ba103f9aa71281b4ae62ba41979240b5098ecf60..d1b15f64bd2a8adb7ee3a0ee7c40d40113b81f48 100644 --- a/sPuReMD/src/tool_box.h +++ b/sPuReMD/src/tool_box.h @@ -72,7 +72,7 @@ char *Get_Element( reax_system*, int ); char *Get_Atom_Name( reax_system*, int ); -int Allocate_Tokenizer_Space( char**, char**, char*** ); +void Allocate_Tokenizer_Space( char**, char**, char*** ); void Deallocate_Tokenizer_Space( char **, char **, char *** );