diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c index 237faa5514ed99771327d1b662933394014c5f40..9d2742cdfdb82247414a15752cba13d78d47558c 100644 --- a/sPuReMD/src/charges.c +++ b/sPuReMD/src/charges.c @@ -862,7 +862,7 @@ static void Setup_Preconditioner_QEq( const reax_system * const system, case SAI_PC: Setup_Sparsity_Pattern( Hptr, control->cm_solver_pre_comp_sai_thres, - workspace->H_spar_patt ); + &workspace->H_spar_patt ); break; default: @@ -1014,7 +1014,7 @@ static void Setup_Preconditioner_EE( const reax_system * const system, case SAI_PC: Setup_Sparsity_Pattern( Hptr, control->cm_solver_pre_comp_sai_thres, - workspace->H_spar_patt ); + &workspace->H_spar_patt ); break; default: @@ -1168,7 +1168,7 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system, case SAI_PC: Setup_Sparsity_Pattern( Hptr, control->cm_solver_pre_comp_sai_thres, - workspace->H_spar_patt ); + &workspace->H_spar_patt ); break; default: diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c index fc9c46bac8d1888658aab60b2b436e969ea311f8..d392aa3ca19ffbeaad526f52bf7e7e1b35c5bea3 100644 --- a/sPuReMD/src/init_md.c +++ b/sPuReMD/src/init_md.c @@ -321,6 +321,8 @@ void Init_Workspace( reax_system *system, control_params *control, workspace->H = NULL; workspace->H_sp = NULL; workspace->L = NULL; + workspace->H_spar_patt = NULL; + workspace->H_app_inv = NULL; workspace->U = NULL; workspace->Hdia_inv = NULL; if ( control->cm_solver_pre_comp_type == ICHOLT_PC || diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c index b59d8f5c6f8cac072450c080369b62d0e3766b34..ccdfce52146b043a6af59cc6569b14eb968d0a2f 100644 --- a/sPuReMD/src/lin_alg.c +++ b/sPuReMD/src/lin_alg.c @@ -20,17 +20,18 @@ ----------------------------------------------------------------------*/ #include "lin_alg.h" - #include "allocate.h" #include "tool_box.h" #include "vector.h" +#include "print_utils.h" + /* Intel MKL */ #if defined(HAVE_LAPACKE_MKL) - #include "mkl.h" +#include "mkl.h" /* reference LAPACK */ #elif defined(HAVE_LAPACKE) - #include "lapacke.h" +#include "lapacke.h" #endif typedef struct @@ -194,7 +195,7 @@ void Sort_Matrix_Rows( sparse_matrix * const A ) * A has non-zero diagonals * Each row of A has at least one non-zero (i.e., no rows with all zeros) */ static void compute_full_sparse_matrix( const sparse_matrix * const A, - sparse_matrix ** A_full ) + sparse_matrix ** A_full ) { int count, i, pj; sparse_matrix *A_t; @@ -207,6 +208,8 @@ static void compute_full_sparse_matrix( const sparse_matrix * const A, exit( INSUFFICIENT_MEMORY ); } } + + if ( Allocate_Matrix( &A_t, A->n, A->m ) == FAILURE ) { fprintf( stderr, "not enough memory for full A. terminating.\n" ); @@ -219,6 +222,9 @@ static void compute_full_sparse_matrix( const sparse_matrix * const A, count = 0; for ( i = 0; i < A->n; ++i ) { + + if((*A_full)->start == NULL){ + } (*A_full)->start[i] = count; /* A: symmetric, lower triangular portion only stored */ @@ -253,7 +259,7 @@ static void compute_full_sparse_matrix( const sparse_matrix * const A, * A has non-zero diagonals * Each row of A has at least one non-zero (i.e., no rows with all zeros) */ void Setup_Sparsity_Pattern( const sparse_matrix * const A, - const real filter, sparse_matrix * A_spar_patt ) + const real filter, sparse_matrix ** A_spar_patt ) { int i, pj, size; real min, max, threshold, val; @@ -261,18 +267,18 @@ void Setup_Sparsity_Pattern( const sparse_matrix * const A, min = 0.0; max = 0.0; - if ( A_spar_patt == NULL ) + if ( *A_spar_patt == NULL ) { - if ( Allocate_Matrix( &A_spar_patt, A->n, A->m ) == FAILURE ) + if ( Allocate_Matrix( A_spar_patt, A->n, A->m ) == FAILURE ) { fprintf( stderr, "[SAI] Not enough memory for preconditioning matrices. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); } } - else if ( (A_spar_patt->m) < (A->m) ) + else if ( ((*A_spar_patt)->m) < (A->m) ) { - Deallocate_Matrix( A_spar_patt ); - if ( Allocate_Matrix( &A_spar_patt, A->n, A->m ) == FAILURE ) + Deallocate_Matrix( *A_spar_patt ); + if ( Allocate_Matrix( A_spar_patt, A->n, A->m ) == FAILURE ) { fprintf( stderr, "[SAI] Not enough memory for preconditioning matrices. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); @@ -304,46 +310,45 @@ void Setup_Sparsity_Pattern( const sparse_matrix * const A, } } - threshold = min + ( max - min ) * filter; - + threshold = min + (max-min)*(1.0-filter); // calculate the nnz of the sparsity pattern -// for ( size = 0, i = 0; i < A->n; ++i ) -// { -// for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) -// { -// if ( threshold <= A->val[pj] ) -// size++; -// } -// } -// -// if ( Allocate_Matrix( &A_spar_patt, A->n, size ) == NULL ) -// { -// fprintf( stderr, "[SAI] Not enough memory for preconditioning matrices. terminating.\n" ); -// exit( INSUFFICIENT_MEMORY ); -// } - - //A_spar_patt->start[A_spar_patt->n] = size; + // for ( size = 0, i = 0; i < A->n; ++i ) + // { + // for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) + // { + // if ( threshold <= A->val[pj] ) + // size++; + // } + // } + // + // if ( Allocate_Matrix( A_spar_patt, A->n, size ) == NULL ) + // { + // fprintf( stderr, "[SAI] Not enough memory for preconditioning matrices. terminating.\n" ); + // exit( INSUFFICIENT_MEMORY ); + // } + + //(*A_spar_patt)->start[(*A_spar_patt)->n] = size; // fill the sparsity pattern for ( size = 0, i = 0; i < A->n; ++i ) { - A_spar_patt->start[i] = size; + (*A_spar_patt)->start[i] = size; for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) { - if ( threshold <= A->val[pj] ) + if ( ( A->val[pj] >= threshold ) || ( A->j[pj]==i ) ) { - A_spar_patt->val[size] = A->val[pj]; - A_spar_patt->j[size] = A->j[pj]; + (*A_spar_patt)->val[size] = A->val[pj]; + (*A_spar_patt)->j[size] = A->j[pj]; size++; } } } - A_spar_patt->start[A->n] = size; + (*A_spar_patt)->start[A->n] = size; } void Calculate_Droptol( const sparse_matrix * const A, - real * const droptol, const real dtol ) + real * const droptol, const real dtol ) { int i, j, k; real val; @@ -353,13 +358,13 @@ void Calculate_Droptol( const sparse_matrix * const A, #endif #ifdef _OPENMP - #pragma omp parallel default(none) private(i, j, k, val, tid), shared(droptol_local, stderr) +#pragma omp parallel default(none) private(i, j, k, val, tid), shared(droptol_local, stderr) #endif { #ifdef _OPENMP tid = omp_get_thread_num(); - #pragma omp master +#pragma omp master { /* keep b_local for program duration to avoid allocate/free * overhead per Sparse_MatVec call*/ @@ -373,7 +378,7 @@ void Calculate_Droptol( const sparse_matrix * const A, } } - #pragma omp barrier +#pragma omp barrier #endif /* init droptol to 0 */ @@ -387,12 +392,12 @@ void Calculate_Droptol( const sparse_matrix * const A, } #ifdef _OPENMP - #pragma omp barrier +#pragma omp barrier #endif /* calculate sqaure of the norm of each row */ #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif for ( i = 0; i < A->n; ++i ) { @@ -420,9 +425,9 @@ void Calculate_Droptol( const sparse_matrix * const A, } #ifdef _OPENMP - #pragma omp barrier +#pragma omp barrier - #pragma omp for schedule(static) +#pragma omp for schedule(static) for ( i = 0; i < A->n; ++i ) { droptol[i] = 0.0; @@ -432,13 +437,13 @@ void Calculate_Droptol( const sparse_matrix * const A, } } - #pragma omp barrier +#pragma omp barrier #endif /* calculate local droptol for each row */ //fprintf( stderr, "droptol: " ); #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif for ( i = 0; i < A->n; ++i ) { @@ -460,7 +465,7 @@ int Estimate_LU_Fill( const sparse_matrix * const A, const real * const droptol fillin = 0; #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) private(i, pj, val) reduction(+: fillin) #endif for ( i = 0; i < A->n; ++i ) @@ -482,7 +487,7 @@ int Estimate_LU_Fill( const sparse_matrix * const A, const real * const droptol #if defined(HAVE_SUPERLU_MT) real SuperLU_Factorize( const sparse_matrix * const A, - sparse_matrix * const L, sparse_matrix * const U ) + sparse_matrix * const L, sparse_matrix * const U ) { unsigned int i, pj, count, *Ltop, *Utop, r; sparse_matrix *A_t; @@ -511,10 +516,10 @@ real SuperLU_Factorize( const sparse_matrix * const A, /* Default parameters to control factorization. */ #ifdef _OPENMP //TODO: set as global parameter and use - #pragma omp parallel \ +#pragma omp parallel \ default(none) shared(nprocs) { - #pragma omp master +#pragma omp master { /* SuperLU_MT spawns threads internally, so set and pass parameter */ nprocs = omp_get_num_threads(); @@ -601,7 +606,7 @@ real SuperLU_Factorize( const sparse_matrix * const A, xa[i] = count; dCompRow_to_CompCol( A->n, A->n, 2 * A->start[A->n] - A->n, a, asub, xa, - &at, &atsub, &xat ); + &at, &atsub, &xat ); for ( i = 0; i < (2 * A->start[A->n] - A->n); ++i ) fprintf( stderr, "%6d", asub[i] ); @@ -656,8 +661,8 @@ real SuperLU_Factorize( const sparse_matrix * const A, Apply perm_c to the columns of original A to form AC. ------------------------------------------------------------*/ pdgstrf_init( nprocs, fact, trans, refact, panel_size, relax, - u, usepr, drop_tol, perm_c, perm_r, - work, lwork, &A_S, &AC_S, &superlumt_options, &Gstat ); + u, usepr, drop_tol, perm_c, perm_r, + work, lwork, &A_S, &AC_S, &superlumt_options, &Gstat ); for ( i = 0; i < ((NCPformat*)AC_S.Store)->nnz; ++i ) fprintf( stderr, "%6.1f", ((real*)(((NCPformat*)AC_S.Store)->nzval))[i] ); @@ -787,7 +792,7 @@ real diag_pre_comp( const sparse_matrix * const H, real * const Hdia_inv ) start = Get_Time( ); #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) private(i) #endif for ( i = 0; i < H->n; ++i ) @@ -808,7 +813,7 @@ real diag_pre_comp( const sparse_matrix * const H, real * const Hdia_inv ) /* Incomplete Cholesky factorization with dual thresholding */ real ICHOLT( const sparse_matrix * const A, const real * const droptol, - sparse_matrix * const L, sparse_matrix * const U ) + sparse_matrix * const L, sparse_matrix * const U ) { int *tmp_j; real *tmp_val; @@ -961,7 +966,7 @@ real ICHOLT( const sparse_matrix * const A, const real * const droptol, * SIAM J. Sci. Comp. */ #if defined(TESTING) real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, - sparse_matrix * const U_t, sparse_matrix * const U ) + sparse_matrix * const U_t, sparse_matrix * const U ) { unsigned int i, j, k, pj, x = 0, y = 0, ei_x, ei_y; real *D, *D_inv, sum, start; @@ -980,7 +985,7 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, } #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) shared(D_inv, D) private(i) #endif for ( i = 0; i < A->n; ++i ) @@ -996,7 +1001,7 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, * transformation DAD, where D = D(1./SQRT(D(A))) */ memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) ); #ifdef _OPENMP - #pragma omp parallel for schedule(guided) \ +#pragma omp parallel for schedule(guided) \ default(none) shared(DAD, D_inv, D) private(i, pj) #endif for ( i = 0; i < A->n; ++i ) @@ -1022,7 +1027,7 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, { /* for each nonzero */ #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) shared(DAD, stderr) private(sum, ei_x, ei_y, k) firstprivate(x, y) #endif for ( j = 0; j < A->start[A->n]; ++j ) @@ -1077,7 +1082,7 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, fprintf( stderr, "Numeric breakdown in ICHOL_PAR. Terminating.\n"); #if defined(DEBUG_FOCUS) fprintf( stderr, "A(%5d,%5d) = %10.3f\n", - k - 1, A->entries[j].j, A->entries[j].val ); + k - 1, A->entries[j].j, A->entries[j].val ); fprintf( stderr, "sum = %10.3f\n", sum); #endif exit(NUMERIC_BREAKDOWN); @@ -1097,7 +1102,7 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, * since DAD \approx U^{T}U, so * D^{-1}DADD^{-1} = A \approx D^{-1}U^{T}UD^{-1} */ #ifdef _OPENMP - #pragma omp parallel for schedule(guided) \ +#pragma omp parallel for schedule(guided) \ default(none) shared(D_inv) private(i, pj) #endif for ( i = 0; i < A->n; ++i ) @@ -1140,7 +1145,7 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, * sweeps: number of loops over non-zeros for computation * L / U: factorized triangular matrices (A \approx LU), CSR format */ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, - sparse_matrix * const L, sparse_matrix * const U ) + sparse_matrix * const L, sparse_matrix * const U ) { unsigned int i, j, k, pj, x, y, ei_x, ei_y; real *D, *D_inv, sum, start; @@ -1157,7 +1162,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, } #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) shared(D, D_inv) private(i) #endif for ( i = 0; i < A->n; ++i ) @@ -1171,7 +1176,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, * transformation DAD, where D = D(1./SQRT(abs(D(A)))) */ memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) ); #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) shared(DAD, D) private(i, pj) #endif for ( i = 0; i < A->n; ++i ) @@ -1199,7 +1204,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, /* L has unit diagonal, by convention */ #ifdef _OPENMP - #pragma omp parallel for schedule(static) default(none) private(i) +#pragma omp parallel for schedule(static) default(none) private(i) #endif for ( i = 0; i < A->n; ++i ) { @@ -1210,7 +1215,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, { /* for each nonzero in L */ #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum) #endif for ( j = 0; j < DAD->start[DAD->n]; ++j ) @@ -1262,7 +1267,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, } #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum) #endif for ( j = 0; j < DAD->start[DAD->n]; ++j ) @@ -1315,7 +1320,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, * since DAD \approx LU, then * D^{-1}DADD^{-1} = A \approx D^{-1}LUD^{-1} */ #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) shared(DAD, D_inv) private(i, pj) #endif for ( i = 0; i < DAD->n; ++i ) @@ -1355,7 +1360,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, * sweeps: number of loops over non-zeros for computation * L / U: factorized triangular matrices (A \approx LU), CSR format */ real ILUT_PAR( const sparse_matrix * const A, const real * droptol, - const unsigned int sweeps, sparse_matrix * const L, sparse_matrix * const U ) + const unsigned int sweeps, sparse_matrix * const L, sparse_matrix * const U ) { unsigned int i, j, k, pj, x, y, ei_x, ei_y, Ltop, Utop; real *D, *D_inv, sum, start; @@ -1379,7 +1384,7 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol, } #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) shared(D, D_inv) private(i) #endif for ( i = 0; i < A->n; ++i ) @@ -1392,7 +1397,7 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol, * transformation DAD, where D = D(1./SQRT(D(A))) */ memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) ); #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) shared(DAD, D) private(i, pj) #endif for ( i = 0; i < A->n; ++i ) @@ -1420,7 +1425,7 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol, /* L has unit diagonal, by convention */ #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) private(i) shared(L_temp) #endif for ( i = 0; i < A->n; ++i ) @@ -1432,7 +1437,7 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol, { /* for each nonzero in L */ #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) shared(DAD, L_temp, U_temp) private(j, k, x, y, ei_x, ei_y, sum) #endif for ( j = 0; j < DAD->start[DAD->n]; ++j ) @@ -1484,7 +1489,7 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol, } #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) shared(DAD, L_temp, U_temp) private(j, k, x, y, ei_x, ei_y, sum) #endif for ( j = 0; j < DAD->start[DAD->n]; ++j ) @@ -1537,7 +1542,7 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol, * since DAD \approx LU, then * D^{-1}DADD^{-1} = A \approx D^{-1}LUD^{-1} */ #ifdef _OPENMP - #pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) \ default(none) shared(DAD, L_temp, U_temp, D_inv) private(i, pj) #endif for ( i = 0; i < DAD->n; ++i ) @@ -1611,16 +1616,17 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol, #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) real Sparse_Approx_Inverse( const sparse_matrix * const A, - const sparse_matrix * const A_spar_patt, - sparse_matrix ** A_app_inv ) + const sparse_matrix * const A_spar_patt, + sparse_matrix ** A_app_inv ) { + //trial int i, k, pj, j_temp, identity_pos; int N, M, d_i, d_j; lapack_int m, n, nrhs, lda, ldb, info; int *pos_x, *pos_y; real start; real *e_j, *dense_matrix; - sparse_matrix *A_full, *A_spar_patt_full; + sparse_matrix *A_full = NULL, *A_spar_patt_full = NULL; char *X, *Y; start = Get_Time( ); @@ -1642,24 +1648,28 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A, compute_full_sparse_matrix( A, &A_full ); compute_full_sparse_matrix( A_spar_patt, &A_spar_patt_full ); + // char file[100] = "H_spar_patt_full"; + // Print_Sparse_Matrix2(A_spar_patt_full, file, NULL); // A_app_inv will be the same as A_spar_patt_full except the val array if ( Allocate_Matrix( A_app_inv, A_spar_patt_full->n, A_spar_patt_full->m ) == FAILURE ) { fprintf( stderr, "not enough memory for approximate inverse matrix. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); } + + (*A_app_inv)->start[(*A_app_inv)->n] = A_spar_patt_full->start[A_spar_patt_full->n]; // For each row of full A_spar_patt for ( i = 0; i < A_spar_patt_full->n; ++i ) { - // N = A_spar_patt_full->start[i + 1] - A_spar_patt_full->start[i]; N = 0; M = 0; // find column indices of nonzeros (which will be the columns indices of the dense matrix) for ( pj = A_spar_patt_full->start[i]; pj < A_spar_patt_full->start[i + 1]; ++pj ) { + j_temp = A_spar_patt_full->j[pj]; Y[j_temp] = 1; @@ -1691,7 +1701,7 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A, } // allocate memory for NxM dense matrix - smalloc( sizeof(real) * N * M, + dense_matrix = (real *) smalloc( sizeof(real) * N * M, "Sparse_Approx_Inverse::dense_matrix" ); // fill in the entries of dense matrix @@ -1702,22 +1712,22 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A, { dense_matrix[d_i * N + d_j] = 0.0; } - // change the value if any of the column indices is seen for ( d_j = A_full->start[pos_x[d_i]]; d_j < A_full->start[pos_x[d_i + 1]]; ++d_j ) { if ( Y[A_full->j[d_j]] == 1 ) { - dense_matrix[d_i * N + pos_y[d_j]] = A_full->val[d_j]; + dense_matrix[d_i * N + pos_y[A_full->j[d_j]]] = A_full->val[d_j]; } } + } /* create the right hand side of the linear equation that is the full column of the identity matrix*/ - smalloc( sizeof(char) * M, - "Sparse_Approx_Inverse::M" ); + e_j = (real *) smalloc( sizeof(real) * M, + "Sparse_Approx_Inverse::e_j" ); for ( k = 0; k < M; ++k ) { @@ -1734,6 +1744,7 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A, /* Executable statements */ // printf( "LAPACKE_dgels (row-major, high-level) Example Program Results\n" ); /* Solve the equations A*X = B */ + info = LAPACKE_dgels( LAPACK_ROW_MAJOR, 'N', m, n, nrhs, dense_matrix, lda, e_j, ldb ); /* Check for the full rank */ @@ -1756,24 +1767,24 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A, } //empty variables that will be used next iteration - srealloc( dense_matrix, 0, "Sparse_Approx_Inverse::dense_matrix" ); - srealloc( e_j, 0, "Sparse_Approx_Inverse::e_j" ); - for ( i = 0; i < A->n; ++i ) + sfree( dense_matrix, "Sparse_Approx_Inverse::dense_matrix" ); + sfree( e_j, "Sparse_Approx_Inverse::e_j" ); + for ( k = 0; k < A->n; ++k ) { - X[i] = 0; - Y[i] = 0; - pos_x[i] = 0; - pos_y[i] = 0; + X[k] = 0; + Y[k] = 0; + pos_x[k] = 0; + pos_y[k] = 0; } } - // Deallocate? + // Deallocate Deallocate_Matrix( A_full ); Deallocate_Matrix( A_spar_patt_full ); - srealloc( X, 0, "Sparse_Approx_Inverse::X" ); - srealloc( Y, 0, "Sparse_Approx_Inverse::Y" ); - srealloc( pos_x, 0, "Sparse_Approx_Inverse::pos_x" ); - srealloc( pos_y, 0, "Sparse_Approx_Inverse::pos_y" ); + sfree( X, "Sparse_Approx_Inverse::X" ); + sfree( Y, "Sparse_Approx_Inverse::Y" ); + sfree( pos_x, "Sparse_Approx_Inverse::pos_x" ); + sfree( pos_y, "Sparse_Approx_Inverse::pos_y" ); return Get_Timing_Info( start ); } @@ -1786,7 +1797,7 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A, * x: vector * b: vector (result) */ static void Sparse_MatVec( const sparse_matrix * const A, - const real * const x, real * const b ) + const real * const x, real * const b ) { int i, j, k, n, si, ei; real H; @@ -1800,7 +1811,7 @@ static void Sparse_MatVec( const sparse_matrix * const A, #ifdef _OPENMP tid = omp_get_thread_num( ); - #pragma omp single +#pragma omp single { /* keep b_local for program duration to avoid allocate/free * overhead per Sparse_MatVec call*/ @@ -1815,7 +1826,7 @@ static void Sparse_MatVec( const sparse_matrix * const A, Vector_MakeZero( (real * const)b_local, omp_get_num_threads() * n ); - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif for ( i = 0; i < n; ++i ) { @@ -1844,7 +1855,7 @@ static void Sparse_MatVec( const sparse_matrix * const A, } #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) for ( i = 0; i < n; ++i ) { for ( j = 0; j < omp_get_num_threads(); ++j ) @@ -1936,12 +1947,12 @@ void Transpose_I( sparse_matrix * const A ) * N: dimensions of preconditioner and vectors (# rows in H) */ static void diag_pre_app( const real * const Hdia_inv, const real * const y, - real * const x, const int N ) + real * const x, const int N ) { unsigned int i; #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif for ( i = 0; i < N; ++i ) { @@ -1962,13 +1973,13 @@ static void diag_pre_app( const real * const Hdia_inv, const real * const y, * LU has non-zero diagonals * Each row of LU has at least one non-zero (i.e., no rows with all zeros) */ void tri_solve( const sparse_matrix * const LU, const real * const y, - real * const x, const int N, const TRIANGULARITY tri ) + real * const x, const int N, const TRIANGULARITY tri ) { int i, pj, j, si, ei; real val; #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif { if ( tri == LOWER ) @@ -2020,13 +2031,13 @@ void tri_solve( const sparse_matrix * const LU, const real * const y, * LU has non-zero diagonals * Each row of LU has at least one non-zero (i.e., no rows with all zeros) */ void tri_solve_level_sched( const sparse_matrix * const LU, - const real * const y, real * const x, const int N, - const TRIANGULARITY tri, int find_levels ) + const real * const y, real * const x, const int N, + const TRIANGULARITY tri, int find_levels ) { int i, j, pj, local_row, local_level; #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif { if ( tri == LOWER ) @@ -2133,7 +2144,7 @@ void tri_solve_level_sched( const sparse_matrix * const LU, for ( i = 0; i < levels; ++i ) { #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif for ( j = level_rows_cnt[i]; j < level_rows_cnt[i + 1]; ++j ) { @@ -2153,7 +2164,7 @@ void tri_solve_level_sched( const sparse_matrix * const LU, for ( i = 0; i < levels; ++i ) { #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif for ( j = level_rows_cnt[i]; j < level_rows_cnt[i + 1]; ++j ) { @@ -2170,7 +2181,7 @@ void tri_solve_level_sched( const sparse_matrix * const LU, } #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif { /* save level info for re-use if performing repeated triangular solves via preconditioning */ @@ -2250,7 +2261,7 @@ static void compute_H_full( const sparse_matrix * const H ) void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) { #ifdef _OPENMP - #pragma omp parallel +#pragma omp parallel #endif { #define MAX_COLOR (500) @@ -2267,7 +2278,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) #endif #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif { memset( color, 0, sizeof(unsigned int) * A->n ); @@ -2279,7 +2290,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) if ( tri == LOWER ) { #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif for ( i = 0; i < A->n; ++i ) { @@ -2289,7 +2300,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) else { #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif for ( i = 0; i < A->n; ++i ) { @@ -2305,7 +2316,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) } #ifdef _OPENMP - #pragma omp barrier +#pragma omp barrier #endif while ( recolor_cnt > 0 ) @@ -2314,7 +2325,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) /* color vertices */ #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif for ( i = 0; i < recolor_cnt; ++i ) { @@ -2342,14 +2353,14 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) recolor_cnt_local = 0; #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif { recolor_cnt = 0; } #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif for ( i = 0; i < temp; ++i ) { @@ -2371,9 +2382,9 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) conflict_cnt[tid + 1] = recolor_cnt_local; #ifdef _OPENMP - #pragma omp barrier +#pragma omp barrier - #pragma omp master +#pragma omp master #endif { conflict_cnt[0] = 0; @@ -2385,7 +2396,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) } #ifdef _OPENMP - #pragma omp barrier +#pragma omp barrier #endif /* copy thread-local conflicts into shared buffer */ @@ -2396,9 +2407,9 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) } #ifdef _OPENMP - #pragma omp barrier +#pragma omp barrier - #pragma omp single +#pragma omp single #endif { temp_ptr = to_color; @@ -2421,7 +2432,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) //#endif #ifdef _OPENMP - #pragma omp barrier +#pragma omp barrier #endif } } @@ -2475,12 +2486,12 @@ void sort_colors( const unsigned int n, const TRIANGULARITY tri ) * tri: coloring to triangular factor to use (lower/upper) */ static void permute_vector( real * const x, const unsigned int n, const int invert_map, - const TRIANGULARITY tri ) + const TRIANGULARITY tri ) { unsigned int i; #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif { if ( x_p == NULL ) @@ -2503,7 +2514,7 @@ static void permute_vector( real * const x, const unsigned int n, const int inve } #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif for ( i = 0; i < n; ++i ) { @@ -2511,7 +2522,7 @@ static void permute_vector( real * const x, const unsigned int n, const int inve } #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif { memcpy( x, x_p, sizeof(real) * n ); @@ -2667,7 +2678,7 @@ sparse_matrix * setup_graph_coloring( sparse_matrix * const H ) if ( color == NULL ) { #ifdef _OPENMP - #pragma omp parallel +#pragma omp parallel { num_thread = omp_get_num_threads(); } @@ -2719,15 +2730,15 @@ sparse_matrix * setup_graph_coloring( sparse_matrix * const H ) * Note: Newmann series arises from series expansion of the inverse of * the coefficient matrix in the triangular system */ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, - const real * const b, real * const x, const TRIANGULARITY tri, const - unsigned int maxiter ) + const real * const b, real * const x, const TRIANGULARITY tri, const + unsigned int maxiter ) { unsigned int i, k, si = 0, ei = 0, iter; iter = 0; #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif { if ( Dinv_b == NULL ) @@ -2760,7 +2771,7 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, /* precompute and cache, as invariant in loop below */ #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif for ( i = 0; i < R->n; ++i ) { @@ -2771,7 +2782,7 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, { // x_{k+1} = G*x_{k} + Dinv*b; #ifdef _OPENMP - #pragma omp for schedule(guided) +#pragma omp for schedule(guided) #endif for ( i = 0; i < R->n; ++i ) { @@ -2799,7 +2810,7 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, } #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif { rp3 = rp; @@ -2827,7 +2838,7 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, * Matrices have non-zero diagonals * Each row of a matrix has at least one non-zero (i.e., no rows with all zeros) */ static void apply_preconditioner( const static_storage * const workspace, const control_params * const control, - const real * const y, real * const x, const int fresh_pre ) + const real * const y, real * const x, const int fresh_pre ) { int i, si; @@ -2840,157 +2851,157 @@ static void apply_preconditioner( const static_storage * const workspace, const { switch ( control->cm_solver_pre_app_type ) { - case TRI_SOLVE_PA: - switch ( control->cm_solver_pre_comp_type ) - { - case DIAG_PC: - diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); - break; - case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: - tri_solve( workspace->L, y, x, workspace->L->n, LOWER ); - tri_solve( workspace->U, x, x, workspace->U->n, UPPER ); - break; - case SAI_PC: - //TODO: add code to compute SAI first - // Sparse_MatVec( SAI, y, x ); - break; - default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); - exit( INVALID_INPUT ); - break; - } - break; - case TRI_SOLVE_LEVEL_SCHED_PA: - switch ( control->cm_solver_pre_comp_type ) - { - case DIAG_PC: - diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); - break; - case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: - tri_solve_level_sched( workspace->L, y, x, workspace->L->n, LOWER, fresh_pre ); - tri_solve_level_sched( workspace->U, x, x, workspace->U->n, UPPER, fresh_pre ); - break; - case SAI_PC: - //TODO: add code to compute SAI first - // Sparse_MatVec( SAI, y, x ); - default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); - exit( INVALID_INPUT ); + case TRI_SOLVE_PA: + switch ( control->cm_solver_pre_comp_type ) + { + case DIAG_PC: + diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + tri_solve( workspace->L, y, x, workspace->L->n, LOWER ); + tri_solve( workspace->U, x, x, workspace->U->n, UPPER ); + break; + case SAI_PC: + //TODO: add code to compute SAI first + // Sparse_MatVec( SAI, y, x ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } break; - } - break; - case TRI_SOLVE_GC_PA: - switch ( control->cm_solver_pre_comp_type ) - { - case DIAG_PC: - case SAI_PC: - fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); - exit( INVALID_INPUT ); + case TRI_SOLVE_LEVEL_SCHED_PA: + switch ( control->cm_solver_pre_comp_type ) + { + case DIAG_PC: + diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + tri_solve_level_sched( workspace->L, y, x, workspace->L->n, LOWER, fresh_pre ); + tri_solve_level_sched( workspace->U, x, x, workspace->U->n, UPPER, fresh_pre ); + break; + case SAI_PC: + //TODO: add code to compute SAI first + // Sparse_MatVec( SAI, y, x ); + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } break; - case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: + case TRI_SOLVE_GC_PA: + switch ( control->cm_solver_pre_comp_type ) + { + case DIAG_PC: + case SAI_PC: + fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif - { - memcpy( y_p, y, sizeof(real) * workspace->H->n ); - } + { + memcpy( y_p, y, sizeof(real) * workspace->H->n ); + } - permute_vector( y_p, workspace->H->n, FALSE, LOWER ); - tri_solve_level_sched( workspace->L, y_p, x, workspace->L->n, LOWER, fresh_pre ); - tri_solve_level_sched( workspace->U, x, x, workspace->U->n, UPPER, fresh_pre ); - permute_vector( x, workspace->H->n, TRUE, UPPER ); - break; - default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); - exit( INVALID_INPUT ); - break; - } - break; - case JACOBI_ITER_PA: - switch ( control->cm_solver_pre_comp_type ) - { - case DIAG_PC: - case SAI_PC: - fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); - exit( INVALID_INPUT ); + permute_vector( y_p, workspace->H->n, FALSE, LOWER ); + tri_solve_level_sched( workspace->L, y_p, x, workspace->L->n, LOWER, fresh_pre ); + tri_solve_level_sched( workspace->U, x, x, workspace->U->n, UPPER, fresh_pre ); + permute_vector( x, workspace->H->n, TRUE, UPPER ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } break; - case ICHOLT_PC: - case ILU_PAR_PC: - case ILUT_PAR_PC: + case JACOBI_ITER_PA: + switch ( control->cm_solver_pre_comp_type ) + { + case DIAG_PC: + case SAI_PC: + fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif - { - if ( Dinv_L == NULL ) - { - if ( (Dinv_L = (real*) malloc(sizeof(real) * workspace->L->n)) == NULL ) - { - fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } - } - } + { + if ( Dinv_L == NULL ) + { + if ( (Dinv_L = (real*) malloc(sizeof(real) * workspace->L->n)) == NULL ) + { + fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + } - /* construct D^{-1}_L */ - if ( fresh_pre == TRUE ) - { + /* construct D^{-1}_L */ + if ( fresh_pre == TRUE ) + { #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif - for ( i = 0; i < workspace->L->n; ++i ) - { - si = workspace->L->start[i + 1] - 1; - Dinv_L[i] = 1. / workspace->L->val[si]; - } - } + for ( i = 0; i < workspace->L->n; ++i ) + { + si = workspace->L->start[i + 1] - 1; + Dinv_L[i] = 1. / workspace->L->val[si]; + } + } - jacobi_iter( workspace->L, Dinv_L, y, x, LOWER, control->cm_solver_pre_app_jacobi_iters ); + jacobi_iter( workspace->L, Dinv_L, y, x, LOWER, control->cm_solver_pre_app_jacobi_iters ); #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif - { - if ( Dinv_U == NULL ) - { - if ( (Dinv_U = (real*) malloc(sizeof(real) * workspace->U->n)) == NULL ) - { - fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); - exit( INSUFFICIENT_MEMORY ); - } - } - } + { + if ( Dinv_U == NULL ) + { + if ( (Dinv_U = (real*) malloc(sizeof(real) * workspace->U->n)) == NULL ) + { + fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + } - /* construct D^{-1}_U */ - if ( fresh_pre == TRUE ) - { + /* construct D^{-1}_U */ + if ( fresh_pre == TRUE ) + { #ifdef _OPENMP - #pragma omp for schedule(static) +#pragma omp for schedule(static) #endif - for ( i = 0; i < workspace->U->n; ++i ) - { - si = workspace->U->start[i]; - Dinv_U[i] = 1. / workspace->U->val[si]; - } - } + for ( i = 0; i < workspace->U->n; ++i ) + { + si = workspace->U->start[i]; + Dinv_U[i] = 1. / workspace->U->val[si]; + } + } - jacobi_iter( workspace->U, Dinv_U, y, x, UPPER, control->cm_solver_pre_app_jacobi_iters ); - break; + jacobi_iter( workspace->U, Dinv_U, y, x, UPPER, control->cm_solver_pre_app_jacobi_iters ); + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } + break; default: fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); exit( INVALID_INPUT ); break; - } - break; - default: - fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); - exit( INVALID_INPUT ); - break; } } @@ -2999,8 +3010,8 @@ static void apply_preconditioner( const static_storage * const workspace, const /* generalized minimual residual iterative solver for sparse linear systems */ int GMRES( const static_storage * const workspace, const control_params * const control, - simulation_data * const data, const sparse_matrix * const H, const real * const b, - const real tol, real * const x, const int fresh_pre ) + simulation_data * const data, const sparse_matrix * const H, const real * const b, + const real tol, real * const x, const int fresh_pre ) { int i, j, k, itr, N, g_j, g_itr; real cc, tmp1, tmp2, temp, ret_temp, bnorm, time_start; @@ -3008,7 +3019,7 @@ int GMRES( const static_storage * const workspace, const control_params * const N = H->n; #ifdef _OPENMP - #pragma omp parallel default(none) private(i, j, k, itr, bnorm, ret_temp) \ +#pragma omp parallel default(none) private(i, j, k, itr, bnorm, ret_temp) \ shared(N, cc, tmp1, tmp2, temp, time_start, g_itr, g_j, stderr) #endif { @@ -3016,14 +3027,14 @@ int GMRES( const static_storage * const workspace, const control_params * const itr = 0; #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); } bnorm = Norm( b, N ); #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); @@ -3033,14 +3044,14 @@ int GMRES( const static_storage * const workspace, const control_params * const { /* apply preconditioner to residual */ #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); } apply_preconditioner( workspace, control, b, workspace->b_prc, fresh_pre ); #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { data->timing.cm_solver_pre_app += Get_Timing_Info( time_start ); @@ -3052,14 +3063,14 @@ int GMRES( const static_storage * const workspace, const control_params * const { /* calculate r0 */ #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); } Sparse_MatVec( H, x, workspace->b_prm ); #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { data->timing.cm_solver_spmv += Get_Timing_Info( time_start ); @@ -3068,14 +3079,14 @@ int GMRES( const static_storage * const workspace, const control_params * const if ( control->cm_solver_pre_comp_type == DIAG_PC ) { #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); } apply_preconditioner( workspace, control, workspace->b_prm, workspace->b_prm, FALSE ); #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { data->timing.cm_solver_pre_app += Get_Timing_Info( time_start ); @@ -3085,14 +3096,14 @@ int GMRES( const static_storage * const workspace, const control_params * const if ( control->cm_solver_pre_comp_type == DIAG_PC ) { #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); } Vector_Sum( workspace->v[0], 1., workspace->b_prc, -1., workspace->b_prm, N ); #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); @@ -3101,14 +3112,14 @@ int GMRES( const static_storage * const workspace, const control_params * const else { #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); } Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N ); #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); @@ -3118,15 +3129,15 @@ int GMRES( const static_storage * const workspace, const control_params * const if ( control->cm_solver_pre_comp_type != DIAG_PC ) { #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); } apply_preconditioner( workspace, control, workspace->v[0], workspace->v[0], - itr == 0 ? fresh_pre : FALSE ); + itr == 0 ? fresh_pre : FALSE ); #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { data->timing.cm_solver_pre_app += Get_Timing_Info( time_start ); @@ -3134,14 +3145,14 @@ int GMRES( const static_storage * const workspace, const control_params * const } #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); } ret_temp = Norm( workspace->v[0], N ); #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif { workspace->g[0] = ret_temp; @@ -3149,7 +3160,7 @@ int GMRES( const static_storage * const workspace, const control_params * const Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N ); #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); @@ -3160,7 +3171,7 @@ int GMRES( const static_storage * const workspace, const control_params * const { /* matvec */ #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); @@ -3168,14 +3179,14 @@ int GMRES( const static_storage * const workspace, const control_params * const Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] ); #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { data->timing.cm_solver_spmv += Get_Timing_Info( time_start ); } #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); @@ -3183,7 +3194,7 @@ int GMRES( const static_storage * const workspace, const control_params * const apply_preconditioner( workspace, control, workspace->v[j + 1], workspace->v[j + 1], FALSE ); #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { data->timing.cm_solver_pre_app += Get_Timing_Info( time_start ); @@ -3193,7 +3204,7 @@ int GMRES( const static_storage * const workspace, const control_params * const // { /* apply modified Gram-Schmidt to orthogonalize the new residual */ #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); @@ -3203,7 +3214,7 @@ int GMRES( const static_storage * const workspace, const control_params * const ret_temp = Dot( workspace->v[i], workspace->v[j + 1], N ); #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif { workspace->h[i][j] = ret_temp; @@ -3213,7 +3224,7 @@ int GMRES( const static_storage * const workspace, const control_params * const } #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); @@ -3260,31 +3271,31 @@ int GMRES( const static_storage * const workspace, const control_params * const // } #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); } ret_temp = Norm( workspace->v[j + 1], N ); #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif { workspace->h[j + 1][j] = ret_temp; } Vector_Scale( workspace->v[j + 1], - 1.0 / workspace->h[j + 1][j], workspace->v[j + 1], N ); + 1.0 / workspace->h[j + 1][j], workspace->v[j + 1], N ); #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); } #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); @@ -3302,9 +3313,9 @@ int GMRES( const static_storage * const workspace, const control_params * const } tmp1 = workspace->hc[i] * workspace->h[i][j] + - workspace->hs[i] * workspace->h[i + 1][j]; + workspace->hs[i] * workspace->h[i + 1][j]; tmp2 = -workspace->hs[i] * workspace->h[i][j] + - workspace->hc[i] * workspace->h[i + 1][j]; + workspace->hc[i] * workspace->h[i + 1][j]; workspace->h[i][j] = tmp1; workspace->h[i + 1][j] = tmp2; @@ -3343,13 +3354,13 @@ int GMRES( const static_storage * const workspace, const control_params * const } #ifdef _OPENMP - #pragma omp barrier +#pragma omp barrier #endif } /* solve Hy = g: H is now upper-triangular, do back-substitution */ #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { time_start = Get_Time( ); @@ -3381,7 +3392,7 @@ int GMRES( const static_storage * const workspace, const control_params * const Vector_Add( x, 1., workspace->p, N ); #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start ); @@ -3395,7 +3406,7 @@ int GMRES( const static_storage * const workspace, const control_params * const } #ifdef _OPENMP - #pragma omp master +#pragma omp master #endif { g_itr = itr; @@ -3414,9 +3425,9 @@ int GMRES( const static_storage * const workspace, const control_params * const int GMRES_HouseHolder( const static_storage * const workspace, - const control_params * const control, simulation_data * const data, - const sparse_matrix * const H, const real * const b, real tol, - real * const x, const int fresh_pre ) + const control_params * const control, simulation_data * const data, + const sparse_matrix * const H, const real * const b, real tol, + real * const x, const int fresh_pre ) { int i, j, k, itr, N; real cc, tmp1, tmp2, temp, bnorm; @@ -3623,7 +3634,7 @@ int CG( const static_storage * const workspace, const control_params * const con z = workspace->p; #ifdef _OPENMP - #pragma omp parallel default(none) private(i, tmp, alpha, beta, b_norm, r_norm, sig_old, sig_new) \ +#pragma omp parallel default(none) private(i, tmp, alpha, beta, b_norm, r_norm, sig_old, sig_new) \ shared(itr, N, d, r, p, z) #endif { @@ -3659,7 +3670,7 @@ int CG( const static_storage * const workspace, const control_params * const con } #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif itr = i; } @@ -3676,8 +3687,8 @@ int CG( const static_storage * const workspace, const control_params * const con /* Steepest Descent */ int SDM( const static_storage * const workspace, const control_params * const control, - const sparse_matrix * const H, const real * const b, const real tol, - real * const x, const int fresh_pre ) + const sparse_matrix * const H, const real * const b, const real tol, + real * const x, const int fresh_pre ) { int i, itr, N; real tmp, alpha, b_norm; @@ -3686,7 +3697,7 @@ int SDM( const static_storage * const workspace, const control_params * const co N = H->n; #ifdef _OPENMP - #pragma omp parallel default(none) private(i, tmp, alpha, b_norm, sig) \ +#pragma omp parallel default(none) private(i, tmp, alpha, b_norm, sig) \ shared(itr, N) #endif { @@ -3710,7 +3721,7 @@ int SDM( const static_storage * const workspace, const control_params * const co * (Dot function has persistent state in the form * of a shared global variable for the OpenMP version) */ #ifdef _OPENMP - #pragma omp barrier +#pragma omp barrier #endif tmp = Dot( workspace->d, workspace->q, N ); @@ -3723,7 +3734,7 @@ int SDM( const static_storage * const workspace, const control_params * const co } #ifdef _OPENMP - #pragma omp single +#pragma omp single #endif itr = i; } diff --git a/sPuReMD/src/lin_alg.h b/sPuReMD/src/lin_alg.h index 683b0c44367edec4eb81a91fa0503bc7ba2a2cae..6caf7a1a052decd8032eba19e56e8010b0939b42 100644 --- a/sPuReMD/src/lin_alg.h +++ b/sPuReMD/src/lin_alg.h @@ -33,7 +33,7 @@ typedef enum void Sort_Matrix_Rows( sparse_matrix * const ); void Setup_Sparsity_Pattern( const sparse_matrix * const, - const real, sparse_matrix * ); + const real, sparse_matrix ** ); int Estimate_LU_Fill( const sparse_matrix * const, const real * const ); diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h index a66758b1fb5d00dad29edf656ba536dc7d7fdee8..51f24d401b1337a9d8059df252d51864114c7106 100644 --- a/sPuReMD/src/mytypes.h +++ b/sPuReMD/src/mytypes.h @@ -607,7 +607,7 @@ typedef struct unsigned int cm_solver_pre_comp_refactor; real cm_solver_pre_comp_droptol; unsigned int cm_solver_pre_comp_sweeps; - unsigned int cm_solver_pre_comp_sai_thres; + real cm_solver_pre_comp_sai_thres; unsigned int cm_solver_pre_app_type; unsigned int cm_solver_pre_app_jacobi_iters;