From f3779f8eb860158f51073c708a6c3dded4b5f2b3 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnku@msu.edu> Date: Tue, 2 Feb 2016 14:19:29 -0500 Subject: [PATCH] sPuReMD: add parallel ICHOL algorithm. --- puremd_rc_1003/sPuReMD/QEq.c | 128 ++++++++++++++++++++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/puremd_rc_1003/sPuReMD/QEq.c b/puremd_rc_1003/sPuReMD/QEq.c index 7e629670..db89a28b 100644 --- a/puremd_rc_1003/sPuReMD/QEq.c +++ b/puremd_rc_1003/sPuReMD/QEq.c @@ -228,6 +228,133 @@ void ICHOLT( sparse_matrix *A, real *droptol, } +/* Parallel incomplete Cholesky factorization + * + * Reference: + * Edmond Chow and Aftab Patel + * Fine-Grained Parallel Incomplete LU Factorization + * SIAM J. Sci. Comp. */ +static void ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, + sparse_matrix * const U, sparse_matrix * const U_t ) +{ + unsigned int i, j, k, pj, x = 0, y = 0, ei_x, ei_y; + real sum; + int *Utop; + + Utop = (int*) malloc((A->n + 1) * sizeof(int)); + for ( i = 0; i < A->n; ++i ) + { + Utop[i] = 0; + } + + /* initial guesses for U^T */ + memcpy( &(U_t->start), &(A->start), sizeof(int) * (A->n+1) ); + memcpy( &(U_t->entries), &(A->entries), sizeof(sparse_matrix_entry) * (A->m) ); + + for ( i = 0; i < sweeps; ++i ) + { +// #pragma omp parallel for + for ( j = 0; j < A->m; ++j ) + { + sum = ZERO; + + /* determine row bounds of current nonzero */ + for ( k = 0; k < A->n; ++k ) + { + if( A->start[k] > j ) + { + x = A->start[k - 1]; + ei_x = A->start[k]; + break; + } + } + y = A->start[A->entries[j].j]; + ei_y = A->start[A->entries[j].j + 1]; + + /* sparse dot product: dot( A(i,1:i-1), A(j,1:i-1) ) */ + while( A->entries[x].j < A->entries[j].j && + A->entries[y].j < A->entries[j].j && + x < ei_x && y < ei_y ) + { + if( A->entries[x].j == A->entries[y].j ) + { + sum += A->entries[x].val * A->entries[y].val; + } + else if( A->entries[x].j < A->entries[y].j ) + { + ++x; + } + else + { + ++y; + } + } + + sum = A->entries[j].val - sum; + + if( A->entries[j].j == A->start[k - 1] ) + { + U_t->entries[j].val = sum / U_t->entries[A->start[k]-1].val; + } + else + { + U_t->entries[j].val = SQRT( sum ); + } + } + } + + + //TODO: transpose U^T and copy into U + for ( i = 0; i < U_t->n; ++i ) + { + for ( pj = U_t->start[i]; pj < U_t->start[i + 1]; ++pj ) + { + U->start[U_t->entries[pj].j + 1]++; + } + } + for ( i = 1; i <= U->n; ++i ) + { + Utop[i] = U->start[i] = U->start[i] + U->start[i - 1] + 1; + } + for ( i = 0; i < U_t->n; ++i ) + { + for ( pj = U_t->start[i]; pj < U_t->start[i + 1]; ++pj ) + { + j = U_t->entries[pj].j; + U->entries[Utop[j]].j = i; + U->entries[Utop[j]].val = U_t->entries[pj].val; + Utop[j]++; + } + } + + free(Utop); +} +//% use entries of A as inital guesses for U +//U = triu(A); +//% find all non-zero elements +//[row,col] = find(U); +//for s=1:sweeps +// for b=1:numel(row); +// i = row(b); +// j = col(b); +//% stemp = 0.0; +//% for k=1:(i-1) +//% stemp = stemp + U(k,i)*U(k,j); +//% end +// OR +//% for k=1:(j-1) +//% stemp = stemp + U(i,k)*U(j,k); +//% end +//% stemp = A(i,j) - stemp; +// % vectorized computations for sparse matrices +// if (i ~= j) +// U(i,j) = (A(i,j) - dot(U(1:i-1,i),U(1:i-1,j))) / U(i,i); +// else +// U(i,i) = sqrt(A(i,j) - dot(U(1:i-1,i),U(1:i-1,j))); +// end +// end +//end + void Init_MatVec( reax_system *system, control_params *control, simulation_data *data, static_storage *workspace, list *far_nbrs ) @@ -263,7 +390,6 @@ void Init_MatVec( reax_system *system, control_params *control, #endif } - // TODO: replace with ilu_par or ichol_par // TODO: add parameters for sweeps to control file //ICHOL_PAR( workspace->H, 3, workspace->L, workspace->U ); ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U ); -- GitLab