From 4c09c5f15211d18887859c78595236eb1d6d5b84 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnku@cse.msu.edu> Date: Tue, 12 Apr 2016 23:11:37 -0400 Subject: [PATCH] sPuReMD: begin adding OpenMP support to SpMV, Jacobi iter, and ICHOL_PAR. --- puremd_rc_1003/sPuReMD/GMRES.c | 109 ++++++++++++++++++++++++++------ puremd_rc_1003/sPuReMD/Makefile | 2 +- puremd_rc_1003/sPuReMD/QEq.c | 11 ++-- 3 files changed, 96 insertions(+), 26 deletions(-) mode change 100755 => 100644 puremd_rc_1003/sPuReMD/Makefile diff --git a/puremd_rc_1003/sPuReMD/GMRES.c b/puremd_rc_1003/sPuReMD/GMRES.c index bbc9fc0e..0bf02708 100644 --- a/puremd_rc_1003/sPuReMD/GMRES.c +++ b/puremd_rc_1003/sPuReMD/GMRES.c @@ -35,25 +35,60 @@ static void Sparse_MatVec( const sparse_matrix * const A, { int i, j, k, n, si, ei; real H; +#ifdef _OPENMP + real *b_local; +#endif n = A->n; Vector_MakeZero( b, n ); - for ( i = 0; i < n; ++i ) + #pragma omp parallel \ + default(none) shared(n) private(b_local, si, ei, H, i, j, k) { - si = A->start[i]; - ei = A->start[i + 1] - 1; +#ifdef _OPENMP + if ( (b_local = (real*) calloc(n, sizeof(real))) == NULL ) + { + exit( INSUFFICIENT_SPACE ); + } +#endif + #pragma omp for schedule(guided) + for ( i = 0; i < n; ++i ) + { + si = A->start[i]; + ei = A->start[i + 1] - 1; - for ( k = si; k < ei; ++k ) + for ( k = si; k < ei; ++k ) + { + j = A->entries[k].j; + H = A->entries[k].val; +#ifdef _OPENMP + b_local[j] += H * x[i]; + b_local[i] += H * x[j]; +#else + b[j] += H * x[i]; + b[i] += H * x[j]; +#endif + } + + // the diagonal entry is the last one in +#ifdef _OPENMP + b_local[i] += A->entries[k].val * x[i]; +#else + b[i] += A->entries[k].val * x[i]; +#endif + } + +#ifdef _OPENMP + #pragma omp critical(redux) { - j = A->entries[k].j; - H = A->entries[k].val; - b[j] += H * x[i]; - b[i] += H * x[j]; + for ( i = 0; i < n; ++i ) + { + b[i] += b_local[i]; + } } - // the diagonal entry is the last one in - b[i] += A->entries[k].val * x[i]; + free(b_local); +#endif } } @@ -70,28 +105,60 @@ static void Sparse_MatVec2( const sparse_matrix * const R, const real * const x, real * const b) { int i, k, si = 0, ei = 0; +#ifdef _OPENMP + real *b_local; +#endif Vector_MakeZero( b, R->n ); - for ( i = 0; i < R->n; ++i ) + #pragma omp parallel default(none) private(b_local, i, k) firstprivate(si, ei) { - if (tri == LOWER) +#ifdef _OPENMP + if ( (b_local = (real*) calloc(R->n, sizeof(real))) == NULL ) { - si = R->start[i]; - ei = R->start[i + 1] - 1; + exit( INSUFFICIENT_SPACE ); } - else if (tri == UPPER) +#endif + #pragma omp for schedule(guided) + for ( i = 0; i < R->n; ++i ) { + if (tri == LOWER) + { + si = R->start[i]; + ei = R->start[i + 1] - 1; + } + else if (tri == UPPER) + { - si = R->start[i] + 1; - ei = R->start[i + 1]; - } + si = R->start[i] + 1; + ei = R->start[i + 1]; + } - for ( k = si; k < ei; ++k ) + for ( k = si; k < ei; ++k ) + { +#ifdef _OPENMP + b_local[i] += R->entries[k].val * x[R->entries[k].j]; +#else + b[i] += R->entries[k].val * x[R->entries[k].j]; +#endif + } +#ifdef _OPENMP + b_local[i] *= -Dinv[i]; +#else + b[i] *= -Dinv[i]; +#endif + } +#ifdef _OPENMP + #pragma omp critical(redux) { - b[i] += R->entries[k].val * x[R->entries[k].j]; + for ( i = 0; i < R->n; ++i ) + { + b[i] += b_local[i]; + } } - b[i] *= -Dinv[i]; + + free(b_local); +#endif } } diff --git a/puremd_rc_1003/sPuReMD/Makefile b/puremd_rc_1003/sPuReMD/Makefile old mode 100755 new mode 100644 index afb0d13a..a403e107 --- a/puremd_rc_1003/sPuReMD/Makefile +++ b/puremd_rc_1003/sPuReMD/Makefile @@ -1,7 +1,7 @@ CC = gcc LIBS = -lm -lz -CFLAGS = -I. -Wall -O3 -funroll-loops -fstrict-aliasing +CFLAGS = -I. -Wall -O3 -funroll-loops -fstrict-aliasing -fopenmp #CFLAGS = -I. -Wall -pg -gstabs #CFLAGS = -I. -Wall -funroll-loops -fstrict-aliasing #-finline-functions -finline-limit=15 -g#-DTEST -pg -ldl -rdynamic -g diff --git a/puremd_rc_1003/sPuReMD/QEq.c b/puremd_rc_1003/sPuReMD/QEq.c index 25033088..ca8efc67 100644 --- a/puremd_rc_1003/sPuReMD/QEq.c +++ b/puremd_rc_1003/sPuReMD/QEq.c @@ -120,7 +120,7 @@ real ICHOLT( sparse_matrix *A, real *droptol, struct timeval start, stop; gettimeofday( &start, NULL ); - + Utop = (int*) malloc((A->n + 1) * sizeof(int)); // clear variables @@ -296,6 +296,8 @@ static void ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, for ( i = 0; i < sweeps; ++i ) { /* for each nonzero */ + #pragma omp parallel for schedule(guided) \ + default(none) private(sum, ei_x, ei_y) firstprivate(x, y) for ( j = 0; j < A->start[A->n]; ++j ) { sum = ZERO; @@ -461,10 +463,11 @@ void Init_MatVec( reax_system *system, control_params *control, } data->timing.pre_comp += ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U ); +// data->timing.pre_comp += ICHOLT( workspace->H_sp, workspace->droptol, workspace->L, workspace->U ); // TODO: add parameters for sweeps to control file // ICHOL_PAR( workspace->H, 1, workspace->L, workspace->U ); - fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) ); +// fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) ); #if defined(DEBUG_FOCUS) sprintf( fname, "%s.L%d.out", control->sim_name, data->step ); @@ -548,8 +551,8 @@ void QEq( reax_system *system, control_params *control, simulation_data *data, Init_MatVec( system, control, data, workspace, far_nbrs ); - if( data->step == 0 || data->step == 100 ) - Print_Linear_System( system, control, workspace, data->step ); +// if( data->step == 0 || data->step == 100 ) +// Print_Linear_System( system, control, workspace, data->step ); //TODO: add parameters in control file for solver choice and options // matvecs = GMRES( workspace, workspace->H, -- GitLab