Skip to content
Snippets Groups Projects
Commit 69a25536 authored by Kurt A. O'Hearn's avatar Kurt A. O'Hearn
Browse files

sPuReMD: decrease memory usage for thread-local SpMV buffers.

parent 4c09c5f1
No related branches found
No related tags found
No related merge requests found
......@@ -24,6 +24,8 @@
#include "list.h"
#include "vector.h"
#include <omp.h>
/* sparse matrix-vector product Ax=b
* where:
......@@ -36,20 +38,35 @@ static void Sparse_MatVec( const sparse_matrix * const A,
int i, j, k, n, si, ei;
real H;
#ifdef _OPENMP
real *b_local;
static real **b_local;
unsigned int tid;
#endif
n = A->n;
Vector_MakeZero( b, n );
#pragma omp parallel \
default(none) shared(n) private(b_local, si, ei, H, i, j, k)
{
#ifdef _OPENMP
if ( (b_local = (real*) calloc(n, sizeof(real))) == NULL )
/* keep b_local for program duration to avoid allocate/free
* overhead per Sparse_MatVec call*/
if ( b_local == NULL )
{
b_local = (real**) malloc( omp_get_num_threads() * sizeof(real*));
for ( i = 0; i < omp_get_num_threads(); ++i )
{
exit( INSUFFICIENT_SPACE );
if ( (b_local[i] = (real*) malloc( n * sizeof(real))) == NULL )
{
exit( INSUFFICIENT_SPACE );
}
}
}
#endif
#pragma omp parallel \
default(none) shared(n, b_local) private(si, ei, H, j, k, tid)
{
#ifdef _OPENMP
tid = omp_get_thread_num();
Vector_MakeZero( b_local[tid], n );
#endif
#pragma omp for schedule(guided)
for ( i = 0; i < n; ++i )
......@@ -62,8 +79,8 @@ static void Sparse_MatVec( const sparse_matrix * const A,
j = A->entries[k].j;
H = A->entries[k].val;
#ifdef _OPENMP
b_local[j] += H * x[i];
b_local[i] += H * x[j];
b_local[tid][j] += H * x[i];
b_local[tid][i] += H * x[j];
#else
b[j] += H * x[i];
b[i] += H * x[j];
......@@ -72,24 +89,22 @@ static void Sparse_MatVec( const sparse_matrix * const A,
// the diagonal entry is the last one in
#ifdef _OPENMP
b_local[i] += A->entries[k].val * x[i];
b_local[tid][i] += A->entries[k].val * x[i];
#else
b[i] += A->entries[k].val * x[i];
#endif
}
}
#ifdef _OPENMP
#pragma omp critical(redux)
for ( tid = 0; tid < omp_get_num_threads(); ++tid )
{
for ( i = 0; i < n; ++i )
{
for ( i = 0; i < n; ++i )
{
b[i] += b_local[i];
}
b[i] += b_local[tid][i];
}
free(b_local);
#endif
}
#endif
}
......
......@@ -297,7 +297,7 @@ static void ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
{
/* for each nonzero */
#pragma omp parallel for schedule(guided) \
default(none) private(sum, ei_x, ei_y) firstprivate(x, y)
default(none) shared(DAD) private(sum, ei_x, ei_y, k) firstprivate(x, y)
for ( j = 0; j < A->start[A->n]; ++j )
{
sum = ZERO;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment