From 472662f668c28fad3af7aea6de22422d0d987376 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnku@msu.edu> Date: Fri, 23 Dec 2016 08:40:14 -0800 Subject: [PATCH] sPuReMD: add thread parallelism to conflict detection in graph coloring. --- sPuReMD/src/lin_alg.c | 99 ++++++++++++++++++++++++++++++++----------- 1 file changed, 74 insertions(+), 25 deletions(-) diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c index 1759b41c..4ef1ce22 100644 --- a/sPuReMD/src/lin_alg.c +++ b/sPuReMD/src/lin_alg.c @@ -52,6 +52,7 @@ unsigned int *top = NULL; unsigned int *color = NULL; unsigned int *to_color = NULL; unsigned int *conflict = NULL; +unsigned int *conflict_cnt = NULL; unsigned int *temp_ptr; unsigned int *recolor = NULL; unsigned int recolor_cnt; @@ -534,8 +535,16 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) { #define MAX_COLOR (500) int i, pj, v; - unsigned int temp; - int *fb_color; + unsigned int temp, recolor_cnt_local, *conflict_local; + int tid, num_thread, *fb_color; + +#ifdef _OPENMP + tid = omp_get_thread_num(); + num_thread = omp_get_num_threads(); +#else + tid = 0; + num_thread = 1; +#endif #pragma omp master { @@ -562,7 +571,8 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) } } - if ( (fb_color = (int*) malloc(sizeof(int) * MAX_COLOR)) == NULL ) + if ( (fb_color = (int*) malloc(sizeof(int) * MAX_COLOR)) == NULL || + (conflict_local = (unsigned int*) malloc(sizeof(unsigned int) * A->n)) == NULL ) { fprintf( stderr, "not enough memory for graph coloring. terminating.\n" ); exit( INSUFFICIENT_MEMORY ); @@ -598,29 +608,63 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) } /* determine if recoloring required */ - //TODO: switch to reduction on recolor_cnt (+) via parallel scan through recolor + temp = recolor_cnt; + recolor_cnt_local = 0; + + #pragma omp barrier + #pragma omp master { - temp = recolor_cnt; recolor_cnt = 0; + } + + #pragma omp barrier - for ( i = 0; i < temp; ++i ) - { - v = to_color[i]; + #pragma omp for schedule(static) + for ( i = 0; i < temp; ++i ) + { + v = to_color[i]; - /* search for color conflicts with adjacent vertices */ - for ( pj = A->start[v]; pj < A->start[v + 1]; ++pj ) + /* search for color conflicts with adjacent vertices */ + for ( pj = A->start[v]; pj < A->start[v + 1]; ++pj ) + { + if ( color[v] == color[A->j[pj]] && v > A->j[pj] ) { - if ( color[v] == color[A->j[pj]] && v > A->j[pj] ) - { - conflict[recolor_cnt] = v; - color[v] = 0; - ++recolor_cnt; - break; - } + conflict_local[recolor_cnt_local] = v; + ++recolor_cnt_local; + break; } } + } + + /* count thread-local conflicts and compute offsets for copying into shared buffer */ + conflict_cnt[tid + 1] = recolor_cnt_local; + + #pragma omp barrier + + #pragma omp master + { + conflict_cnt[0] = 0; + for ( i = 1; i < num_thread + 1; ++i ) + { + conflict_cnt[i] += conflict_cnt[i - 1]; + } + recolor_cnt = conflict_cnt[num_thread]; + } + + #pragma omp barrier + /* copy thread-local conflicts into shared buffer */ + for ( i = 0; i < recolor_cnt_local; ++i ) + { + conflict[conflict_cnt[tid] + i] = conflict_local[i]; + color[conflict_local[i]] = 0; + } + + #pragma omp barrier + + #pragma omp master + { temp_ptr = to_color; to_color = conflict; conflict = temp_ptr; @@ -629,16 +673,9 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri ) #pragma omp barrier } + free( conflict_local ); free( fb_color ); -//#if defined(DEBUG) -// #pragma omp master -// { -// for ( i = 0; i < A->n; ++i ) -// printf("Vertex: %5d, Color: %5d\n", i, color[i] ); -// } -//#endif - #pragma omp barrier } } @@ -877,12 +914,24 @@ void permute_matrix( sparse_matrix * const LU, const TRIANGULARITY tri ) */ sparse_matrix * setup_graph_coloring( sparse_matrix * const H ) { + int num_thread; + if ( color == NULL ) { +#ifdef _OPENMP + #pragma omp parallel + { + num_thread = omp_get_num_threads(); + } +#else + num_thread = 1; +#endif + /* internal storage for graph coloring (global to facilitate simultaneous access to OpenMP threads) */ if ( (color = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || (to_color =(unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || (conflict = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || + (conflict_cnt = (unsigned int*) malloc(sizeof(unsigned int) * (num_thread + 1))) == NULL || (recolor = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || (color_top = (unsigned int*) malloc(sizeof(unsigned int) * (H->n + 1))) == NULL || (permuted_row_col = (unsigned int*) malloc(sizeof(unsigned int) * H->n)) == NULL || -- GitLab