Newer
Older
/*----------------------------------------------------------------------
PuReMD - Purdue ReaxFF Molecular Dynamics Program
Copyright (2010) Purdue University
Hasan Metin Aktulga, haktulga@cs.purdue.edu
Joseph Fogarty, jcfogart@mail.usf.edu
Sagar Pandit, pandit@usf.edu
Ananth Y Grama, ayg@cs.purdue.edu
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
See the GNU General Public License for more details:
<http://www.gnu.org/licenses/>.
----------------------------------------------------------------------*/
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
#include "allocate.h"
Kurt A. O'Hearn
committed
#include "comm_tools.h"
#include "io_tools.h"
#include "tool_box.h"
#include "vector.h"
Kurt A. O'Hearn
committed
/* Intel MKL */
#if defined(HAVE_LAPACKE_MKL)
#include "mkl.h"
/* reference LAPACK */
#elif defined(HAVE_LAPACKE)
#include "lapacke.h"
#endif
typedef struct
{
unsigned int j;
real val;
} sparse_matrix_entry;
enum preconditioner_type
{
LEFT = 0,
RIGHT = 1,
};
Kurt A. O'Hearn
committed
static int compare_matrix_entry( const void * const v1, const void * const v2 )
{
return ((sparse_matrix_entry *)v1)->j - ((sparse_matrix_entry *)v2)->j;
}
/* Routine used for sorting nonzeros within a sparse matrix row;
* internally, a combination of qsort and manual sorting is utilized
*
* A: sparse matrix for which to sort nonzeros within a row, stored in CSR format
*/
Kurt A. O'Hearn
committed
void Sort_Matrix_Rows( sparse_matrix * const A )
Kurt A. O'Hearn
committed
{
unsigned int i, pj, si, ei, temp_size;
sparse_matrix_entry *temp;
temp = NULL;
temp_size = 0;
Kurt A. O'Hearn
committed
/* sort each row of A using column indices */
Kurt A. O'Hearn
committed
for ( i = 0; i < A->n; ++i )
{
si = A->start[i];
ei = A->end[i];
if ( temp == NULL )
{
temp = smalloc( sizeof(sparse_matrix_entry) * (ei - si), "Sort_Matrix_Rows::temp" );
temp_size = ei - si;
}
else if ( temp_size < ei - si )
{
sfree( temp, "Sort_Matrix_Rows::temp" );
temp = smalloc( sizeof(sparse_matrix_entry) * (ei - si), "Sort_Matrix_Rows::temp" );
temp_size = ei - si;
}
for ( pj = 0; pj < (ei - si); ++pj )
{
temp[pj].j = A->j[si + pj];
temp[pj].val = A->val[si + pj];
}
/* polymorphic sort in standard C library using column indices */
qsort( temp, ei - si, sizeof(sparse_matrix_entry), compare_matrix_entry );
for ( pj = 0; pj < (ei - si); ++pj )
{
A->j[si + pj] = temp[pj].j;
A->val[si + pj] = temp[pj].val;
}
Kurt A. O'Hearn
committed
}
sfree( temp, "Sort_Matrix_Rows::temp" );
Kurt A. O'Hearn
committed
}
static int compare_dbls( const void* arg1, const void* arg2 )
{
Kurt A. O'Hearn
committed
int ret;
double a1, a2;
a1 = *(double *) arg1;
a2 = *(double *) arg2;
if ( a1 < a2 )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = -1;
}
else if (a1 == a2)
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = 0;
}
else
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = 1;
}
return ret;
}
static void qsort_dbls( double *array, int array_len )
{
Kurt A. O'Hearn
committed
qsort( array, (size_t) array_len, sizeof(double),
Kurt A. O'Hearn
committed
compare_dbls );
}
static int find_bucket( double *list, int len, double a )
{
int s, e, m;
Kurt A. O'Hearn
committed
if ( len == 0 )
{
return 0;
}
Kurt A. O'Hearn
committed
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
if ( a > list[len - 1] )
{
return len;
}
s = 0;
e = len - 1;
while ( s < e )
{
m = (s + e) / 2;
if ( list[m] < a )
{
s = m + 1;
}
else
{
e = m;
}
}
return s;
}
Kurt A. O'Hearn
committed
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
/* Compute diagonal inverese (Jacobi) preconditioner
*
* H: matrix used to compute preconditioner, in CSR format
* Hdia_inv: computed diagonal inverse preconditioner
*/
void jacobi( sparse_matrix const * const H, real * const Hdia_inv )
{
unsigned int i, pj;
if ( H->format == SYM_HALF_MATRIX )
{
for ( i = 0; i < H->n; ++i )
{
if ( FABS( H->val[H->start[i]] ) > 1.0e-15 )
{
Hdia_inv[i] = 1.0 / H->val[H->start[i]];
}
else
{
Hdia_inv[i] = 1.0;
}
}
}
else if ( H->format == SYM_FULL_MATRIX || H->format == FULL_MATRIX )
{
for ( i = 0; i < H->n; ++i )
{
for ( pj = H->start[i]; pj < H->start[i + 1]; ++pj )
{
if ( H->j[pj] == i )
{
if ( FABS( H->val[H->start[i]] ) > 1.0e-15 )
{
Hdia_inv[i] = 1.0 / H->val[pj];
}
else
{
Hdia_inv[i] = 1.0;
}
break;
}
}
}
}
}
/* Apply diagonal inverse (Jacobi) preconditioner to system residual
*
* Hdia_inv: diagonal inverse preconditioner (constructed using H)
* y: current residuals
* x: preconditioned residuals
* N: dimensions of preconditioner and vectors (# rows in H)
*/
static void dual_jacobi_app( const real * const Hdia_inv, const rvec2 * const y,
rvec2 * const x, const int N )
Kurt A. O'Hearn
committed
{
unsigned int i;
Kurt A. O'Hearn
committed
for ( i = 0; i < N; ++i )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
x[i][0] = y[i][0] * Hdia_inv[i];
x[i][1] = y[i][1] * Hdia_inv[i];
Kurt A. O'Hearn
committed
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
}
}
/* Apply diagonal inverse (Jacobi) preconditioner to system residual
*
* Hdia_inv: diagonal inverse preconditioner (constructed using H)
* y: current residual
* x: preconditioned residual
* N: dimensions of preconditioner and vectors (# rows in H)
*/
static void jacobi_app( const real * const Hdia_inv, const real * const y,
real * const x, const int N )
{
unsigned int i;
for ( i = 0; i < N; ++i )
{
x[i] = y[i] * Hdia_inv[i];
}
}
/* Local arithmetic portion of dual sparse matrix-dense vector multiplication Ax = b
*
* A: sparse matrix, 1D partitioned row-wise
* x: two dense vectors
* b (output): two dense vectors
* N: number of entries in both vectors in b (must be equal)
*/
Kurt A. O'Hearn
committed
static void Dual_Sparse_MatVec_local( sparse_matrix const * const A,
Kurt A. O'Hearn
committed
rvec2 const * const x, rvec2 * const b, int N )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
int i, j, k, si, num_rows;
Kurt A. O'Hearn
committed
real val;
for ( i = 0; i < N; ++i )
{
Kurt A. O'Hearn
committed
b[i][0] = 0.0;
b[i][1] = 0.0;
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
if ( A->format == SYM_HALF_MATRIX )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
/* diagonal only contributes once */
Kurt A. O'Hearn
committed
if ( i < A->n )
Kurt A. O'Hearn
committed
{
b[i][0] += A->val[si] * x[i][0];
b[i][1] += A->val[si] * x[i][1];
Kurt A. O'Hearn
committed
k = si + 1;
}
/* zeros on the diagonal for i >= A->n,
* so skip the diagonal multplication step as zeros
* are not stored (idea: keep the NNZ's the same
* for full shell and neutral territory half-stored
* charge matrices to make debugging easier) */
else
{
k = si;
}
for ( ; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
b[j][0] += val * x[i][0];
b[j][1] += val * x[i][1];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
}
}
}
Kurt A. O'Hearn
committed
#else
Kurt A. O'Hearn
committed
num_rows = A->n;
if ( A->format == SYM_HALF_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* diagonal only contributes once */
b[i][0] += A->val[si] * x[i][0];
b[i][1] += A->val[si] * x[i][1];
Kurt A. O'Hearn
committed
for ( k = si + 1; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
b[j][0] += val * x[i][0];
b[j][1] += val * x[i][1];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
Kurt A. O'Hearn
committed
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
/* Communications for sparse matrix-dense vector multiplication Ax = b
*
* system:
* control:
* mpi_data:
* x: dense vector
* buf_type: data structure type for x
* mpi_type: MPI_Datatype struct for communications
*
* returns: communication time
*/
static void Sparse_MatVec_Comm_Part1( const reax_system * const system,
const control_params * const control, mpi_datatypes * const mpi_data,
void const * const x, int buf_type, MPI_Datatype mpi_type )
{
/* exploit 3D domain decomposition of simulation space with 3-stage communication pattern */
Dist( system, mpi_data, x, buf_type, mpi_type );
}
Kurt A. O'Hearn
committed
/* Local arithmetic portion of sparse matrix-dense vector multiplication Ax = b
*
* A: sparse matrix, 1D partitioned row-wise
* x: dense vector
* b (output): dense vector
* N: number of entries in b
*/
static void Sparse_MatVec_local( sparse_matrix const * const A,
real const * const x, real * const b, int N )
Kurt A. O'Hearn
committed
int i, j, k, si, num_rows;
real val;
Kurt A. O'Hearn
committed
b[i] = 0.0;
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
if ( A->format == SYM_HALF_MATRIX )
Kurt A. O'Hearn
committed
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* diagonal only contributes once */
if ( i < A->n )
{
b[i] += A->val[si] * x[i];
Kurt A. O'Hearn
committed
k = si + 1;
}
/* zeros on the diagonal for i >= A->n,
* so skip the diagonal multplication step as zeros
* are not stored (idea: keep the NNZ's the same
* for full shell and neutral territory half-stored
* charge matrices to make debugging easier) */
else
{
k = si;
}
for ( ; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
b[j] += val * x[i];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
}
}
}
#else
num_rows = A->n;
Kurt A. O'Hearn
committed
if ( A->format == SYM_HALF_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* A symmetric, upper triangular portion stored
* => diagonal only contributes once */
b[i] += A->val[si] * x[i];
Kurt A. O'Hearn
committed
for ( k = si + 1; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
b[j] += val * x[i];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
/* Communications for sparse matrix-dense vector multiplication Ax = b
*
* system:
* control:
* mpi_data:
* mat_format: storage type of sparse matrix A
* b: dense vector
* buf_type: data structure type for b
* mpi_type: MPI_Datatype struct for communications
*
* returns: communication time
*/
Kurt A. O'Hearn
committed
static void Sparse_MatVec_Comm_Part2( const reax_system * const system,
Kurt A. O'Hearn
committed
const control_params * const control, mpi_datatypes * const mpi_data,
int mat_format, void * const b, int buf_type, MPI_Datatype mpi_type )
{
if ( mat_format == SYM_HALF_MATRIX )
{
Kurt A. O'Hearn
committed
Coll( system, mpi_data, b, buf_type, mpi_type );
}
#if defined(NEUTRAL_TERRITORY)
else
{
Coll( system, mpi_data, b, buf_type, mpi_type );
}
Kurt A. O'Hearn
committed
#endif
}
Kurt A. O'Hearn
committed
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
/* sparse matrix, dense vector multiplication AX = B
*
* system:
* control:
* data:
* A: symmetric matrix, stored in CSR format
* X: dense vector
* n: number of entries in x
* B (output): dense vector */
static void Dual_Sparse_MatVec( reax_system const * const system,
control_params const * const control, simulation_data * const data,
mpi_datatypes * const mpi_data, sparse_matrix const * const A,
rvec2 const * const x, int n, rvec2 * const b )
{
#if defined(LOG_PERFORMANCE)
real time;
time = Get_Time( );
#endif
Sparse_MatVec_Comm_Part1( system, control, mpi_data, x,
RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Dual_Sparse_MatVec_local( A, x, b, n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data, A->format, b,
RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
}
/* sparse matrix, dense vector multiplication Ax = b
*
* system:
* control:
* data:
* A: symmetric matrix, stored in CSR format
* x: dense vector
* n: number of entries in x
* b (output): dense vector */
static void Sparse_MatVec( reax_system const * const system,
control_params const * const control, simulation_data * const data,
mpi_datatypes * const mpi_data, sparse_matrix const * const A,
real const * const x, int n, real * const b )
{
#if defined(LOG_PERFORMANCE)
real time;
time = Get_Time( );
#endif
Sparse_MatVec_Comm_Part1( system, control, mpi_data, x,
REAL_PTR_TYPE, MPI_DOUBLE );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Sparse_MatVec_local( A, x, b, n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data, A->format, b,
REAL_PTR_TYPE, MPI_DOUBLE );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
}
Kurt A. O'Hearn
committed
void setup_sparse_approx_inverse( reax_system const * const system,
Kurt A. O'Hearn
committed
simulation_data * const data,
storage * const workspace, mpi_datatypes * const mpi_data,
sparse_matrix * const A, sparse_matrix *A_spar_patt,
int nprocs, real filter )
Kurt A. O'Hearn
committed
int i, ret, bin, total, pos;
Kurt A. O'Hearn
committed
int n, n_gather, s_local, s, n_local;
Kurt A. O'Hearn
committed
int k, pj, size;
int left, right, p, turn;
Kurt A. O'Hearn
committed
int num_rows;
Kurt A. O'Hearn
committed
int *srecv, *sdispls;
int *scounts_local, *scounts;
int *dspls_local, *dspls;
int *bin_elements;
real threshold, pivot, tmp;
real *input_array;
real *samplelist_local, *samplelist;
real *pivotlist;
real *bucketlist_local, *bucketlist;
Kurt A. O'Hearn
committed
real t_start, t_comm;
Kurt A. O'Hearn
committed
real total_comm;
srecv = NULL;
sdispls = NULL;
samplelist_local = NULL;
samplelist = NULL;
pivotlist = NULL;
input_array = NULL;
bucketlist_local = NULL;
bucketlist = NULL;
scounts_local = NULL;
scounts = NULL;
dspls_local = NULL;
dspls = NULL;
bin_elements = NULL;
Kurt A. O'Hearn
committed
t_comm = 0.0;
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
fprintf( stdout,"%d %d %d\n", A->n, A->NT, A->m );
fflush( stdout );
#else
num_rows = A->n;
#endif
Kurt A. O'Hearn
committed
if ( A_spar_patt->allocated == FALSE )
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Allocate_Matrix( A_spar_patt, A->n, A->NT, A->m, A->format );
#else
Allocate_Matrix( A_spar_patt, A->n, system->local_cap, A->m, A->format );
#endif
Kurt A. O'Hearn
committed
else /*if ( (*A_spar_patt)->m < A->m )*/
Kurt A. O'Hearn
committed
Deallocate_Matrix( A_spar_patt );
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Allocate_Matrix( A_spar_patt, A->n, A->NT, A->m, A->format );
#else
Allocate_Matrix( A_spar_patt, A->n, system->local_cap, A->m, A->format );
#endif
Kurt A. O'Hearn
committed
n_local = 0;
Kurt A. O'Hearn
committed
for ( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
n_local += (A->end[i] - A->start[i] + 9) / 10;
Kurt A. O'Hearn
committed
s_local = (int) (12.0 * (LOG2(n_local) + LOG2(nprocs)));
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Allreduce( &n_local, &n, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Reduce( &s_local, &s, 1, MPI_INT, MPI_SUM, MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
/* count num. bin elements for each processor, uniform bin sizes */
input_array = smalloc( sizeof(real) * n_local,
"setup_sparse_approx_inverse::input_array" );
scounts_local = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::scounts_local" );
scounts = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::scounts" );
bin_elements = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::bin_elements" );
dspls_local = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::displs_local" );
bucketlist_local = smalloc( sizeof(real) * n_local,
"setup_sparse_approx_inverse::bucketlist_local" );
dspls = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::dspls" );
if ( nprocs > 1 )
Kurt A. O'Hearn
committed
pivotlist = smalloc( sizeof(real) * (nprocs - 1),
"setup_sparse_approx_inverse::pivotlist" );
Kurt A. O'Hearn
committed
samplelist_local = smalloc( sizeof(real) * s_local,
"setup_sparse_approx_inverse::samplelist_local" );
if ( system->my_rank == MASTER_NODE )
Kurt A. O'Hearn
committed
samplelist = smalloc( sizeof(real) * s,
"setup_sparse_approx_inverse::samplelist" );
srecv = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::srecv" );
sdispls = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::sdispls" );
Kurt A. O'Hearn
committed
n_local = 0;
for ( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
for ( pj = A->start[i]; pj < A->end[i]; pj += 10 )
{
input_array[n_local++] = A->val[pj];
Kurt A. O'Hearn
committed
}
}
for ( i = 0; i < s_local; i++)
{
Kurt A. O'Hearn
committed
/* samplelist_local[i] = input_array[rand( ) % n_local]; */
samplelist_local[i] = input_array[ i ];
}
/* gather samples at the root process */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Gather( &s_local, 1, MPI_INT, srecv, 1, MPI_INT, MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
if( system->my_rank == MASTER_NODE )
{
sdispls[0] = 0;
for ( i = 0; i < nprocs - 1; ++i )
{
sdispls[i + 1] = sdispls[i] + srecv[i];
}
}
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Gatherv( samplelist_local, s_local, MPI_DOUBLE,
samplelist, srecv, sdispls, MPI_DOUBLE, MASTER_NODE, MPI_COMM_WORLD);
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
/* sort samples at the root process and select pivots */
if ( system->my_rank == MASTER_NODE )
{
qsort_dbls( samplelist, s );
Kurt A. O'Hearn
committed
for ( i = 1; i < nprocs; ++i )
{
pivotlist[i - 1] = samplelist[(i * s) / nprocs];
}
}
/* broadcast pivots */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
Kurt A. O'Hearn
committed
ret = MPI_Bcast( pivotlist, nprocs - 1, MPI_DOUBLE, MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
for ( i = 0; i < nprocs; ++i )
{
scounts_local[i] = 0;
}
Kurt A. O'Hearn
committed
for ( i = 0; i < n_local; ++i )
{
pos = find_bucket( pivotlist, nprocs - 1, input_array[i] );
scounts_local[pos]++;
}
for ( i = 0; i < nprocs; ++i )
{
bin_elements[i] = scounts_local[i];
scounts[i] = scounts_local[i];
}
/* compute displacements for MPI comm */
dspls_local[0] = 0;
Kurt A. O'Hearn
committed
for ( i = 0; i < nprocs - 1; ++i )
{
dspls_local[i + 1] = dspls_local[i] + scounts_local[i];
}
/* bin elements */
for ( i = 0; i < n_local; ++i )
{
bin = find_bucket( pivotlist, nprocs - 1, input_array[i] );
pos = dspls_local[bin] + scounts_local[bin] - bin_elements[bin];
bucketlist_local[pos] = input_array[i];
bin_elements[bin]--;
}
/* determine counts for elements per process */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Allreduce( MPI_IN_PLACE, scounts, nprocs, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
/* find the target process */
target_proc = 0;
total = 0;
Kurt A. O'Hearn
committed
k = n * filter;
for ( i = nprocs - 1; i >= 0; --i )
Kurt A. O'Hearn
committed
if ( total + scounts[i] >= k )
{
/* global k becomes local k*/
Kurt A. O'Hearn
committed
k -= total;
target_proc = i;
break;
}
total += scounts[i];
}
Kurt A. O'Hearn
committed
n_gather = scounts[target_proc];
if ( system->my_rank == target_proc )
{
bucketlist = smalloc( sizeof( real ) * n_gather,
"setup_sparse_approx_inverse::bucketlist" );
}
Kurt A. O'Hearn
committed
/* send local buckets to target processor for quickselect */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Gather( scounts_local + target_proc, 1, MPI_INT, scounts,
1, MPI_INT, target_proc, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
if ( system->my_rank == target_proc )
{
dspls[0] = 0;
for ( i = 0; i < nprocs - 1; ++i )
{
dspls[i + 1] = dspls[i] + scounts[i];
}
}
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Gatherv( bucketlist_local + dspls_local[target_proc], scounts_local[target_proc], MPI_DOUBLE,
bucketlist, scounts, dspls, MPI_DOUBLE, target_proc, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
/* apply quick select algorithm at the target process */
if ( system->my_rank == target_proc )
Kurt A. O'Hearn
committed
right = n_gather-1;
Kurt A. O'Hearn
committed
while ( k )
{
p = left;
turn = 1 - turn;
Kurt A. O'Hearn
committed
/* alternating pivots in order to handle corner cases */
if ( turn == 1 )
{
pivot = bucketlist[right];
}
else
{
pivot = bucketlist[left];
}
Kurt A. O'Hearn
committed
for ( i = left + 1 - turn; i <= right-turn; ++i )
Kurt A. O'Hearn
committed
if ( bucketlist[i] > pivot )
{
tmp = bucketlist[i];
bucketlist[i] = bucketlist[p];
bucketlist[p] = tmp;
p++;
}
}
Kurt A. O'Hearn
committed
if ( turn == 1 )
{
tmp = bucketlist[p];
bucketlist[p] = bucketlist[right];
bucketlist[right] = tmp;
}
else
{
tmp = bucketlist[p];
bucketlist[p] = bucketlist[left];
bucketlist[left] = tmp;
}
Kurt A. O'Hearn
committed
if ( p == k - 1)
{
threshold = bucketlist[p];
break;
}
Kurt A. O'Hearn
committed
else if ( p > k - 1 )
{
right = p - 1;
}
else
{
left = p + 1;
}
}
Kurt A. O'Hearn
committed
/* comment out if ACKS2 and/or EE is not an option */
// if ( threshold < 1.000000 )
// {
// threshold = 1.000001;
// }
Kurt A. O'Hearn
committed
/* broadcast the filtering value */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
Kurt A. O'Hearn
committed
ret = MPI_Bcast( &threshold, 1, MPI_DOUBLE, target_proc, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
int nnz = 0;
#endif
/* build entries of that pattern*/
for ( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
A_spar_patt->start[i] = A->start[i];
size = A->start[i];
for ( pj = A->start[i]; pj < A->end[i]; ++pj )
{
if ( ( A->val[pj] >= threshold ) || ( A->j[pj] == i ) )
A_spar_patt->val[size] = A->val[pj];
A_spar_patt->j[size] = A->j[pj];
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
nnz++;
#endif
Kurt A. O'Hearn
committed
A_spar_patt->end[i] = size;
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
Kurt A. O'Hearn
committed
ret = MPI_Allreduce( MPI_IN_PLACE, &nnz, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
if ( system->my_rank == MASTER_NODE )
{
fprintf( stdout, " [INFO] \ntotal nnz in all charge matrices = %d\ntotal nnz in all sparsity patterns = %d\nthreshold = %.15lf\n",
n, nnz, threshold );
fflush( stdout );
}
#endif
Kurt A. O'Hearn
committed
ret = MPI_Reduce( &t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM,
MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
if ( system->my_rank == MASTER_NODE )
Kurt A. O'Hearn
committed
{
data->timing.cm_solver_comm += total_comm / nprocs;
}
sfree( input_array, "setup_sparse_approx_inverse::input_array" );
sfree( scounts_local, "setup_sparse_approx_inverse::scounts_local" );
sfree( scounts, "setup_sparse_approx_inverse::scounts" );
sfree( bin_elements, "setup_sparse_approx_inverse::bin_elements" );
sfree( dspls_local, "setup_sparse_approx_inverse::displs_local" );
sfree( bucketlist_local, "setup_sparse_approx_inverse::bucketlist_local" );