Newer
Older
/*----------------------------------------------------------------------
PuReMD - Purdue ReaxFF Molecular Dynamics Program
Copyright (2010) Purdue University
Hasan Metin Aktulga, haktulga@cs.purdue.edu
Joseph Fogarty, jcfogart@mail.usf.edu
Sagar Pandit, pandit@usf.edu
Ananth Y Grama, ayg@cs.purdue.edu
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
See the GNU General Public License for more details:
<http://www.gnu.org/licenses/>.
----------------------------------------------------------------------*/
Kurt A. O'Hearn
committed
#include "reax_types.h"
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
#include "allocate.h"
#include "basic_comm.h"
#include "io_tools.h"
#include "tool_box.h"
#include "vector.h"
Kurt A. O'Hearn
committed
/* Intel MKL */
#if defined(HAVE_LAPACKE_MKL)
#include "mkl.h"
/* reference LAPACK */
#elif defined(HAVE_LAPACKE)
#include "lapacke.h"
#endif
#if defined(CG_PERFORMANCE)
real t_start, t_elapsed, matvec_time, dot_time;
#endif
enum preconditioner_type
{
LEFT = 0,
RIGHT = 1,
};
Kurt A. O'Hearn
committed
static int compare_matrix_entry( const void * const v1, const void * const v2 )
{
return ((sparse_matrix_entry *)v1)->j - ((sparse_matrix_entry *)v2)->j;
}
void Sort_Matrix_Rows( sparse_matrix * const A )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
int i, si, ei;
for ( i = 0; i < A->n; ++i )
{
si = A->start[i];
ei = A->end[i];
qsort( &A->entries[si], ei - si,
sizeof(sparse_matrix_entry), compare_matrix_entry );
}
}
static int compare_dbls( const void* arg1, const void* arg2 )
{
Kurt A. O'Hearn
committed
int ret;
double a1, a2;
a1 = *(double *) arg1;
a2 = *(double *) arg2;
if ( a1 < a2 )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = -1;
}
else if (a1 == a2)
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = 0;
}
else
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = 1;
}
return ret;
}
static void qsort_dbls( double *array, int array_len )
{
Kurt A. O'Hearn
committed
qsort( array, (size_t) array_len, sizeof(double),
Kurt A. O'Hearn
committed
compare_dbls );
}
static int find_bucket( double *list, int len, double a )
{
int s, e, m;
Kurt A. O'Hearn
committed
if ( len == 0 )
{
return 0;
}
Kurt A. O'Hearn
committed
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
if ( a > list[len - 1] )
{
return len;
}
s = 0;
e = len - 1;
while ( s < e )
{
m = (s + e) / 2;
if ( list[m] < a )
{
s = m + 1;
}
else
{
e = m;
}
}
return s;
}
Kurt A. O'Hearn
committed
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
/* Jacobi preconditioner computation */
//real jacobi( const sparse_matrix * const H, real * const Hdia_inv )
real jacobi( const reax_system * const system, real * const Hdia_inv )
{
unsigned int i;
real start;
start = Get_Time( );
for ( i = 0; i < system->n; ++i )
{
// if ( FABS( H->val[H->start[i + 1] - 1] ) > 1.0e-15 )
// {
Hdia_inv[i] = 1.0 / system->reax_param.sbp[ system->my_atoms[i].type ].eta;
// }
// else
// {
// Hdia_inv[i] = 1.0;
// }
}
return Get_Timing_Info( start );
}
/* Apply diagonal inverse (Jacobi) preconditioner to system residual
*
* Hdia_inv: diagonal inverse preconditioner (constructed using H)
* y: current residual
* x: preconditioned residual
* N: dimensions of preconditioner and vectors (# rows in H)
*/
static void jacobi_app( const real * const Hdia_inv, const real * const y,
real * const x, const int N )
{
unsigned int i;
for ( i = 0; i < N; ++i )
{
x[i] = y[i] * Hdia_inv[i];
}
}
/* Local arithmetic portion of dual sparse matrix-dense vector multiplication Ax = b
*
* A: sparse matrix, 1D partitioned row-wise
* x: two dense vectors
* b (output): two dense vectors
* N: number of entries in both vectors in b (must be equal)
*/
static void dual_Sparse_MatVec_local( sparse_matrix const * const A,
rvec2 const * const x, rvec2 * const b, int N )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
int i, j, k, si, num_rows;
Kurt A. O'Hearn
committed
real val;
for ( i = 0; i < N; ++i )
{
Kurt A. O'Hearn
committed
b[i][0] = 0.0;
b[i][1] = 0.0;
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
if ( A->format == SYM_HALF_MATRIX )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
/* diagonal only contributes once */
if( i < A->n )
{
b[i][0] += A->entries[si].val * x[i][0];
b[i][1] += A->entries[si].val * x[i][1];
k = si + 1;
}
/* zeros on the diagonal for i >= A->n,
* so skip the diagonal multplication step as zeros
* are not stored (idea: keep the NNZ's the same
* for full shell and neutral territory half-stored
* charge matrices to make debugging easier) */
else
{
k = si;
}
for ( ; k < A->end[i]; ++k )
{
j = A->entries[k].j;
val = A->entries[k].val;
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
b[j][0] += val * x[i][0];
b[j][1] += val * x[i][1];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->entries[k].j;
val = A->entries[k].val;
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
}
}
}
Kurt A. O'Hearn
committed
#else
Kurt A. O'Hearn
committed
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
num_rows = A->n;
if ( A->format == SYM_HALF_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* diagonal only contributes once */
b[i][0] += A->entries[si].val * x[i][0];
b[i][1] += A->entries[si].val * x[i][1];
for ( k = si + 1; k < A->end[i]; ++k )
{
j = A->entries[k].j;
val = A->entries[k].val;
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
b[j][0] += val * x[i][0];
b[j][1] += val * x[i][1];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
Kurt A. O'Hearn
committed
for ( k = si; k < A->end[i]; ++k )
{
j = A->entries[k].j;
val = A->entries[k].val;
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
/* Local arithmetic portion of sparse matrix-dense vector multiplication Ax = b
*
* A: sparse matrix, 1D partitioned row-wise
* x: dense vector
* b (output): dense vector
* N: number of entries in b
*/
static void Sparse_MatVec_local( sparse_matrix const * const A,
real const * const x, real * const b, int N )
Kurt A. O'Hearn
committed
int i, j, k, si, num_rows;
real val;
Kurt A. O'Hearn
committed
b[i] = 0.0;
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
if ( A->format == SYM_HALF_MATRIX )
Kurt A. O'Hearn
committed
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* diagonal only contributes once */
if ( i < A->n )
{
b[i] += A->entries[si].val * x[i];
k = si + 1;
}
/* zeros on the diagonal for i >= A->n,
* so skip the diagonal multplication step as zeros
* are not stored (idea: keep the NNZ's the same
* for full shell and neutral territory half-stored
* charge matrices to make debugging easier) */
else
{
k = si;
}
for ( ; k < A->end[i]; ++k )
{
j = A->entries[k].j;
val = A->entries[k].val;
b[i] += val * x[j];
b[j] += val * x[i];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->entries[k].j;
Kurt A. O'Hearn
committed
val = A->entries[k].val;
Kurt A. O'Hearn
committed
b[i] += val * x[j];
}
}
}
#else
num_rows = A->n;
Kurt A. O'Hearn
committed
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
if ( A->format == SYM_HALF_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* A symmetric, upper triangular portion stored
* => diagonal only contributes once */
b[i] += A->entries[si].val * x[i];
for ( k = si + 1; k < A->end[i]; ++k )
{
j = A->entries[k].j;
val = A->entries[k].val;
b[i] += val * x[j];
b[j] += val * x[i];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->entries[k].j;
val = A->entries[k].val;
b[i] += val * x[j];
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
/* Communications for sparse matrix-dense vector multiplication Ax = b
*
* system:
* control:
* mpi_data:
* x: dense vector
* buf_type: data structure type for x
* mpi_type: MPI_Datatype struct for communications
*
* returns: communication time
*/
static int Sparse_MatVec_Comm_Part1( const reax_system * const system,
const control_params * const control, mpi_datatypes * const mpi_data,
void const * const x, int buf_type, MPI_Datatype mpi_type )
{
Kurt A. O'Hearn
committed
int t_start, t_comm;
Kurt A. O'Hearn
committed
t_comm = 0.0;
Kurt A. O'Hearn
committed
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
/* exploit 3D domain decomposition of simulation space with 3-stage communication pattern */
t_start = MPI_Wtime( );
Dist( system, mpi_data, x, buf_type, mpi_type );
t_comm += MPI_Wtime( ) - t_start;
return t_comm;
}
/* Communications for sparse matrix-dense vector multiplication Ax = b
*
* system:
* control:
* mpi_data:
* mat_format: storage type of sparse matrix A
* b: dense vector
* buf_type: data structure type for b
* mpi_type: MPI_Datatype struct for communications
*
* returns: communication time
*/
static int Sparse_MatVec_Comm_Part2( const reax_system * const system,
const control_params * const control, mpi_datatypes * const mpi_data,
int mat_format, void * const b, int buf_type, MPI_Datatype mpi_type )
{
int t_start, t_comm;
t_comm = 0.0;
if ( mat_format == SYM_HALF_MATRIX )
{
Kurt A. O'Hearn
committed
t_start = MPI_Wtime( );
Coll( system, mpi_data, b, buf_type, mpi_type );
t_comm += MPI_Wtime( ) - t_start;
}
#if defined(NEUTRAL_TERRITORY)
else
{
t_start = MPI_Wtime( );
Coll( system, mpi_data, b, buf_type, mpi_type );
t_comm += MPI_Wtime( ) - t_start;
}
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
return t_comm;
}
Kurt A. O'Hearn
committed
real setup_sparse_approx_inverse( reax_system const * const system,
simulation_data * const data,
storage * const workspace, mpi_datatypes * const mpi_data,
sparse_matrix * const A, sparse_matrix *A_spar_patt,
int nprocs, real filter )
{
int i, bin, total, pos;
Kurt A. O'Hearn
committed
int n, n_gather, s_local, s, n_local;
Kurt A. O'Hearn
committed
int k;
int pj, size;
int left, right, p, turn;
Kurt A. O'Hearn
committed
int num_rows;
real threshold, pivot, tmp;
real *input_array;
real *samplelist_local, *samplelist;
real *pivotlist;
real *bucketlist_local, *bucketlist;
Kurt A. O'Hearn
committed
int *srecv, *sdispls;
int *scounts_local, *scounts;
int *dspls_local, *dspls;
int *bin_elements;
MPI_Comm comm;
Kurt A. O'Hearn
committed
real start, t_start, t_comm;
real total_comm;
start = MPI_Wtime();
t_comm = 0.0;
srecv = NULL;
sdispls = NULL;
samplelist_local = NULL;
samplelist = NULL;
pivotlist = NULL;
input_array = NULL;
bucketlist_local = NULL;
bucketlist = NULL;
scounts_local = NULL;
scounts = NULL;
dspls_local = NULL;
dspls = NULL;
bin_elements = NULL;
Kurt A. O'Hearn
committed
comm = mpi_data->world;
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
fprintf( stdout,"%d %d %d\n", A->n, A->NT, A->m );
fflush( stdout );
#else
num_rows = A->n;
#endif
Kurt A. O'Hearn
committed
if ( A_spar_patt->allocated == FALSE )
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Allocate_Matrix( A_spar_patt, A->n, A->NT, A->m, A->format );
#else
Allocate_Matrix( A_spar_patt, A->n, system->local_cap, A->m, A->format );
#endif
Kurt A. O'Hearn
committed
else /*if ( (*A_spar_patt)->m < A->m )*/
Kurt A. O'Hearn
committed
Deallocate_Matrix( A_spar_patt );
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Allocate_Matrix( A_spar_patt, A->n, A->NT, A->m, A->format );
#else
Allocate_Matrix( A_spar_patt, A->n, system->local_cap, A->m, A->format );
#endif
Kurt A. O'Hearn
committed
n_local = 0;
for( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
n_local += (A->end[i] - A->start[i] + 9) / 10;
Kurt A. O'Hearn
committed
s_local = (int) (12.0 * (log2(n_local) + log2(nprocs)));
t_start = MPI_Wtime();
MPI_Allreduce( &n_local, &n, 1, MPI_INT, MPI_SUM, comm );
MPI_Reduce( &s_local, &s, 1, MPI_INT, MPI_SUM, MASTER_NODE, comm );
t_comm += MPI_Wtime() - t_start;
Kurt A. O'Hearn
committed
/* count num. bin elements for each processor, uniform bin sizes */
input_array = smalloc( sizeof(real) * n_local,
"setup_sparse_approx_inverse::input_array" );
scounts_local = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::scounts_local" );
scounts = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::scounts" );
bin_elements = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::bin_elements" );
dspls_local = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::displs_local" );
bucketlist_local = smalloc( sizeof(real) * n_local,
"setup_sparse_approx_inverse::bucketlist_local" );
dspls = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::dspls" );
if ( nprocs > 1 )
Kurt A. O'Hearn
committed
pivotlist = smalloc( sizeof(real) * (nprocs - 1),
"setup_sparse_approx_inverse::pivotlist" );
Kurt A. O'Hearn
committed
samplelist_local = smalloc( sizeof(real) * s_local,
"setup_sparse_approx_inverse::samplelist_local" );
if ( system->my_rank == MASTER_NODE )
Kurt A. O'Hearn
committed
samplelist = smalloc( sizeof(real) * s,
"setup_sparse_approx_inverse::samplelist" );
srecv = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::srecv" );
sdispls = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::sdispls" );
Kurt A. O'Hearn
committed
n_local = 0;
for ( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
for ( pj = A->start[i]; pj < A->end[i]; pj += 10 )
{
input_array[n_local++] = A->entries[pj].val;
}
}
for ( i = 0; i < s_local; i++)
{
Kurt A. O'Hearn
committed
/* samplelist_local[i] = input_array[rand( ) % n_local]; */
samplelist_local[i] = input_array[ i ];
}
/* gather samples at the root process */
Kurt A. O'Hearn
committed
t_start = MPI_Wtime();
MPI_Gather( &s_local, 1, MPI_INT, srecv, 1, MPI_INT, MASTER_NODE, comm );
t_comm += MPI_Wtime() - t_start;
if( system->my_rank == MASTER_NODE )
{
sdispls[0] = 0;
for ( i = 0; i < nprocs - 1; ++i )
{
sdispls[i + 1] = sdispls[i] + srecv[i];
}
}
Kurt A. O'Hearn
committed
t_start = MPI_Wtime();
MPI_Gatherv( samplelist_local, s_local, MPI_DOUBLE,
samplelist, srecv, sdispls, MPI_DOUBLE, MASTER_NODE, comm);
t_comm += MPI_Wtime() - t_start;
/* sort samples at the root process and select pivots */
if ( system->my_rank == MASTER_NODE )
{
qsort_dbls( samplelist, s );
Kurt A. O'Hearn
committed
for ( i = 1; i < nprocs; ++i )
{
pivotlist[i - 1] = samplelist[(i * s) / nprocs];
}
}
/* broadcast pivots */
Kurt A. O'Hearn
committed
t_start = MPI_Wtime();
MPI_Bcast( pivotlist, nprocs - 1, MPI_DOUBLE, MASTER_NODE, comm );
Kurt A. O'Hearn
committed
t_comm += MPI_Wtime() - t_start;
Kurt A. O'Hearn
committed
for ( i = 0; i < nprocs; ++i )
{
scounts_local[i] = 0;
}
Kurt A. O'Hearn
committed
for ( i = 0; i < n_local; ++i )
{
pos = find_bucket( pivotlist, nprocs - 1, input_array[i] );
scounts_local[pos]++;
}
for ( i = 0; i < nprocs; ++i )
{
bin_elements[i] = scounts_local[i];
scounts[i] = scounts_local[i];
}
/* compute displacements for MPI comm */
dspls_local[0] = 0;
Kurt A. O'Hearn
committed
for ( i = 0; i < nprocs - 1; ++i )
{
dspls_local[i + 1] = dspls_local[i] + scounts_local[i];
}
/* bin elements */
for ( i = 0; i < n_local; ++i )
{
bin = find_bucket( pivotlist, nprocs - 1, input_array[i] );
pos = dspls_local[bin] + scounts_local[bin] - bin_elements[bin];
bucketlist_local[pos] = input_array[i];
bin_elements[bin]--;
}
/* determine counts for elements per process */
Kurt A. O'Hearn
committed
t_start = MPI_Wtime( );
MPI_Allreduce( MPI_IN_PLACE, scounts, nprocs, MPI_INT, MPI_SUM, comm );
Kurt A. O'Hearn
committed
t_comm += MPI_Wtime( ) - t_start;
Kurt A. O'Hearn
committed
/* find the target process */
target_proc = 0;
total = 0;
Kurt A. O'Hearn
committed
k = n * filter;
for ( i = nprocs - 1; i >= 0; --i )
Kurt A. O'Hearn
committed
if ( total + scounts[i] >= k )
{
/* global k becomes local k*/
Kurt A. O'Hearn
committed
k -= total;
target_proc = i;
break;
}
total += scounts[i];
}
Kurt A. O'Hearn
committed
n_gather = scounts[target_proc];
if ( system->my_rank == target_proc )
{
bucketlist = smalloc( sizeof( real ) * n_gather,
"setup_sparse_approx_inverse::bucketlist" );
}
Kurt A. O'Hearn
committed
/* send local buckets to target processor for quickselect */
t_start = MPI_Wtime( );
MPI_Gather( scounts_local + target_proc, 1, MPI_INT, scounts,
1, MPI_INT, target_proc, comm );
t_comm += MPI_Wtime( ) - t_start;
if ( system->my_rank == target_proc )
{
dspls[0] = 0;
for ( i = 0; i < nprocs - 1; ++i )
{
dspls[i + 1] = dspls[i] + scounts[i];
}
}
Kurt A. O'Hearn
committed
t_start = MPI_Wtime( );
MPI_Gatherv( bucketlist_local + dspls_local[target_proc], scounts_local[target_proc], MPI_DOUBLE,
bucketlist, scounts, dspls, MPI_DOUBLE, target_proc, comm);
Kurt A. O'Hearn
committed
t_comm += MPI_Wtime( ) - t_start;
Kurt A. O'Hearn
committed
/* apply quick select algorithm at the target process */
if ( system->my_rank == target_proc )
Kurt A. O'Hearn
committed
right = n_gather-1;
Kurt A. O'Hearn
committed
while ( k )
{
p = left;
turn = 1 - turn;
Kurt A. O'Hearn
committed
/* alternating pivots in order to handle corner cases */
if ( turn == 1 )
{
pivot = bucketlist[right];
}
else
{
pivot = bucketlist[left];
}
Kurt A. O'Hearn
committed
for ( i = left + 1 - turn; i <= right-turn; ++i )
Kurt A. O'Hearn
committed
if ( bucketlist[i] > pivot )
{
tmp = bucketlist[i];
bucketlist[i] = bucketlist[p];
bucketlist[p] = tmp;
p++;
}
}
Kurt A. O'Hearn
committed
if ( turn == 1 )
{
tmp = bucketlist[p];
bucketlist[p] = bucketlist[right];
bucketlist[right] = tmp;
}
else
{
tmp = bucketlist[p];
bucketlist[p] = bucketlist[left];
bucketlist[left] = tmp;
}
Kurt A. O'Hearn
committed
if ( p == k - 1)
{
threshold = bucketlist[p];
break;
}
Kurt A. O'Hearn
committed
else if ( p > k - 1 )
{
right = p - 1;
}
else
{
left = p + 1;
}
}
Kurt A. O'Hearn
committed
/* comment out if ACKS2 and/or EE is not an option */
// if ( threshold < 1.000000 )
// {
// threshold = 1.000001;
// }
Kurt A. O'Hearn
committed
/* broadcast the filtering value */
t_start = MPI_Wtime( );
MPI_Bcast( &threshold, 1, MPI_DOUBLE, target_proc, comm );
Kurt A. O'Hearn
committed
t_comm += MPI_Wtime( ) - t_start;
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
int nnz = 0;
#endif
/* build entries of that pattern*/
for ( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
A_spar_patt->start[i] = A->start[i];
size = A->start[i];
for ( pj = A->start[i]; pj < A->end[i]; ++pj )
{
if ( ( A->entries[pj].val >= threshold ) || ( A->entries[pj].j == i ) )
{
Kurt A. O'Hearn
committed
A_spar_patt->entries[size].val = A->entries[pj].val;
A_spar_patt->entries[size].j = A->entries[pj].j;
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
nnz++;
#endif
Kurt A. O'Hearn
committed
A_spar_patt->end[i] = size;
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
MPI_Allreduce( MPI_IN_PLACE, &nnz, 1, MPI_INT, MPI_SUM, comm );
if ( system->my_rank == MASTER_NODE )
{
fprintf( stdout, " [INFO] \ntotal nnz in all charge matrices = %d\ntotal nnz in all sparsity patterns = %d\nthreshold = %.15lf\n",
n, nnz, threshold );
fprintf( stdout, "SAI SETUP takes %.2f seconds\n", MPI_Wtime() - start );
fflush( stdout );
}
#endif
MPI_Reduce( &t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE,
mpi_data->world );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
if( system->my_rank == MASTER_NODE )
{
data->timing.cm_solver_comm += total_comm / nprocs;
}
sfree( input_array, "setup_sparse_approx_inverse::input_array" );
sfree( scounts_local, "setup_sparse_approx_inverse::scounts_local" );
sfree( scounts, "setup_sparse_approx_inverse::scounts" );
sfree( bin_elements, "setup_sparse_approx_inverse::bin_elements" );
sfree( dspls_local, "setup_sparse_approx_inverse::displs_local" );
sfree( bucketlist_local, "setup_sparse_approx_inverse::bucketlist_local" );
sfree( dspls, "setup_sparse_approx_inverse::dspls" );
if ( nprocs > 1)
{
sfree( pivotlist, "setup_sparse_approx_inverse::pivotlist" );
}
sfree( samplelist_local, "setup_sparse_approx_inverse::samplelist_local" );
if ( system->my_rank == MASTER_NODE )
{
sfree( samplelist, "setup_sparse_approx_inverse::samplelist" );
sfree( srecv, "setup_sparse_approx_inverse::srecv" );
sfree( sdispls, "setup_sparse_approx_inverse::sdispls" );
}
if ( system->my_rank == target_proc )
{
sfree( bucketlist, "setup_sparse_approx_inverse::bucketlist" );
}
Kurt A. O'Hearn
committed
return MPI_Wtime( ) - start;
}
#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL)
#if defined(NEUTRAL_TERRITORY)
real sparse_approx_inverse( reax_system const * const system,
simulation_data * const data,
storage * const workspace, mpi_datatypes * const mpi_data,
sparse_matrix * const A, sparse_matrix * const A_spar_patt,
sparse_matrix **A_app_inv, int nprocs )
{
int N, M, d_i, d_j;
int i, k, pj, j_temp;
int local_pos, atom_pos, identity_pos;
lapack_int m, n, nrhs, lda, ldb, info;
int *pos_x, *X;
real *e_j, *dense_matrix;
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
int *row_nnz;
int **j_list;
real **val_list;
Kurt A. O'Hearn
committed
int d, count, index;
mpi_out_data *out_bufs;
MPI_Comm comm;
Kurt A. O'Hearn
committed
MPI_Request req[12];
MPI_Status stat[12];
neighbor_proc *nbr;
int *j_send, *j_recv[6];
real *val_send, *val_recv[6];
real start, t_start, t_comm;
real total_comm;
start = MPI_Wtime( );
t_comm = 0.0;
Kurt A. O'Hearn
committed
comm = mpi_data->world;
Kurt A. O'Hearn
committed
if ( *A_app_inv == NULL)
{
//TODO: FULL_MATRIX?
Allocate_Matrix( A_app_inv, A_spar_patt->n, A->NT, A_spar_patt->m, SYM_FULL_MATRIX );
}
else /* if ( (*A_app_inv)->m < A_spar_patt->m ) */
{
Deallocate_Matrix( *A_app_inv );
Allocate_Matrix( A_app_inv, A_spar_patt->n, A->NT, A_spar_patt->m, SYM_FULL_MATRIX );
}
Kurt A. O'Hearn
committed
pos_x = NULL;
X = NULL;
Kurt A. O'Hearn
committed
row_nnz = NULL;
j_list = NULL;
val_list = NULL;
Kurt A. O'Hearn
committed
j_send = NULL;
val_send = NULL;
for ( d = 0; d < 6; ++d )
{
j_recv[d] = NULL;
val_recv[d] = NULL;
Kurt A. O'Hearn
committed
////////////////////
row_nnz = (int *) malloc( sizeof(int) * A->NT );
Kurt A. O'Hearn
committed
//TODO: allocation size
j_list = (int **) malloc( sizeof(int *) * system->N );
val_list = (real **) malloc( sizeof(real *) * system->N );
Kurt A. O'Hearn
committed
for ( i = 0; i < A->NT; ++i )
Kurt A. O'Hearn
committed
row_nnz[i] = 0;
Kurt A. O'Hearn
committed
/* mark the atoms that already have their row stored in the local matrix */
for ( i = 0; i < A->n; ++i )
Kurt A. O'Hearn
committed
row_nnz[i] = A->end[i] - A->start[i];
Kurt A. O'Hearn
committed
/* Announce the nnz's in each row that will be communicated later */
t_start = MPI_Wtime( );
Dist( system, mpi_data, row_nnz, REAL_PTR_TYPE, MPI_INT );
t_comm += MPI_Wtime( ) - t_start;
fprintf( stdout,"SAI after Dist call\n");
fflush( stdout );
comm = mpi_data->comm_mesh3D;
Kurt A. O'Hearn
committed
out_bufs = mpi_data->out_nt_buffers;
count = 0;
/* use a Dist-like approach to send the row information */
for ( d = 0; d < 6; ++d)
{
/* initiate recvs */
Kurt A. O'Hearn
committed
nbr = &system->my_nt_nbrs[d];
if ( nbr->atoms_cnt )
Kurt A. O'Hearn
committed
/* calculate the total data that will be received */
Kurt A. O'Hearn
committed
for ( i = nbr->atoms_str; i < (nbr->atoms_str + nbr->atoms_cnt); ++i )
Kurt A. O'Hearn
committed
cnt += row_nnz[i];
Kurt A. O'Hearn
committed
/* initiate Irecv */
if ( cnt )
Kurt A. O'Hearn
committed
count += 2;
Kurt A. O'Hearn
committed
j_recv[d] = (int *) malloc( sizeof(int) * cnt );
val_recv[d] = (real *) malloc( sizeof(real) * cnt );
Kurt A. O'Hearn
committed
fprintf( stdout,"Dist communication receive phase direction %d will receive %d\n", d, cnt);
fflush( stdout );
t_start = MPI_Wtime( );
MPI_Irecv( j_recv + d, cnt, MPI_INT, nbr->receive_rank, d, comm, &req[2 * d] );
MPI_Irecv( val_recv + d, cnt, MPI_DOUBLE, nbr->receive_rank, d, comm, &req[2 * d + 1] );
t_comm += MPI_Wtime( ) - t_start;
}
Kurt A. O'Hearn
committed
}
/////////////////////
for( d = 0; d < 6; ++d)
{
nbr = &system->my_nt_nbrs[d];
/* send both messages in dimension d */
Kurt A. O'Hearn
committed
if ( out_bufs[d].cnt )