Newer
Older
/*----------------------------------------------------------------------
PuReMD - Purdue ReaxFF Molecular Dynamics Program
Copyright (2010) Purdue University
Hasan Metin Aktulga, haktulga@cs.purdue.edu
Joseph Fogarty, jcfogart@mail.usf.edu
Sagar Pandit, pandit@usf.edu
Ananth Y Grama, ayg@cs.purdue.edu
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
See the GNU General Public License for more details:
<http://www.gnu.org/licenses/>.
----------------------------------------------------------------------*/
Kurt A. O'Hearn
committed
#include "reax_types.h"
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
#include "allocate.h"
#include "basic_comm.h"
#include "io_tools.h"
#include "tool_box.h"
#include "vector.h"
Kurt A. O'Hearn
committed
/* Intel MKL */
#if defined(HAVE_LAPACKE_MKL)
#include "mkl.h"
/* reference LAPACK */
#elif defined(HAVE_LAPACKE)
#include "lapacke.h"
#endif
#if defined(CG_PERFORMANCE)
real t_start, t_elapsed, matvec_time, dot_time;
#endif
typedef struct
{
unsigned int j;
real val;
} sparse_matrix_entry;
enum preconditioner_type
{
LEFT = 0,
RIGHT = 1,
};
Kurt A. O'Hearn
committed
static int compare_matrix_entry( const void * const v1, const void * const v2 )
{
return ((sparse_matrix_entry *)v1)->j - ((sparse_matrix_entry *)v2)->j;
}
/* Routine used for sorting nonzeros within a sparse matrix row;
* internally, a combination of qsort and manual sorting is utilized
*
* A: sparse matrix for which to sort nonzeros within a row, stored in CSR format
*/
Kurt A. O'Hearn
committed
void Sort_Matrix_Rows( sparse_matrix * const A )
Kurt A. O'Hearn
committed
{
unsigned int i, pj, si, ei, temp_size;
sparse_matrix_entry *temp;
temp = NULL;
temp_size = 0;
Kurt A. O'Hearn
committed
/* sort each row of A using column indices */
Kurt A. O'Hearn
committed
for ( i = 0; i < A->n; ++i )
{
si = A->start[i];
ei = A->end[i];
if ( temp == NULL )
{
temp = smalloc( sizeof(sparse_matrix_entry) * (ei - si), "Sort_Matrix_Rows::temp" );
temp_size = ei - si;
}
else if ( temp_size < ei - si )
{
sfree( temp, "Sort_Matrix_Rows::temp" );
temp = smalloc( sizeof(sparse_matrix_entry) * (ei - si), "Sort_Matrix_Rows::temp" );
temp_size = ei - si;
}
for ( pj = 0; pj < (ei - si); ++pj )
{
temp[pj].j = A->j[si + pj];
temp[pj].val = A->val[si + pj];
}
/* polymorphic sort in standard C library using column indices */
qsort( temp, ei - si, sizeof(sparse_matrix_entry), compare_matrix_entry );
for ( pj = 0; pj < (ei - si); ++pj )
{
A->j[si + pj] = temp[pj].j;
A->val[si + pj] = temp[pj].val;
}
Kurt A. O'Hearn
committed
}
sfree( temp, "Sort_Matrix_Rows::temp" );
Kurt A. O'Hearn
committed
}
static int compare_dbls( const void* arg1, const void* arg2 )
{
Kurt A. O'Hearn
committed
int ret;
double a1, a2;
a1 = *(double *) arg1;
a2 = *(double *) arg2;
if ( a1 < a2 )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = -1;
}
else if (a1 == a2)
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = 0;
}
else
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = 1;
}
return ret;
}
static void qsort_dbls( double *array, int array_len )
{
Kurt A. O'Hearn
committed
qsort( array, (size_t) array_len, sizeof(double),
Kurt A. O'Hearn
committed
compare_dbls );
}
static int find_bucket( double *list, int len, double a )
{
int s, e, m;
Kurt A. O'Hearn
committed
if ( len == 0 )
{
return 0;
}
Kurt A. O'Hearn
committed
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
if ( a > list[len - 1] )
{
return len;
}
s = 0;
e = len - 1;
while ( s < e )
{
m = (s + e) / 2;
if ( list[m] < a )
{
s = m + 1;
}
else
{
e = m;
}
}
return s;
}
Kurt A. O'Hearn
committed
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
/* Jacobi preconditioner computation */
//real jacobi( const sparse_matrix * const H, real * const Hdia_inv )
real jacobi( const reax_system * const system, real * const Hdia_inv )
{
unsigned int i;
real start;
start = Get_Time( );
for ( i = 0; i < system->n; ++i )
{
// if ( FABS( H->val[H->start[i + 1] - 1] ) > 1.0e-15 )
// {
Hdia_inv[i] = 1.0 / system->reax_param.sbp[ system->my_atoms[i].type ].eta;
// }
// else
// {
// Hdia_inv[i] = 1.0;
// }
}
return Get_Timing_Info( start );
}
/* Apply diagonal inverse (Jacobi) preconditioner to system residual
*
* Hdia_inv: diagonal inverse preconditioner (constructed using H)
* y: current residual
* x: preconditioned residual
* N: dimensions of preconditioner and vectors (# rows in H)
*/
static void jacobi_app( const real * const Hdia_inv, const real * const y,
real * const x, const int N )
{
unsigned int i;
for ( i = 0; i < N; ++i )
{
x[i] = y[i] * Hdia_inv[i];
}
}
/* Local arithmetic portion of dual sparse matrix-dense vector multiplication Ax = b
*
* A: sparse matrix, 1D partitioned row-wise
* x: two dense vectors
* b (output): two dense vectors
* N: number of entries in both vectors in b (must be equal)
*/
static void dual_Sparse_MatVec_local( sparse_matrix const * const A,
rvec2 const * const x, rvec2 * const b, int N )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
int i, j, k, si, num_rows;
Kurt A. O'Hearn
committed
real val;
for ( i = 0; i < N; ++i )
{
Kurt A. O'Hearn
committed
b[i][0] = 0.0;
b[i][1] = 0.0;
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
if ( A->format == SYM_HALF_MATRIX )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
/* diagonal only contributes once */
if( i < A->n )
{
b[i][0] += A->val[si] * x[i][0];
b[i][1] += A->val[si] * x[i][1];
Kurt A. O'Hearn
committed
k = si + 1;
}
/* zeros on the diagonal for i >= A->n,
* so skip the diagonal multplication step as zeros
* are not stored (idea: keep the NNZ's the same
* for full shell and neutral territory half-stored
* charge matrices to make debugging easier) */
else
{
k = si;
}
for ( ; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
b[j][0] += val * x[i][0];
b[j][1] += val * x[i][1];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
}
}
}
Kurt A. O'Hearn
committed
#else
Kurt A. O'Hearn
committed
num_rows = A->n;
if ( A->format == SYM_HALF_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* diagonal only contributes once */
b[i][0] += A->val[si] * x[i][0];
b[i][1] += A->val[si] * x[i][1];
Kurt A. O'Hearn
committed
for ( k = si + 1; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
b[j][0] += val * x[i][0];
b[j][1] += val * x[i][1];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
Kurt A. O'Hearn
committed
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
/* Local arithmetic portion of sparse matrix-dense vector multiplication Ax = b
*
* A: sparse matrix, 1D partitioned row-wise
* x: dense vector
* b (output): dense vector
* N: number of entries in b
*/
static void Sparse_MatVec_local( sparse_matrix const * const A,
real const * const x, real * const b, int N )
Kurt A. O'Hearn
committed
int i, j, k, si, num_rows;
real val;
Kurt A. O'Hearn
committed
b[i] = 0.0;
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
if ( A->format == SYM_HALF_MATRIX )
Kurt A. O'Hearn
committed
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* diagonal only contributes once */
if ( i < A->n )
{
b[i] += A->val[si] * x[i];
Kurt A. O'Hearn
committed
k = si + 1;
}
/* zeros on the diagonal for i >= A->n,
* so skip the diagonal multplication step as zeros
* are not stored (idea: keep the NNZ's the same
* for full shell and neutral territory half-stored
* charge matrices to make debugging easier) */
else
{
k = si;
}
for ( ; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
b[j] += val * x[i];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
}
}
}
#else
num_rows = A->n;
Kurt A. O'Hearn
committed
if ( A->format == SYM_HALF_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* A symmetric, upper triangular portion stored
* => diagonal only contributes once */
b[i] += A->val[si] * x[i];
Kurt A. O'Hearn
committed
for ( k = si + 1; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
b[j] += val * x[i];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
/* Communications for sparse matrix-dense vector multiplication Ax = b
*
* system:
* control:
* mpi_data:
* x: dense vector
* buf_type: data structure type for x
* mpi_type: MPI_Datatype struct for communications
*
* returns: communication time
*/
static real Sparse_MatVec_Comm_Part1( const reax_system * const system,
Kurt A. O'Hearn
committed
const control_params * const control, mpi_datatypes * const mpi_data,
void const * const x, int buf_type, MPI_Datatype mpi_type )
{
Kurt A. O'Hearn
committed
int t_start, t_comm;
Kurt A. O'Hearn
committed
t_comm = 0.0;
Kurt A. O'Hearn
committed
/* exploit 3D domain decomposition of simulation space with 3-stage communication pattern */
t_start = MPI_Wtime( );
Dist( system, mpi_data, x, buf_type, mpi_type );
t_comm += MPI_Wtime( ) - t_start;
return t_comm;
}
/* Communications for sparse matrix-dense vector multiplication Ax = b
*
* system:
* control:
* mpi_data:
* mat_format: storage type of sparse matrix A
* b: dense vector
* buf_type: data structure type for b
* mpi_type: MPI_Datatype struct for communications
*
* returns: communication time
*/
static real Sparse_MatVec_Comm_Part2( const reax_system * const system,
Kurt A. O'Hearn
committed
const control_params * const control, mpi_datatypes * const mpi_data,
int mat_format, void * const b, int buf_type, MPI_Datatype mpi_type )
{
int t_start, t_comm;
t_comm = 0.0;
if ( mat_format == SYM_HALF_MATRIX )
{
Kurt A. O'Hearn
committed
t_start = MPI_Wtime( );
Coll( system, mpi_data, b, buf_type, mpi_type );
t_comm += MPI_Wtime( ) - t_start;
}
#if defined(NEUTRAL_TERRITORY)
else
{
t_start = MPI_Wtime( );
Coll( system, mpi_data, b, buf_type, mpi_type );
t_comm += MPI_Wtime( ) - t_start;
}
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
return t_comm;
}
Kurt A. O'Hearn
committed
real setup_sparse_approx_inverse( reax_system const * const system,
simulation_data * const data,
storage * const workspace, mpi_datatypes * const mpi_data,
sparse_matrix * const A, sparse_matrix *A_spar_patt,
int nprocs, real filter )
{
int i, bin, total, pos;
Kurt A. O'Hearn
committed
int n, n_gather, s_local, s, n_local;
Kurt A. O'Hearn
committed
int k;
int pj, size;
int left, right, p, turn;
Kurt A. O'Hearn
committed
int num_rows;
real threshold, pivot, tmp;
real *input_array;
real *samplelist_local, *samplelist;
real *pivotlist;
real *bucketlist_local, *bucketlist;
Kurt A. O'Hearn
committed
int *srecv, *sdispls;
int *scounts_local, *scounts;
int *dspls_local, *dspls;
int *bin_elements;
MPI_Comm comm;
Kurt A. O'Hearn
committed
real start, t_start, t_comm;
real total_comm;
start = MPI_Wtime();
t_comm = 0.0;
srecv = NULL;
sdispls = NULL;
samplelist_local = NULL;
samplelist = NULL;
pivotlist = NULL;
input_array = NULL;
bucketlist_local = NULL;
bucketlist = NULL;
scounts_local = NULL;
scounts = NULL;
dspls_local = NULL;
dspls = NULL;
bin_elements = NULL;
Kurt A. O'Hearn
committed
comm = mpi_data->world;
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
fprintf( stdout,"%d %d %d\n", A->n, A->NT, A->m );
fflush( stdout );
#else
num_rows = A->n;
#endif
Kurt A. O'Hearn
committed
if ( A_spar_patt->allocated == FALSE )
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Allocate_Matrix( A_spar_patt, A->n, A->NT, A->m, A->format );
#else
Allocate_Matrix( A_spar_patt, A->n, system->local_cap, A->m, A->format );
#endif
Kurt A. O'Hearn
committed
else /*if ( (*A_spar_patt)->m < A->m )*/
Kurt A. O'Hearn
committed
Deallocate_Matrix( A_spar_patt );
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Allocate_Matrix( A_spar_patt, A->n, A->NT, A->m, A->format );
#else
Allocate_Matrix( A_spar_patt, A->n, system->local_cap, A->m, A->format );
#endif
Kurt A. O'Hearn
committed
n_local = 0;
for( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
n_local += (A->end[i] - A->start[i] + 9) / 10;
Kurt A. O'Hearn
committed
s_local = (int) (12.0 * (log2(n_local) + log2(nprocs)));
t_start = MPI_Wtime();
MPI_Allreduce( &n_local, &n, 1, MPI_INT, MPI_SUM, comm );
MPI_Reduce( &s_local, &s, 1, MPI_INT, MPI_SUM, MASTER_NODE, comm );
t_comm += MPI_Wtime() - t_start;
Kurt A. O'Hearn
committed
/* count num. bin elements for each processor, uniform bin sizes */
input_array = smalloc( sizeof(real) * n_local,
"setup_sparse_approx_inverse::input_array" );
scounts_local = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::scounts_local" );
scounts = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::scounts" );
bin_elements = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::bin_elements" );
dspls_local = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::displs_local" );
bucketlist_local = smalloc( sizeof(real) * n_local,
"setup_sparse_approx_inverse::bucketlist_local" );
dspls = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::dspls" );
if ( nprocs > 1 )
Kurt A. O'Hearn
committed
pivotlist = smalloc( sizeof(real) * (nprocs - 1),
"setup_sparse_approx_inverse::pivotlist" );
Kurt A. O'Hearn
committed
samplelist_local = smalloc( sizeof(real) * s_local,
"setup_sparse_approx_inverse::samplelist_local" );
if ( system->my_rank == MASTER_NODE )
Kurt A. O'Hearn
committed
samplelist = smalloc( sizeof(real) * s,
"setup_sparse_approx_inverse::samplelist" );
srecv = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::srecv" );
sdispls = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::sdispls" );
Kurt A. O'Hearn
committed
n_local = 0;
for ( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
for ( pj = A->start[i]; pj < A->end[i]; pj += 10 )
{
input_array[n_local++] = A->val[pj];
Kurt A. O'Hearn
committed
}
}
for ( i = 0; i < s_local; i++)
{
Kurt A. O'Hearn
committed
/* samplelist_local[i] = input_array[rand( ) % n_local]; */
samplelist_local[i] = input_array[ i ];
}
/* gather samples at the root process */
Kurt A. O'Hearn
committed
t_start = MPI_Wtime();
MPI_Gather( &s_local, 1, MPI_INT, srecv, 1, MPI_INT, MASTER_NODE, comm );
t_comm += MPI_Wtime() - t_start;
if( system->my_rank == MASTER_NODE )
{
sdispls[0] = 0;
for ( i = 0; i < nprocs - 1; ++i )
{
sdispls[i + 1] = sdispls[i] + srecv[i];
}
}
Kurt A. O'Hearn
committed
t_start = MPI_Wtime();
MPI_Gatherv( samplelist_local, s_local, MPI_DOUBLE,
samplelist, srecv, sdispls, MPI_DOUBLE, MASTER_NODE, comm);
t_comm += MPI_Wtime() - t_start;
/* sort samples at the root process and select pivots */
if ( system->my_rank == MASTER_NODE )
{
qsort_dbls( samplelist, s );
Kurt A. O'Hearn
committed
for ( i = 1; i < nprocs; ++i )
{
pivotlist[i - 1] = samplelist[(i * s) / nprocs];
}
}
/* broadcast pivots */
Kurt A. O'Hearn
committed
t_start = MPI_Wtime();
MPI_Bcast( pivotlist, nprocs - 1, MPI_DOUBLE, MASTER_NODE, comm );
Kurt A. O'Hearn
committed
t_comm += MPI_Wtime() - t_start;
Kurt A. O'Hearn
committed
for ( i = 0; i < nprocs; ++i )
{
scounts_local[i] = 0;
}
Kurt A. O'Hearn
committed
for ( i = 0; i < n_local; ++i )
{
pos = find_bucket( pivotlist, nprocs - 1, input_array[i] );
scounts_local[pos]++;
}
for ( i = 0; i < nprocs; ++i )
{
bin_elements[i] = scounts_local[i];
scounts[i] = scounts_local[i];
}
/* compute displacements for MPI comm */
dspls_local[0] = 0;
Kurt A. O'Hearn
committed
for ( i = 0; i < nprocs - 1; ++i )
{
dspls_local[i + 1] = dspls_local[i] + scounts_local[i];
}
/* bin elements */
for ( i = 0; i < n_local; ++i )
{
bin = find_bucket( pivotlist, nprocs - 1, input_array[i] );
pos = dspls_local[bin] + scounts_local[bin] - bin_elements[bin];
bucketlist_local[pos] = input_array[i];
bin_elements[bin]--;
}
/* determine counts for elements per process */
Kurt A. O'Hearn
committed
t_start = MPI_Wtime( );
MPI_Allreduce( MPI_IN_PLACE, scounts, nprocs, MPI_INT, MPI_SUM, comm );
Kurt A. O'Hearn
committed
t_comm += MPI_Wtime( ) - t_start;
Kurt A. O'Hearn
committed
/* find the target process */
target_proc = 0;
total = 0;
Kurt A. O'Hearn
committed
k = n * filter;
for ( i = nprocs - 1; i >= 0; --i )
Kurt A. O'Hearn
committed
if ( total + scounts[i] >= k )
{
/* global k becomes local k*/
Kurt A. O'Hearn
committed
k -= total;
target_proc = i;
break;
}
total += scounts[i];
}
Kurt A. O'Hearn
committed
n_gather = scounts[target_proc];
if ( system->my_rank == target_proc )
{
bucketlist = smalloc( sizeof( real ) * n_gather,
"setup_sparse_approx_inverse::bucketlist" );
}
Kurt A. O'Hearn
committed
/* send local buckets to target processor for quickselect */
t_start = MPI_Wtime( );
MPI_Gather( scounts_local + target_proc, 1, MPI_INT, scounts,
1, MPI_INT, target_proc, comm );
t_comm += MPI_Wtime( ) - t_start;
if ( system->my_rank == target_proc )
{
dspls[0] = 0;
for ( i = 0; i < nprocs - 1; ++i )
{
dspls[i + 1] = dspls[i] + scounts[i];
}
}
Kurt A. O'Hearn
committed
t_start = MPI_Wtime( );
MPI_Gatherv( bucketlist_local + dspls_local[target_proc], scounts_local[target_proc], MPI_DOUBLE,
bucketlist, scounts, dspls, MPI_DOUBLE, target_proc, comm);
Kurt A. O'Hearn
committed
t_comm += MPI_Wtime( ) - t_start;
Kurt A. O'Hearn
committed
/* apply quick select algorithm at the target process */
if ( system->my_rank == target_proc )
Kurt A. O'Hearn
committed
right = n_gather-1;
Kurt A. O'Hearn
committed
while ( k )
{
p = left;
turn = 1 - turn;
Kurt A. O'Hearn
committed
/* alternating pivots in order to handle corner cases */
if ( turn == 1 )
{
pivot = bucketlist[right];
}
else
{
pivot = bucketlist[left];
}
Kurt A. O'Hearn
committed
for ( i = left + 1 - turn; i <= right-turn; ++i )
Kurt A. O'Hearn
committed
if ( bucketlist[i] > pivot )
{
tmp = bucketlist[i];
bucketlist[i] = bucketlist[p];
bucketlist[p] = tmp;
p++;
}
}
Kurt A. O'Hearn
committed
if ( turn == 1 )
{
tmp = bucketlist[p];
bucketlist[p] = bucketlist[right];
bucketlist[right] = tmp;
}
else
{
tmp = bucketlist[p];
bucketlist[p] = bucketlist[left];
bucketlist[left] = tmp;
}
Kurt A. O'Hearn
committed
if ( p == k - 1)
{
threshold = bucketlist[p];
break;
}
Kurt A. O'Hearn
committed
else if ( p > k - 1 )
{
right = p - 1;
}
else
{
left = p + 1;
}
}
Kurt A. O'Hearn
committed
/* comment out if ACKS2 and/or EE is not an option */
// if ( threshold < 1.000000 )
// {
// threshold = 1.000001;
// }
Kurt A. O'Hearn
committed
/* broadcast the filtering value */
t_start = MPI_Wtime( );
MPI_Bcast( &threshold, 1, MPI_DOUBLE, target_proc, comm );
Kurt A. O'Hearn
committed
t_comm += MPI_Wtime( ) - t_start;
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
int nnz = 0;
#endif
/* build entries of that pattern*/
for ( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
A_spar_patt->start[i] = A->start[i];
size = A->start[i];
for ( pj = A->start[i]; pj < A->end[i]; ++pj )
{
if ( ( A->val[pj] >= threshold ) || ( A->j[pj] == i ) )
A_spar_patt->val[size] = A->val[pj];
A_spar_patt->j[size] = A->j[pj];
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
nnz++;
#endif
Kurt A. O'Hearn
committed
A_spar_patt->end[i] = size;
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
MPI_Allreduce( MPI_IN_PLACE, &nnz, 1, MPI_INT, MPI_SUM, comm );
if ( system->my_rank == MASTER_NODE )
{
fprintf( stdout, " [INFO] \ntotal nnz in all charge matrices = %d\ntotal nnz in all sparsity patterns = %d\nthreshold = %.15lf\n",
n, nnz, threshold );
fprintf( stdout, "SAI SETUP takes %.2f seconds\n", MPI_Wtime() - start );
fflush( stdout );
}
#endif
MPI_Reduce( &t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE,
mpi_data->world );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
if( system->my_rank == MASTER_NODE )
{
data->timing.cm_solver_comm += total_comm / nprocs;
}
sfree( input_array, "setup_sparse_approx_inverse::input_array" );
sfree( scounts_local, "setup_sparse_approx_inverse::scounts_local" );
sfree( scounts, "setup_sparse_approx_inverse::scounts" );
sfree( bin_elements, "setup_sparse_approx_inverse::bin_elements" );
sfree( dspls_local, "setup_sparse_approx_inverse::displs_local" );
sfree( bucketlist_local, "setup_sparse_approx_inverse::bucketlist_local" );
sfree( dspls, "setup_sparse_approx_inverse::dspls" );
if ( nprocs > 1)
{
sfree( pivotlist, "setup_sparse_approx_inverse::pivotlist" );
}
sfree( samplelist_local, "setup_sparse_approx_inverse::samplelist_local" );
if ( system->my_rank == MASTER_NODE )
{
sfree( samplelist, "setup_sparse_approx_inverse::samplelist" );
sfree( srecv, "setup_sparse_approx_inverse::srecv" );
sfree( sdispls, "setup_sparse_approx_inverse::sdispls" );
}
if ( system->my_rank == target_proc )
{
sfree( bucketlist, "setup_sparse_approx_inverse::bucketlist" );
}
Kurt A. O'Hearn
committed
return MPI_Wtime( ) - start;
}
#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL)
#if defined(NEUTRAL_TERRITORY)
real sparse_approx_inverse( reax_system const * const system,
simulation_data * const data,
storage * const workspace, mpi_datatypes * const mpi_data,
sparse_matrix * const A, sparse_matrix * const A_spar_patt,
sparse_matrix **A_app_inv, int nprocs )
{
int N, M, d_i, d_j;
int i, k, pj, j_temp;
int local_pos, atom_pos, identity_pos;
lapack_int m, n, nrhs, lda, ldb, info;
int *pos_x, *X;
real *e_j, *dense_matrix;
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
int *row_nnz;
int **j_list;
real **val_list;
Kurt A. O'Hearn
committed
int d, count, index;
mpi_out_data *out_bufs;
MPI_Comm comm;
Kurt A. O'Hearn
committed
MPI_Request req[12];
MPI_Status stat[12];
neighbor_proc *nbr;
int *j_send, *j_recv[6];
real *val_send, *val_recv[6];
real start, t_start, t_comm;
real total_comm;
start = MPI_Wtime( );
t_comm = 0.0;
Kurt A. O'Hearn
committed
comm = mpi_data->world;
Kurt A. O'Hearn
committed
if ( *A_app_inv == NULL)
{
//TODO: FULL_MATRIX?
Allocate_Matrix( A_app_inv, A_spar_patt->n, A->NT, A_spar_patt->m, SYM_FULL_MATRIX );
}
else /* if ( (*A_app_inv)->m < A_spar_patt->m ) */
{
Deallocate_Matrix( *A_app_inv );
Allocate_Matrix( A_app_inv, A_spar_patt->n, A->NT, A_spar_patt->m, SYM_FULL_MATRIX );
}
Kurt A. O'Hearn
committed
pos_x = NULL;
X = NULL;
Kurt A. O'Hearn
committed
row_nnz = NULL;
j_list = NULL;
val_list = NULL;
Kurt A. O'Hearn
committed
j_send = NULL;
val_send = NULL;
for ( d = 0; d < 6; ++d )
{
j_recv[d] = NULL;
val_recv[d] = NULL;
Kurt A. O'Hearn
committed
////////////////////
row_nnz = (int *) malloc( sizeof(int) * A->NT );
Kurt A. O'Hearn
committed
//TODO: allocation size
j_list = (int **) malloc( sizeof(int *) * system->N );
val_list = (real **) malloc( sizeof(real *) * system->N );
Kurt A. O'Hearn
committed
for ( i = 0; i < A->NT; ++i )
Kurt A. O'Hearn
committed
row_nnz[i] = 0;
Kurt A. O'Hearn
committed
/* mark the atoms that already have their row stored in the local matrix */
for ( i = 0; i < A->n; ++i )
Kurt A. O'Hearn
committed
row_nnz[i] = A->end[i] - A->start[i];
Kurt A. O'Hearn
committed
/* Announce the nnz's in each row that will be communicated later */
t_start = MPI_Wtime( );
Dist( system, mpi_data, row_nnz, REAL_PTR_TYPE, MPI_INT );