Newer
Older
/*----------------------------------------------------------------------
PuReMD - Purdue ReaxFF Molecular Dynamics Program
Copyright (2010) Purdue University
Hasan Metin Aktulga, haktulga@cs.purdue.edu
Joseph Fogarty, jcfogart@mail.usf.edu
Sagar Pandit, pandit@usf.edu
Ananth Y Grama, ayg@cs.purdue.edu
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
See the GNU General Public License for more details:
<http://www.gnu.org/licenses/>.
----------------------------------------------------------------------*/
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
#include "allocate.h"
Kurt A. O'Hearn
committed
#include "comm_tools.h"
#include "io_tools.h"
#include "tool_box.h"
#include "vector.h"
Kurt A. O'Hearn
committed
/* Intel MKL */
#if defined(HAVE_LAPACKE_MKL)
#include "mkl.h"
/* reference LAPACK */
#elif defined(HAVE_LAPACKE)
#include "lapacke.h"
#endif
typedef struct
{
unsigned int j;
real val;
} sparse_matrix_entry;
enum preconditioner_type
{
LEFT = 0,
RIGHT = 1,
};
Kurt A. O'Hearn
committed
static int compare_matrix_entry( const void * const v1, const void * const v2 )
{
return ((sparse_matrix_entry *)v1)->j - ((sparse_matrix_entry *)v2)->j;
}
/* Routine used for sorting nonzeros within a sparse matrix row;
* internally, a combination of qsort and manual sorting is utilized
*
* A: sparse matrix for which to sort nonzeros within a row, stored in CSR format
*/
Kurt A. O'Hearn
committed
void Sort_Matrix_Rows( sparse_matrix * const A )
Kurt A. O'Hearn
committed
{
unsigned int i, pj, si, ei, temp_size;
sparse_matrix_entry *temp;
temp = NULL;
temp_size = 0;
Kurt A. O'Hearn
committed
/* sort each row of A using column indices */
Kurt A. O'Hearn
committed
for ( i = 0; i < A->n; ++i )
{
si = A->start[i];
ei = A->end[i];
if ( temp == NULL )
{
temp = smalloc( sizeof(sparse_matrix_entry) * (ei - si), "Sort_Matrix_Rows::temp" );
temp_size = ei - si;
}
else if ( temp_size < ei - si )
{
sfree( temp, "Sort_Matrix_Rows::temp" );
temp = smalloc( sizeof(sparse_matrix_entry) * (ei - si), "Sort_Matrix_Rows::temp" );
temp_size = ei - si;
}
for ( pj = 0; pj < (ei - si); ++pj )
{
temp[pj].j = A->j[si + pj];
temp[pj].val = A->val[si + pj];
}
/* polymorphic sort in standard C library using column indices */
qsort( temp, ei - si, sizeof(sparse_matrix_entry), compare_matrix_entry );
for ( pj = 0; pj < (ei - si); ++pj )
{
A->j[si + pj] = temp[pj].j;
A->val[si + pj] = temp[pj].val;
}
Kurt A. O'Hearn
committed
}
sfree( temp, "Sort_Matrix_Rows::temp" );
Kurt A. O'Hearn
committed
}
static int compare_dbls( const void* arg1, const void* arg2 )
{
Kurt A. O'Hearn
committed
int ret;
double a1, a2;
a1 = *(double *) arg1;
a2 = *(double *) arg2;
if ( a1 < a2 )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = -1;
}
else if (a1 == a2)
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = 0;
}
else
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = 1;
}
return ret;
}
static void qsort_dbls( double *array, int array_len )
{
Kurt A. O'Hearn
committed
qsort( array, (size_t) array_len, sizeof(double),
Kurt A. O'Hearn
committed
compare_dbls );
}
static int find_bucket( double *list, int len, double a )
{
int s, e, m;
Kurt A. O'Hearn
committed
if ( len == 0 )
{
return 0;
}
Kurt A. O'Hearn
committed
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
if ( a > list[len - 1] )
{
return len;
}
s = 0;
e = len - 1;
while ( s < e )
{
m = (s + e) / 2;
if ( list[m] < a )
{
s = m + 1;
}
else
{
e = m;
}
}
return s;
}
Kurt A. O'Hearn
committed
/* Jacobi preconditioner computation */
//real jacobi( const sparse_matrix * const H, real * const Hdia_inv )
Kurt A. O'Hearn
committed
void jacobi( const reax_system * const system, real * const Hdia_inv )
Kurt A. O'Hearn
committed
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
{
unsigned int i;
for ( i = 0; i < system->n; ++i )
{
// if ( FABS( H->val[H->start[i + 1] - 1] ) > 1.0e-15 )
// {
Hdia_inv[i] = 1.0 / system->reax_param.sbp[ system->my_atoms[i].type ].eta;
// }
// else
// {
// Hdia_inv[i] = 1.0;
// }
}
}
/* Apply diagonal inverse (Jacobi) preconditioner to system residual
*
* Hdia_inv: diagonal inverse preconditioner (constructed using H)
* y: current residual
* x: preconditioned residual
* N: dimensions of preconditioner and vectors (# rows in H)
*/
static void jacobi_app( const real * const Hdia_inv, const real * const y,
real * const x, const int N )
{
unsigned int i;
for ( i = 0; i < N; ++i )
{
x[i] = y[i] * Hdia_inv[i];
}
}
/* Local arithmetic portion of dual sparse matrix-dense vector multiplication Ax = b
*
* A: sparse matrix, 1D partitioned row-wise
* x: two dense vectors
* b (output): two dense vectors
* N: number of entries in both vectors in b (must be equal)
*/
static void dual_Sparse_MatVec_local( sparse_matrix const * const A,
rvec2 const * const x, rvec2 * const b, int N )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
int i, j, k, si, num_rows;
Kurt A. O'Hearn
committed
real val;
for ( i = 0; i < N; ++i )
{
Kurt A. O'Hearn
committed
b[i][0] = 0.0;
b[i][1] = 0.0;
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
if ( A->format == SYM_HALF_MATRIX )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
/* diagonal only contributes once */
if( i < A->n )
{
b[i][0] += A->val[si] * x[i][0];
b[i][1] += A->val[si] * x[i][1];
Kurt A. O'Hearn
committed
k = si + 1;
}
/* zeros on the diagonal for i >= A->n,
* so skip the diagonal multplication step as zeros
* are not stored (idea: keep the NNZ's the same
* for full shell and neutral territory half-stored
* charge matrices to make debugging easier) */
else
{
k = si;
}
for ( ; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
b[j][0] += val * x[i][0];
b[j][1] += val * x[i][1];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
}
}
}
Kurt A. O'Hearn
committed
#else
Kurt A. O'Hearn
committed
num_rows = A->n;
if ( A->format == SYM_HALF_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* diagonal only contributes once */
b[i][0] += A->val[si] * x[i][0];
b[i][1] += A->val[si] * x[i][1];
Kurt A. O'Hearn
committed
for ( k = si + 1; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
b[j][0] += val * x[i][0];
b[j][1] += val * x[i][1];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
Kurt A. O'Hearn
committed
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
/* Local arithmetic portion of sparse matrix-dense vector multiplication Ax = b
*
* A: sparse matrix, 1D partitioned row-wise
* x: dense vector
* b (output): dense vector
* N: number of entries in b
*/
static void Sparse_MatVec_local( sparse_matrix const * const A,
real const * const x, real * const b, int N )
Kurt A. O'Hearn
committed
int i, j, k, si, num_rows;
real val;
Kurt A. O'Hearn
committed
b[i] = 0.0;
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
if ( A->format == SYM_HALF_MATRIX )
Kurt A. O'Hearn
committed
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* diagonal only contributes once */
if ( i < A->n )
{
b[i] += A->val[si] * x[i];
Kurt A. O'Hearn
committed
k = si + 1;
}
/* zeros on the diagonal for i >= A->n,
* so skip the diagonal multplication step as zeros
* are not stored (idea: keep the NNZ's the same
* for full shell and neutral territory half-stored
* charge matrices to make debugging easier) */
else
{
k = si;
}
for ( ; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
b[j] += val * x[i];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
}
}
}
#else
num_rows = A->n;
Kurt A. O'Hearn
committed
if ( A->format == SYM_HALF_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* A symmetric, upper triangular portion stored
* => diagonal only contributes once */
b[i] += A->val[si] * x[i];
Kurt A. O'Hearn
committed
for ( k = si + 1; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
b[j] += val * x[i];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
/* Communications for sparse matrix-dense vector multiplication Ax = b
*
* system:
* control:
* mpi_data:
* x: dense vector
* buf_type: data structure type for x
* mpi_type: MPI_Datatype struct for communications
*
* returns: communication time
*/
Kurt A. O'Hearn
committed
static void Sparse_MatVec_Comm_Part1( const reax_system * const system,
Kurt A. O'Hearn
committed
const control_params * const control, mpi_datatypes * const mpi_data,
void const * const x, int buf_type, MPI_Datatype mpi_type )
{
Kurt A. O'Hearn
committed
/* exploit 3D domain decomposition of simulation space with 3-stage communication pattern */
Kurt A. O'Hearn
committed
Dist( system, mpi_data, x, buf_type, mpi_type );
}
/* Communications for sparse matrix-dense vector multiplication Ax = b
*
* system:
* control:
* mpi_data:
* mat_format: storage type of sparse matrix A
* b: dense vector
* buf_type: data structure type for b
* mpi_type: MPI_Datatype struct for communications
*
* returns: communication time
*/
Kurt A. O'Hearn
committed
static void Sparse_MatVec_Comm_Part2( const reax_system * const system,
Kurt A. O'Hearn
committed
const control_params * const control, mpi_datatypes * const mpi_data,
int mat_format, void * const b, int buf_type, MPI_Datatype mpi_type )
{
if ( mat_format == SYM_HALF_MATRIX )
{
Kurt A. O'Hearn
committed
Coll( system, mpi_data, b, buf_type, mpi_type );
}
#if defined(NEUTRAL_TERRITORY)
else
{
Coll( system, mpi_data, b, buf_type, mpi_type );
}
Kurt A. O'Hearn
committed
#endif
}
Kurt A. O'Hearn
committed
void setup_sparse_approx_inverse( reax_system const * const system,
Kurt A. O'Hearn
committed
simulation_data * const data,
storage * const workspace, mpi_datatypes * const mpi_data,
sparse_matrix * const A, sparse_matrix *A_spar_patt,
int nprocs, real filter )
Kurt A. O'Hearn
committed
int i, ret, bin, total, pos;
Kurt A. O'Hearn
committed
int n, n_gather, s_local, s, n_local;
Kurt A. O'Hearn
committed
int k, pj, size;
int left, right, p, turn;
Kurt A. O'Hearn
committed
int num_rows;
Kurt A. O'Hearn
committed
int *srecv, *sdispls;
int *scounts_local, *scounts;
int *dspls_local, *dspls;
int *bin_elements;
real threshold, pivot, tmp;
real *input_array;
real *samplelist_local, *samplelist;
real *pivotlist;
real *bucketlist_local, *bucketlist;
Kurt A. O'Hearn
committed
real t_start, t_comm;
Kurt A. O'Hearn
committed
real total_comm;
srecv = NULL;
sdispls = NULL;
samplelist_local = NULL;
samplelist = NULL;
pivotlist = NULL;
input_array = NULL;
bucketlist_local = NULL;
bucketlist = NULL;
scounts_local = NULL;
scounts = NULL;
dspls_local = NULL;
dspls = NULL;
bin_elements = NULL;
Kurt A. O'Hearn
committed
t_comm = 0.0;
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
fprintf( stdout,"%d %d %d\n", A->n, A->NT, A->m );
fflush( stdout );
#else
num_rows = A->n;
#endif
Kurt A. O'Hearn
committed
if ( A_spar_patt->allocated == FALSE )
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Allocate_Matrix( A_spar_patt, A->n, A->NT, A->m, A->format );
#else
Allocate_Matrix( A_spar_patt, A->n, system->local_cap, A->m, A->format );
#endif
Kurt A. O'Hearn
committed
else /*if ( (*A_spar_patt)->m < A->m )*/
Kurt A. O'Hearn
committed
Deallocate_Matrix( A_spar_patt );
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Allocate_Matrix( A_spar_patt, A->n, A->NT, A->m, A->format );
#else
Allocate_Matrix( A_spar_patt, A->n, system->local_cap, A->m, A->format );
#endif
Kurt A. O'Hearn
committed
n_local = 0;
Kurt A. O'Hearn
committed
for ( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
n_local += (A->end[i] - A->start[i] + 9) / 10;
Kurt A. O'Hearn
committed
s_local = (int) (12.0 * (LOG2(n_local) + LOG2(nprocs)));
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Allreduce( &n_local, &n, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Reduce( &s_local, &s, 1, MPI_INT, MPI_SUM, MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
/* count num. bin elements for each processor, uniform bin sizes */
input_array = smalloc( sizeof(real) * n_local,
"setup_sparse_approx_inverse::input_array" );
scounts_local = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::scounts_local" );
scounts = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::scounts" );
bin_elements = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::bin_elements" );
dspls_local = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::displs_local" );
bucketlist_local = smalloc( sizeof(real) * n_local,
"setup_sparse_approx_inverse::bucketlist_local" );
dspls = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::dspls" );
if ( nprocs > 1 )
Kurt A. O'Hearn
committed
pivotlist = smalloc( sizeof(real) * (nprocs - 1),
"setup_sparse_approx_inverse::pivotlist" );
Kurt A. O'Hearn
committed
samplelist_local = smalloc( sizeof(real) * s_local,
"setup_sparse_approx_inverse::samplelist_local" );
if ( system->my_rank == MASTER_NODE )
Kurt A. O'Hearn
committed
samplelist = smalloc( sizeof(real) * s,
"setup_sparse_approx_inverse::samplelist" );
srecv = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::srecv" );
sdispls = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::sdispls" );
Kurt A. O'Hearn
committed
n_local = 0;
for ( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
for ( pj = A->start[i]; pj < A->end[i]; pj += 10 )
{
input_array[n_local++] = A->val[pj];
Kurt A. O'Hearn
committed
}
}
for ( i = 0; i < s_local; i++)
{
Kurt A. O'Hearn
committed
/* samplelist_local[i] = input_array[rand( ) % n_local]; */
samplelist_local[i] = input_array[ i ];
}
/* gather samples at the root process */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Gather( &s_local, 1, MPI_INT, srecv, 1, MPI_INT, MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
if( system->my_rank == MASTER_NODE )
{
sdispls[0] = 0;
for ( i = 0; i < nprocs - 1; ++i )
{
sdispls[i + 1] = sdispls[i] + srecv[i];
}
}
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Gatherv( samplelist_local, s_local, MPI_DOUBLE,
samplelist, srecv, sdispls, MPI_DOUBLE, MASTER_NODE, MPI_COMM_WORLD);
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
/* sort samples at the root process and select pivots */
if ( system->my_rank == MASTER_NODE )
{
qsort_dbls( samplelist, s );
Kurt A. O'Hearn
committed
for ( i = 1; i < nprocs; ++i )
{
pivotlist[i - 1] = samplelist[(i * s) / nprocs];
}
}
/* broadcast pivots */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
Kurt A. O'Hearn
committed
ret = MPI_Bcast( pivotlist, nprocs - 1, MPI_DOUBLE, MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
for ( i = 0; i < nprocs; ++i )
{
scounts_local[i] = 0;
}
Kurt A. O'Hearn
committed
for ( i = 0; i < n_local; ++i )
{
pos = find_bucket( pivotlist, nprocs - 1, input_array[i] );
scounts_local[pos]++;
}
for ( i = 0; i < nprocs; ++i )
{
bin_elements[i] = scounts_local[i];
scounts[i] = scounts_local[i];
}
/* compute displacements for MPI comm */
dspls_local[0] = 0;
Kurt A. O'Hearn
committed
for ( i = 0; i < nprocs - 1; ++i )
{
dspls_local[i + 1] = dspls_local[i] + scounts_local[i];
}
/* bin elements */
for ( i = 0; i < n_local; ++i )
{
bin = find_bucket( pivotlist, nprocs - 1, input_array[i] );
pos = dspls_local[bin] + scounts_local[bin] - bin_elements[bin];
bucketlist_local[pos] = input_array[i];
bin_elements[bin]--;
}
/* determine counts for elements per process */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Allreduce( MPI_IN_PLACE, scounts, nprocs, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
/* find the target process */
target_proc = 0;
total = 0;
Kurt A. O'Hearn
committed
k = n * filter;
for ( i = nprocs - 1; i >= 0; --i )
Kurt A. O'Hearn
committed
if ( total + scounts[i] >= k )
{
/* global k becomes local k*/
Kurt A. O'Hearn
committed
k -= total;
target_proc = i;
break;
}
total += scounts[i];
}
Kurt A. O'Hearn
committed
n_gather = scounts[target_proc];
if ( system->my_rank == target_proc )
{
bucketlist = smalloc( sizeof( real ) * n_gather,
"setup_sparse_approx_inverse::bucketlist" );
}
Kurt A. O'Hearn
committed
/* send local buckets to target processor for quickselect */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Gather( scounts_local + target_proc, 1, MPI_INT, scounts,
1, MPI_INT, target_proc, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
if ( system->my_rank == target_proc )
{
dspls[0] = 0;
for ( i = 0; i < nprocs - 1; ++i )
{
dspls[i + 1] = dspls[i] + scounts[i];
}
}
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Gatherv( bucketlist_local + dspls_local[target_proc], scounts_local[target_proc], MPI_DOUBLE,
bucketlist, scounts, dspls, MPI_DOUBLE, target_proc, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
/* apply quick select algorithm at the target process */
if ( system->my_rank == target_proc )
Kurt A. O'Hearn
committed
right = n_gather-1;
Kurt A. O'Hearn
committed
while ( k )
{
p = left;
turn = 1 - turn;
Kurt A. O'Hearn
committed
/* alternating pivots in order to handle corner cases */
if ( turn == 1 )
{
pivot = bucketlist[right];
}
else
{
pivot = bucketlist[left];
}
Kurt A. O'Hearn
committed
for ( i = left + 1 - turn; i <= right-turn; ++i )
Kurt A. O'Hearn
committed
if ( bucketlist[i] > pivot )
{
tmp = bucketlist[i];
bucketlist[i] = bucketlist[p];
bucketlist[p] = tmp;
p++;
}
}
Kurt A. O'Hearn
committed
if ( turn == 1 )
{
tmp = bucketlist[p];
bucketlist[p] = bucketlist[right];
bucketlist[right] = tmp;
}
else
{
tmp = bucketlist[p];
bucketlist[p] = bucketlist[left];
bucketlist[left] = tmp;
}
Kurt A. O'Hearn
committed
if ( p == k - 1)
{
threshold = bucketlist[p];
break;
}
Kurt A. O'Hearn
committed
else if ( p > k - 1 )
{
right = p - 1;
}
else
{
left = p + 1;
}
}
Kurt A. O'Hearn
committed
/* comment out if ACKS2 and/or EE is not an option */
// if ( threshold < 1.000000 )
// {
// threshold = 1.000001;
// }
Kurt A. O'Hearn
committed
/* broadcast the filtering value */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
Kurt A. O'Hearn
committed
ret = MPI_Bcast( &threshold, 1, MPI_DOUBLE, target_proc, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
int nnz = 0;
#endif
/* build entries of that pattern*/
for ( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
A_spar_patt->start[i] = A->start[i];
size = A->start[i];
for ( pj = A->start[i]; pj < A->end[i]; ++pj )
{
if ( ( A->val[pj] >= threshold ) || ( A->j[pj] == i ) )
A_spar_patt->val[size] = A->val[pj];
A_spar_patt->j[size] = A->j[pj];
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
nnz++;
#endif
Kurt A. O'Hearn
committed
A_spar_patt->end[i] = size;
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
Kurt A. O'Hearn
committed
ret = MPI_Allreduce( MPI_IN_PLACE, &nnz, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
if ( system->my_rank == MASTER_NODE )
{
fprintf( stdout, " [INFO] \ntotal nnz in all charge matrices = %d\ntotal nnz in all sparsity patterns = %d\nthreshold = %.15lf\n",
n, nnz, threshold );
fflush( stdout );
}
#endif
Kurt A. O'Hearn
committed
ret = MPI_Reduce( &t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM,
MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
if ( system->my_rank == MASTER_NODE )
Kurt A. O'Hearn
committed
{
data->timing.cm_solver_comm += total_comm / nprocs;
}
sfree( input_array, "setup_sparse_approx_inverse::input_array" );
sfree( scounts_local, "setup_sparse_approx_inverse::scounts_local" );
sfree( scounts, "setup_sparse_approx_inverse::scounts" );
sfree( bin_elements, "setup_sparse_approx_inverse::bin_elements" );
sfree( dspls_local, "setup_sparse_approx_inverse::displs_local" );
sfree( bucketlist_local, "setup_sparse_approx_inverse::bucketlist_local" );
sfree( dspls, "setup_sparse_approx_inverse::dspls" );
Kurt A. O'Hearn
committed
if ( nprocs > 1 )
Kurt A. O'Hearn
committed
{
sfree( pivotlist, "setup_sparse_approx_inverse::pivotlist" );
}
sfree( samplelist_local, "setup_sparse_approx_inverse::samplelist_local" );
if ( system->my_rank == MASTER_NODE )
{
sfree( samplelist, "setup_sparse_approx_inverse::samplelist" );
sfree( srecv, "setup_sparse_approx_inverse::srecv" );
sfree( sdispls, "setup_sparse_approx_inverse::sdispls" );
}
if ( system->my_rank == target_proc )
{
sfree( bucketlist, "setup_sparse_approx_inverse::bucketlist" );
}
}
#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL)
#if defined(NEUTRAL_TERRITORY)
real sparse_approx_inverse( reax_system const * const system,
simulation_data * const data,
storage * const workspace, mpi_datatypes * const mpi_data,
sparse_matrix * const A, sparse_matrix * const A_spar_patt,
sparse_matrix * const A_app_inv, int nprocs )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
int i, k, pj, j_temp, ret;
Kurt A. O'Hearn
committed
int N, M, d_i, d_j;
int local_pos, atom_pos, identity_pos;
int *pos_x, *X;
Kurt A. O'Hearn
committed
int *row_nnz;
Kurt A. O'Hearn
committed
int d, count, index;
Kurt A. O'Hearn
committed
int *j_send, *j_recv[6];
real *e_j, *dense_matrix;
real *val_send, *val_recv[6];
reax_atom *atom;
real **val_list;
real start, t_start, t_comm, total_comm;
mpi_out_data *out_bufs;
Kurt A. O'Hearn
committed
neighbor_proc *nbr;
Kurt A. O'Hearn
committed
MPI_Request req[12];
MPI_Status stat[12];
Kurt A. O'Hearn
committed
lapack_int m, n, nrhs, lda, ldb, info;
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
start = Get_Time( );
Kurt A. O'Hearn
committed
t_comm = 0.0;
Kurt A. O'Hearn
committed
if ( A_app_inv->allocated == FALSE )
Kurt A. O'Hearn
committed
{
//TODO: FULL_MATRIX?
Allocate_Matrix( A_app_inv, A_spar_patt->n, A->NT, A_spar_patt->m, SYM_FULL_MATRIX );
}
else /* if ( A_app_inv->m < A_spar_patt->m ) */
Kurt A. O'Hearn
committed
{
Deallocate_Matrix( A_app_inv );
Kurt A. O'Hearn
committed
Allocate_Matrix( A_app_inv, A_spar_patt->n, A->NT, A_spar_patt->m, SYM_FULL_MATRIX );
}
Kurt A. O'Hearn
committed
pos_x = NULL;
X = NULL;
Kurt A. O'Hearn
committed
row_nnz = NULL;
j_list = NULL;
val_list = NULL;
Kurt A. O'Hearn
committed
j_send = NULL;
val_send = NULL;
for ( d = 0; d < 6; ++d )
{
j_recv[d] = NULL;
val_recv[d] = NULL;
Kurt A. O'Hearn
committed
////////////////////
row_nnz = (int *) malloc( sizeof(int) * A->NT );
Kurt A. O'Hearn
committed
//TODO: allocation size
j_list = (int **) malloc( sizeof(int *) * system->N );
val_list = (real **) malloc( sizeof(real *) * system->N );
Kurt A. O'Hearn
committed
for ( i = 0; i < A->NT; ++i )
Kurt A. O'Hearn
committed
row_nnz[i] = 0;
Kurt A. O'Hearn
committed
/* mark the atoms that already have their row stored in the local matrix */
for ( i = 0; i < A->n; ++i )
Kurt A. O'Hearn
committed
row_nnz[i] = A->end[i] - A->start[i];
Kurt A. O'Hearn
committed
/* Announce the nnz's in each row that will be communicated later */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
Kurt A. O'Hearn
committed
Dist( system, mpi_data, row_nnz, REAL_PTR_TYPE, MPI_INT );
Kurt A. O'Hearn
committed
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
fprintf( stdout,"SAI after Dist call\n");
fflush( stdout );
Kurt A. O'Hearn
committed
out_bufs = mpi_data->out_nt_buffers;
count = 0;
/* use a Dist-like approach to send the row information */
for ( d = 0; d < 6; ++d)
{
/* initiate recvs */
Kurt A. O'Hearn
committed
nbr = &system->my_nt_nbrs[d];
if ( nbr->atoms_cnt )
Kurt A. O'Hearn
committed
/* calculate the total data that will be received */
Kurt A. O'Hearn
committed
for ( i = nbr->atoms_str; i < (nbr->atoms_str + nbr->atoms_cnt); ++i )
Kurt A. O'Hearn
committed
cnt += row_nnz[i];
Kurt A. O'Hearn
committed
/* initiate Irecv */
if ( cnt )
Kurt A. O'Hearn
committed
count += 2;
Kurt A. O'Hearn
committed
j_recv[d] = (int *) malloc( sizeof(int) * cnt );
val_recv[d] = (real *) malloc( sizeof(real) * cnt );
Kurt A. O'Hearn
committed
fprintf( stdout,"Dist communication receive phase direction %d will receive %d\n", d, cnt);
fflush( stdout );
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Irecv( j_recv + d, cnt, MPI_INT, nbr->receive_rank,
d, mpi_data->comm_mesh3D, &req[2 * d] );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );