Newer
Older
/*----------------------------------------------------------------------
PuReMD - Purdue ReaxFF Molecular Dynamics Program
Copyright (2010) Purdue University
Hasan Metin Aktulga, haktulga@cs.purdue.edu
Joseph Fogarty, jcfogart@mail.usf.edu
Sagar Pandit, pandit@usf.edu
Ananth Y Grama, ayg@cs.purdue.edu
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
See the GNU General Public License for more details:
<http://www.gnu.org/licenses/>.
----------------------------------------------------------------------*/
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
#include "allocate.h"
Kurt A. O'Hearn
committed
#include "comm_tools.h"
#include "io_tools.h"
#include "tool_box.h"
#include "vector.h"
Kurt A. O'Hearn
committed
/* Intel MKL */
#if defined(HAVE_LAPACKE_MKL)
#include "mkl.h"
/* reference LAPACK */
#elif defined(HAVE_LAPACKE)
#include "lapacke.h"
#endif
typedef struct
{
unsigned int j;
real val;
} sparse_matrix_entry;
enum preconditioner_type
{
LEFT = 0,
RIGHT = 1,
};
Kurt A. O'Hearn
committed
static int compare_matrix_entry( const void * const v1, const void * const v2 )
{
return ((sparse_matrix_entry *)v1)->j - ((sparse_matrix_entry *)v2)->j;
}
/* Routine used for sorting nonzeros within a sparse matrix row;
* internally, a combination of qsort and manual sorting is utilized
*
* A: sparse matrix for which to sort nonzeros within a row, stored in CSR format
*/
Kurt A. O'Hearn
committed
void Sort_Matrix_Rows( sparse_matrix * const A )
Kurt A. O'Hearn
committed
{
unsigned int i, pj, si, ei, temp_size;
sparse_matrix_entry *temp;
temp = NULL;
temp_size = 0;
Kurt A. O'Hearn
committed
/* sort each row of A using column indices */
Kurt A. O'Hearn
committed
for ( i = 0; i < A->n; ++i )
{
si = A->start[i];
ei = A->end[i];
if ( temp == NULL )
{
temp = smalloc( sizeof(sparse_matrix_entry) * (ei - si), "Sort_Matrix_Rows::temp" );
temp_size = ei - si;
}
else if ( temp_size < ei - si )
{
sfree( temp, "Sort_Matrix_Rows::temp" );
temp = smalloc( sizeof(sparse_matrix_entry) * (ei - si), "Sort_Matrix_Rows::temp" );
temp_size = ei - si;
}
for ( pj = 0; pj < (ei - si); ++pj )
{
temp[pj].j = A->j[si + pj];
temp[pj].val = A->val[si + pj];
}
/* polymorphic sort in standard C library using column indices */
qsort( temp, ei - si, sizeof(sparse_matrix_entry), compare_matrix_entry );
for ( pj = 0; pj < (ei - si); ++pj )
{
A->j[si + pj] = temp[pj].j;
A->val[si + pj] = temp[pj].val;
}
Kurt A. O'Hearn
committed
}
sfree( temp, "Sort_Matrix_Rows::temp" );
Kurt A. O'Hearn
committed
}
static int compare_dbls( const void* arg1, const void* arg2 )
{
Kurt A. O'Hearn
committed
int ret;
double a1, a2;
a1 = *(double *) arg1;
a2 = *(double *) arg2;
if ( a1 < a2 )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = -1;
}
else if (a1 == a2)
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = 0;
}
else
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
ret = 1;
}
return ret;
}
static void qsort_dbls( double *array, int array_len )
{
Kurt A. O'Hearn
committed
qsort( array, (size_t) array_len, sizeof(double),
Kurt A. O'Hearn
committed
compare_dbls );
}
static int find_bucket( double *list, int len, double a )
{
int s, e, m;
Kurt A. O'Hearn
committed
if ( len == 0 )
{
return 0;
}
Kurt A. O'Hearn
committed
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
if ( a > list[len - 1] )
{
return len;
}
s = 0;
e = len - 1;
while ( s < e )
{
m = (s + e) / 2;
if ( list[m] < a )
{
s = m + 1;
}
else
{
e = m;
}
}
return s;
}
Kurt A. O'Hearn
committed
/* Compute diagonal inverese (Jacobi) preconditioner
*
* H: matrix used to compute preconditioner, in CSR format
* Hdia_inv: computed diagonal inverse preconditioner
*/
void jacobi( sparse_matrix const * const H, real * const Hdia_inv )
{
unsigned int i, pj;
if ( H->format == SYM_HALF_MATRIX )
{
for ( i = 0; i < H->n; ++i )
{
if ( FABS( H->val[H->start[i]] ) > 1.0e-15 )
{
Hdia_inv[i] = 1.0 / H->val[H->start[i]];
}
else
{
Hdia_inv[i] = 1.0;
}
}
}
Kurt A. O'Hearn
committed
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
else if ( H->format == SYM_FULL_MATRIX || H->format == FULL_MATRIX )
{
for ( i = 0; i < H->n; ++i )
{
for ( pj = H->start[i]; pj < H->start[i + 1]; ++pj )
{
if ( H->j[pj] == i )
{
if ( FABS( H->val[H->start[i]] ) > 1.0e-15 )
{
Hdia_inv[i] = 1.0 / H->val[pj];
}
else
{
Hdia_inv[i] = 1.0;
}
break;
}
}
}
}
}
/* Apply diagonal inverse (Jacobi) preconditioner to system residual
*
* Hdia_inv: diagonal inverse preconditioner (constructed using H)
* y: current residuals
* x: preconditioned residuals
* N: dimensions of preconditioner and vectors (# rows in H)
*/
static void dual_jacobi_app( const real * const Hdia_inv, const rvec2 * const y,
rvec2 * const x, const int N )
Kurt A. O'Hearn
committed
{
unsigned int i;
Kurt A. O'Hearn
committed
for ( i = 0; i < N; ++i )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
x[i][0] = y[i][0] * Hdia_inv[i];
x[i][1] = y[i][1] * Hdia_inv[i];
Kurt A. O'Hearn
committed
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
}
}
/* Apply diagonal inverse (Jacobi) preconditioner to system residual
*
* Hdia_inv: diagonal inverse preconditioner (constructed using H)
* y: current residual
* x: preconditioned residual
* N: dimensions of preconditioner and vectors (# rows in H)
*/
static void jacobi_app( const real * const Hdia_inv, const real * const y,
real * const x, const int N )
{
unsigned int i;
for ( i = 0; i < N; ++i )
{
x[i] = y[i] * Hdia_inv[i];
}
}
/* Local arithmetic portion of dual sparse matrix-dense vector multiplication Ax = b
*
* A: sparse matrix, 1D partitioned row-wise
* x: two dense vectors
* b (output): two dense vectors
* N: number of entries in both vectors in b (must be equal)
*/
Kurt A. O'Hearn
committed
static void Dual_Sparse_MatVec_local( sparse_matrix const * const A,
Kurt A. O'Hearn
committed
rvec2 const * const x, rvec2 * const b, int N )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
int i, j, k, si, num_rows;
Kurt A. O'Hearn
committed
real val;
for ( i = 0; i < N; ++i )
{
Kurt A. O'Hearn
committed
b[i][0] = 0.0;
b[i][1] = 0.0;
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
if ( A->format == SYM_HALF_MATRIX )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
/* diagonal only contributes once */
Kurt A. O'Hearn
committed
if ( i < A->n )
Kurt A. O'Hearn
committed
{
b[i][0] += A->val[si] * x[i][0];
b[i][1] += A->val[si] * x[i][1];
Kurt A. O'Hearn
committed
k = si + 1;
}
/* zeros on the diagonal for i >= A->n,
* so skip the diagonal multplication step as zeros
* are not stored (idea: keep the NNZ's the same
* for full shell and neutral territory half-stored
* charge matrices to make debugging easier) */
else
{
k = si;
}
for ( ; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
b[j][0] += val * x[i][0];
b[j][1] += val * x[i][1];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
}
}
}
Kurt A. O'Hearn
committed
#else
Kurt A. O'Hearn
committed
num_rows = A->n;
if ( A->format == SYM_HALF_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* diagonal only contributes once */
b[i][0] += A->val[si] * x[i][0];
b[i][1] += A->val[si] * x[i][1];
Kurt A. O'Hearn
committed
for ( k = si + 1; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
b[j][0] += val * x[i][0];
b[j][1] += val * x[i][1];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
Kurt A. O'Hearn
committed
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
b[i][0] += val * x[j][0];
b[i][1] += val * x[j][1];
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
/* Communications for sparse matrix-dense vector multiplication Ax = b
*
* system:
* control:
* mpi_data:
* x: dense vector
* buf_type: data structure type for x
* mpi_type: MPI_Datatype struct for communications
*
* returns: communication time
*/
static void Sparse_MatVec_Comm_Part1( const reax_system * const system,
const control_params * const control, mpi_datatypes * const mpi_data,
void const * const x, int buf_type, MPI_Datatype mpi_type )
{
/* exploit 3D domain decomposition of simulation space with 3-stage communication pattern */
Dist( system, mpi_data, x, buf_type, mpi_type );
}
Kurt A. O'Hearn
committed
/* Local arithmetic portion of sparse matrix-dense vector multiplication Ax = b
*
* A: sparse matrix, 1D partitioned row-wise
* x: dense vector
* b (output): dense vector
* N: number of entries in b
*/
static void Sparse_MatVec_local( sparse_matrix const * const A,
real const * const x, real * const b, int N )
Kurt A. O'Hearn
committed
int i, j, k, si, num_rows;
real val;
Kurt A. O'Hearn
committed
b[i] = 0.0;
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
if ( A->format == SYM_HALF_MATRIX )
Kurt A. O'Hearn
committed
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* diagonal only contributes once */
if ( i < A->n )
{
b[i] += A->val[si] * x[i];
Kurt A. O'Hearn
committed
k = si + 1;
}
/* zeros on the diagonal for i >= A->n,
* so skip the diagonal multplication step as zeros
* are not stored (idea: keep the NNZ's the same
* for full shell and neutral territory half-stored
* charge matrices to make debugging easier) */
else
{
k = si;
}
for ( ; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
b[j] += val * x[i];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
}
}
}
#else
num_rows = A->n;
Kurt A. O'Hearn
committed
if ( A->format == SYM_HALF_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
/* A symmetric, upper triangular portion stored
* => diagonal only contributes once */
b[i] += A->val[si] * x[i];
Kurt A. O'Hearn
committed
for ( k = si + 1; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
b[j] += val * x[i];
}
}
}
else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX )
{
for ( i = 0; i < num_rows; ++i )
{
si = A->start[i];
for ( k = si; k < A->end[i]; ++k )
{
j = A->j[k];
val = A->val[k];
Kurt A. O'Hearn
committed
b[i] += val * x[j];
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
#endif
Kurt A. O'Hearn
committed
/* Communications for sparse matrix-dense vector multiplication Ax = b
*
* system:
* control:
* mpi_data:
* mat_format: storage type of sparse matrix A
* b: dense vector
* buf_type: data structure type for b
* mpi_type: MPI_Datatype struct for communications
*
* returns: communication time
*/
Kurt A. O'Hearn
committed
static void Sparse_MatVec_Comm_Part2( const reax_system * const system,
Kurt A. O'Hearn
committed
const control_params * const control, mpi_datatypes * const mpi_data,
int mat_format, void * const b, int buf_type, MPI_Datatype mpi_type )
{
if ( mat_format == SYM_HALF_MATRIX )
{
Kurt A. O'Hearn
committed
Coll( system, mpi_data, b, buf_type, mpi_type );
}
#if defined(NEUTRAL_TERRITORY)
else
{
Coll( system, mpi_data, b, buf_type, mpi_type );
}
Kurt A. O'Hearn
committed
#endif
}
Kurt A. O'Hearn
committed
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
/* sparse matrix, dense vector multiplication AX = B
*
* system:
* control:
* data:
* A: symmetric matrix, stored in CSR format
* X: dense vector
* n: number of entries in x
* B (output): dense vector */
static void Dual_Sparse_MatVec( reax_system const * const system,
control_params const * const control, simulation_data * const data,
mpi_datatypes * const mpi_data, sparse_matrix const * const A,
rvec2 const * const x, int n, rvec2 * const b )
{
#if defined(LOG_PERFORMANCE)
real time;
time = Get_Time( );
#endif
Sparse_MatVec_Comm_Part1( system, control, mpi_data, x,
RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Dual_Sparse_MatVec_local( A, x, b, n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data, A->format, b,
RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
}
/* sparse matrix, dense vector multiplication Ax = b
*
* system:
* control:
* data:
* A: symmetric matrix, stored in CSR format
* x: dense vector
* n: number of entries in x
* b (output): dense vector */
static void Sparse_MatVec( reax_system const * const system,
control_params const * const control, simulation_data * const data,
mpi_datatypes * const mpi_data, sparse_matrix const * const A,
real const * const x, int n, real * const b )
{
#if defined(LOG_PERFORMANCE)
real time;
time = Get_Time( );
#endif
Sparse_MatVec_Comm_Part1( system, control, mpi_data, x,
REAL_PTR_TYPE, MPI_DOUBLE );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
Sparse_MatVec_local( A, x, b, n );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_spmv );
#endif
Sparse_MatVec_Comm_Part2( system, control, mpi_data, A->format, b,
REAL_PTR_TYPE, MPI_DOUBLE );
#if defined(LOG_PERFORMANCE)
Update_Timing_Info( &time, &data->timing.cm_solver_comm );
#endif
}
Kurt A. O'Hearn
committed
void setup_sparse_approx_inverse( reax_system const * const system,
Kurt A. O'Hearn
committed
simulation_data * const data,
storage * const workspace, mpi_datatypes * const mpi_data,
sparse_matrix * const A, sparse_matrix *A_spar_patt,
int nprocs, real filter )
Kurt A. O'Hearn
committed
int i, ret, bin, total, pos;
Kurt A. O'Hearn
committed
int n, n_gather, s_local, s, n_local;
Kurt A. O'Hearn
committed
int k, pj, size;
int left, right, p, turn;
Kurt A. O'Hearn
committed
int num_rows;
Kurt A. O'Hearn
committed
int *srecv, *sdispls;
int *scounts_local, *scounts;
int *dspls_local, *dspls;
int *bin_elements;
real threshold, pivot, tmp;
real *input_array;
real *samplelist_local, *samplelist;
real *pivotlist;
real *bucketlist_local, *bucketlist;
Kurt A. O'Hearn
committed
real t_start, t_comm;
Kurt A. O'Hearn
committed
real total_comm;
srecv = NULL;
sdispls = NULL;
samplelist_local = NULL;
samplelist = NULL;
pivotlist = NULL;
input_array = NULL;
bucketlist_local = NULL;
bucketlist = NULL;
scounts_local = NULL;
scounts = NULL;
dspls_local = NULL;
dspls = NULL;
bin_elements = NULL;
Kurt A. O'Hearn
committed
t_comm = 0.0;
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
num_rows = A->NT;
fprintf( stdout,"%d %d %d\n", A->n, A->NT, A->m );
fflush( stdout );
#else
num_rows = A->n;
#endif
Kurt A. O'Hearn
committed
if ( A_spar_patt->allocated == FALSE )
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Allocate_Matrix( A_spar_patt, A->n, A->NT, A->m, A->format );
#else
Allocate_Matrix( A_spar_patt, A->n, system->local_cap, A->m, A->format );
#endif
Kurt A. O'Hearn
committed
else /*if ( (*A_spar_patt)->m < A->m )*/
Kurt A. O'Hearn
committed
Deallocate_Matrix( A_spar_patt );
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Allocate_Matrix( A_spar_patt, A->n, A->NT, A->m, A->format );
#else
Allocate_Matrix( A_spar_patt, A->n, system->local_cap, A->m, A->format );
#endif
Kurt A. O'Hearn
committed
n_local = 0;
Kurt A. O'Hearn
committed
for ( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
n_local += (A->end[i] - A->start[i] + 9) / 10;
Kurt A. O'Hearn
committed
s_local = (int) (12.0 * (LOG2(n_local) + LOG2(nprocs)));
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Allreduce( &n_local, &n, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Reduce( &s_local, &s, 1, MPI_INT, MPI_SUM, MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
/* count num. bin elements for each processor, uniform bin sizes */
input_array = smalloc( sizeof(real) * n_local,
"setup_sparse_approx_inverse::input_array" );
scounts_local = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::scounts_local" );
scounts = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::scounts" );
bin_elements = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::bin_elements" );
dspls_local = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::displs_local" );
bucketlist_local = smalloc( sizeof(real) * n_local,
"setup_sparse_approx_inverse::bucketlist_local" );
dspls = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::dspls" );
if ( nprocs > 1 )
Kurt A. O'Hearn
committed
pivotlist = smalloc( sizeof(real) * (nprocs - 1),
"setup_sparse_approx_inverse::pivotlist" );
Kurt A. O'Hearn
committed
samplelist_local = smalloc( sizeof(real) * s_local,
"setup_sparse_approx_inverse::samplelist_local" );
if ( system->my_rank == MASTER_NODE )
Kurt A. O'Hearn
committed
samplelist = smalloc( sizeof(real) * s,
"setup_sparse_approx_inverse::samplelist" );
srecv = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::srecv" );
sdispls = smalloc( sizeof(int) * nprocs,
"setup_sparse_approx_inverse::sdispls" );
Kurt A. O'Hearn
committed
n_local = 0;
for ( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
for ( pj = A->start[i]; pj < A->end[i]; pj += 10 )
{
input_array[n_local++] = A->val[pj];
Kurt A. O'Hearn
committed
}
}
for ( i = 0; i < s_local; i++)
{
Kurt A. O'Hearn
committed
/* samplelist_local[i] = input_array[rand( ) % n_local]; */
samplelist_local[i] = input_array[ i ];
}
/* gather samples at the root process */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Gather( &s_local, 1, MPI_INT, srecv, 1, MPI_INT, MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
if( system->my_rank == MASTER_NODE )
{
sdispls[0] = 0;
for ( i = 0; i < nprocs - 1; ++i )
{
sdispls[i + 1] = sdispls[i] + srecv[i];
}
}
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Gatherv( samplelist_local, s_local, MPI_DOUBLE,
samplelist, srecv, sdispls, MPI_DOUBLE, MASTER_NODE, MPI_COMM_WORLD);
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
/* sort samples at the root process and select pivots */
if ( system->my_rank == MASTER_NODE )
{
qsort_dbls( samplelist, s );
Kurt A. O'Hearn
committed
for ( i = 1; i < nprocs; ++i )
{
pivotlist[i - 1] = samplelist[(i * s) / nprocs];
}
}
/* broadcast pivots */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
Kurt A. O'Hearn
committed
ret = MPI_Bcast( pivotlist, nprocs - 1, MPI_DOUBLE, MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
for ( i = 0; i < nprocs; ++i )
{
scounts_local[i] = 0;
}
Kurt A. O'Hearn
committed
for ( i = 0; i < n_local; ++i )
{
pos = find_bucket( pivotlist, nprocs - 1, input_array[i] );
scounts_local[pos]++;
}
for ( i = 0; i < nprocs; ++i )
{
bin_elements[i] = scounts_local[i];
scounts[i] = scounts_local[i];
}
/* compute displacements for MPI comm */
dspls_local[0] = 0;
Kurt A. O'Hearn
committed
for ( i = 0; i < nprocs - 1; ++i )
{
dspls_local[i + 1] = dspls_local[i] + scounts_local[i];
}
/* bin elements */
for ( i = 0; i < n_local; ++i )
{
bin = find_bucket( pivotlist, nprocs - 1, input_array[i] );
pos = dspls_local[bin] + scounts_local[bin] - bin_elements[bin];
bucketlist_local[pos] = input_array[i];
bin_elements[bin]--;
}
/* determine counts for elements per process */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Allreduce( MPI_IN_PLACE, scounts, nprocs, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
/* find the target process */
target_proc = 0;
total = 0;
Kurt A. O'Hearn
committed
k = n * filter;
for ( i = nprocs - 1; i >= 0; --i )
Kurt A. O'Hearn
committed
if ( total + scounts[i] >= k )
{
/* global k becomes local k*/
Kurt A. O'Hearn
committed
k -= total;
target_proc = i;
break;
}
total += scounts[i];
}
Kurt A. O'Hearn
committed
n_gather = scounts[target_proc];
if ( system->my_rank == target_proc )
{
bucketlist = smalloc( sizeof( real ) * n_gather,
"setup_sparse_approx_inverse::bucketlist" );
}
Kurt A. O'Hearn
committed
/* send local buckets to target processor for quickselect */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Gather( scounts_local + target_proc, 1, MPI_INT, scounts,
1, MPI_INT, target_proc, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
if ( system->my_rank == target_proc )
{
dspls[0] = 0;
for ( i = 0; i < nprocs - 1; ++i )
{
dspls[i + 1] = dspls[i] + scounts[i];
}
}
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Gatherv( bucketlist_local + dspls_local[target_proc], scounts_local[target_proc], MPI_DOUBLE,
bucketlist, scounts, dspls, MPI_DOUBLE, target_proc, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
/* apply quick select algorithm at the target process */
if ( system->my_rank == target_proc )
Kurt A. O'Hearn
committed
right = n_gather-1;
Kurt A. O'Hearn
committed
while ( k )
{
p = left;
turn = 1 - turn;
Kurt A. O'Hearn
committed
/* alternating pivots in order to handle corner cases */
if ( turn == 1 )
{
pivot = bucketlist[right];
}
else
{
pivot = bucketlist[left];
}
Kurt A. O'Hearn
committed
for ( i = left + 1 - turn; i <= right-turn; ++i )
Kurt A. O'Hearn
committed
if ( bucketlist[i] > pivot )
{
tmp = bucketlist[i];
bucketlist[i] = bucketlist[p];
bucketlist[p] = tmp;
p++;
}
}
Kurt A. O'Hearn
committed
if ( turn == 1 )
{
tmp = bucketlist[p];
bucketlist[p] = bucketlist[right];
bucketlist[right] = tmp;
}
else
{
tmp = bucketlist[p];
bucketlist[p] = bucketlist[left];
bucketlist[left] = tmp;
}
Kurt A. O'Hearn
committed
if ( p == k - 1)
{
threshold = bucketlist[p];
break;
}
Kurt A. O'Hearn
committed
else if ( p > k - 1 )
{
right = p - 1;
}
else
{
left = p + 1;
}
}
Kurt A. O'Hearn
committed
/* comment out if ACKS2 and/or EE is not an option */
// if ( threshold < 1.000000 )
// {
// threshold = 1.000001;
// }
Kurt A. O'Hearn
committed
/* broadcast the filtering value */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
Kurt A. O'Hearn
committed
ret = MPI_Bcast( &threshold, 1, MPI_DOUBLE, target_proc, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
int nnz = 0;
#endif
/* build entries of that pattern*/
for ( i = 0; i < num_rows; ++i )
Kurt A. O'Hearn
committed
A_spar_patt->start[i] = A->start[i];
size = A->start[i];
for ( pj = A->start[i]; pj < A->end[i]; ++pj )
{
if ( ( A->val[pj] >= threshold ) || ( A->j[pj] == i ) )
A_spar_patt->val[size] = A->val[pj];
A_spar_patt->j[size] = A->j[pj];
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
nnz++;
#endif
Kurt A. O'Hearn
committed
A_spar_patt->end[i] = size;
Kurt A. O'Hearn
committed
#if defined(DEBUG_FOCUS)
Kurt A. O'Hearn
committed
ret = MPI_Allreduce( MPI_IN_PLACE, &nnz, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
if ( system->my_rank == MASTER_NODE )
{
fprintf( stdout, " [INFO] \ntotal nnz in all charge matrices = %d\ntotal nnz in all sparsity patterns = %d\nthreshold = %.15lf\n",
n, nnz, threshold );
fflush( stdout );
}
#endif
Kurt A. O'Hearn
committed
ret = MPI_Reduce( &t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM,
MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
if ( system->my_rank == MASTER_NODE )
Kurt A. O'Hearn
committed
{
data->timing.cm_solver_comm += total_comm / nprocs;
}
sfree( input_array, "setup_sparse_approx_inverse::input_array" );
sfree( scounts_local, "setup_sparse_approx_inverse::scounts_local" );
sfree( scounts, "setup_sparse_approx_inverse::scounts" );
sfree( bin_elements, "setup_sparse_approx_inverse::bin_elements" );
sfree( dspls_local, "setup_sparse_approx_inverse::displs_local" );
sfree( bucketlist_local, "setup_sparse_approx_inverse::bucketlist_local" );
sfree( dspls, "setup_sparse_approx_inverse::dspls" );
Kurt A. O'Hearn
committed
if ( nprocs > 1 )
Kurt A. O'Hearn
committed
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
{
sfree( pivotlist, "setup_sparse_approx_inverse::pivotlist" );
}
sfree( samplelist_local, "setup_sparse_approx_inverse::samplelist_local" );
if ( system->my_rank == MASTER_NODE )
{
sfree( samplelist, "setup_sparse_approx_inverse::samplelist" );
sfree( srecv, "setup_sparse_approx_inverse::srecv" );
sfree( sdispls, "setup_sparse_approx_inverse::sdispls" );
}
if ( system->my_rank == target_proc )
{
sfree( bucketlist, "setup_sparse_approx_inverse::bucketlist" );
}
}
#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL)
#if defined(NEUTRAL_TERRITORY)
real sparse_approx_inverse( reax_system const * const system,
simulation_data * const data,
storage * const workspace, mpi_datatypes * const mpi_data,
sparse_matrix * const A, sparse_matrix * const A_spar_patt,
sparse_matrix * const A_app_inv, int nprocs )
Kurt A. O'Hearn
committed
{
Kurt A. O'Hearn
committed
int i, k, pj, j_temp, ret;
Kurt A. O'Hearn
committed
int N, M, d_i, d_j;
int local_pos, atom_pos, identity_pos;
int *pos_x, *X;
Kurt A. O'Hearn
committed
int *row_nnz;
Kurt A. O'Hearn
committed
int d, count, index;
Kurt A. O'Hearn
committed
int *j_send, *j_recv[6];
real *e_j, *dense_matrix;
real *val_send, *val_recv[6];
reax_atom *atom;
real **val_list;
real start, t_start, t_comm, total_comm;
mpi_out_data *out_bufs;
Kurt A. O'Hearn
committed
neighbor_proc *nbr;
Kurt A. O'Hearn
committed
MPI_Request req[12];
MPI_Status stat[12];
Kurt A. O'Hearn
committed
lapack_int m, n, nrhs, lda, ldb, info;
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
start = Get_Time( );
Kurt A. O'Hearn
committed
t_comm = 0.0;
Kurt A. O'Hearn
committed
if ( A_app_inv->allocated == FALSE )
Kurt A. O'Hearn
committed
{
//TODO: FULL_MATRIX?
Allocate_Matrix( A_app_inv, A_spar_patt->n, A->NT, A_spar_patt->m, SYM_FULL_MATRIX );
}
else /* if ( A_app_inv->m < A_spar_patt->m ) */
Kurt A. O'Hearn
committed
{
Deallocate_Matrix( A_app_inv );
Kurt A. O'Hearn
committed
Allocate_Matrix( A_app_inv, A_spar_patt->n, A->NT, A_spar_patt->m, SYM_FULL_MATRIX );
}
Kurt A. O'Hearn
committed
pos_x = NULL;
X = NULL;
Kurt A. O'Hearn
committed
row_nnz = NULL;
j_list = NULL;
val_list = NULL;
Kurt A. O'Hearn
committed
j_send = NULL;
val_send = NULL;
for ( d = 0; d < 6; ++d )
{
j_recv[d] = NULL;
val_recv[d] = NULL;
Kurt A. O'Hearn
committed
////////////////////
row_nnz = (int *) malloc( sizeof(int) * A->NT );
Kurt A. O'Hearn
committed
//TODO: allocation size
j_list = (int **) malloc( sizeof(int *) * system->N );
val_list = (real **) malloc( sizeof(real *) * system->N );
Kurt A. O'Hearn
committed
for ( i = 0; i < A->NT; ++i )
Kurt A. O'Hearn
committed
row_nnz[i] = 0;
Kurt A. O'Hearn
committed
/* mark the atoms that already have their row stored in the local matrix */
for ( i = 0; i < A->n; ++i )
Kurt A. O'Hearn
committed
row_nnz[i] = A->end[i] - A->start[i];
Kurt A. O'Hearn
committed
/* Announce the nnz's in each row that will be communicated later */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
Kurt A. O'Hearn
committed
Dist( system, mpi_data, row_nnz, REAL_PTR_TYPE, MPI_INT );
Kurt A. O'Hearn
committed
t_comm += Get_Time( ) - t_start;
fprintf( stdout,"SAI after Dist call\n" );
Kurt A. O'Hearn
committed
fflush( stdout );
Kurt A. O'Hearn
committed
out_bufs = mpi_data->out_nt_buffers;
count = 0;
/* use a Dist-like approach to send the row information */
for ( d = 0; d < 6; ++d)
{
/* initiate recvs */
Kurt A. O'Hearn
committed
nbr = &system->my_nt_nbrs[d];
if ( nbr->atoms_cnt )
Kurt A. O'Hearn
committed
/* calculate the total data that will be received */
Kurt A. O'Hearn
committed
for ( i = nbr->atoms_str; i < (nbr->atoms_str + nbr->atoms_cnt); ++i )
Kurt A. O'Hearn
committed
cnt += row_nnz[i];
Kurt A. O'Hearn
committed
/* initiate Irecv */
if ( cnt )
Kurt A. O'Hearn
committed
count += 2;
Kurt A. O'Hearn
committed
j_recv[d] = (int *) malloc( sizeof(int) * cnt );
val_recv[d] = (real *) malloc( sizeof(real) * cnt );
fprintf( stdout, "Dist communication receive phase direction %d will receive %d\n", d, cnt );
Kurt A. O'Hearn
committed
fflush( stdout );
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Irecv( j_recv + d, cnt, MPI_INT, nbr->receive_rank,
d, mpi_data->comm_mesh3D, &req[2 * d] );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Irecv( val_recv + d, cnt, MPI_DOUBLE, nbr->receive_rank,
d, mpi_data->comm_mesh3D, &req[2 * d + 1] );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
}
/////////////////////
for( d = 0; d < 6; ++d)
{
nbr = &system->my_nt_nbrs[d];
/* send both messages in dimension d */
Kurt A. O'Hearn
committed
if ( out_bufs[d].cnt )
Kurt A. O'Hearn
committed
for ( i = 0; i < out_bufs[d].cnt; ++i )
Kurt A. O'Hearn
committed
cnt += A->end[ out_bufs[d].index[i] ] - A->start[ out_bufs[d].index[i] ];
if ( out_bufs[d].index[i] < 0 || out_bufs[d].index[i] >= A->n )
Kurt A. O'Hearn
committed
fprintf( stdout, "INDEXING ERROR %d > %d\n", out_bufs[d].index[i], A->n );
fflush( stdout );
Kurt A. O'Hearn
committed
// row_nnz[ out_bufs[d].index[i] ];
fprintf( stdout,"Dist communication send phase direction %d should send %d\n", d, cnt );
Kurt A. O'Hearn
committed
fflush( stdout );
Kurt A. O'Hearn
committed
if ( cnt )
Kurt A. O'Hearn
committed
j_send = (int *) malloc( sizeof(int) * cnt );
val_send = (real *) malloc( sizeof(real) * cnt );
Kurt A. O'Hearn
committed
cnt = 0;
for ( i = 0; i < out_bufs[d].cnt; ++i )
Kurt A. O'Hearn
committed
for ( pj = A->start[ out_bufs[d].index[i] ]; pj < A->end[ out_bufs[d].index[i] ]; ++pj )
atom = &system->my_atoms[ A->j[pj] ];
Kurt A. O'Hearn
committed
j_send[cnt] = atom->orig_id;
val_send[cnt] = A->val[pj];
cnt++;
}
}
Kurt A. O'Hearn
committed
fprintf( stdout,"Dist communication send phase direction %d will send %d\n", d, cnt );
fflush( stdout );
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Send( j_send, cnt, MPI_INT, nbr->rank,
d, mpi_data->comm_mesh3D );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );
fprintf( stdout,"Dist communication send phase direction %d cnt = %d\n", d, cnt );
Kurt A. O'Hearn
committed
fflush( stdout );
ret = MPI_Send( val_send, cnt, MPI_DOUBLE, nbr->rank,
d, mpi_data->comm_mesh3D );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );
fprintf( stdout,"Dist communication send phase direction %d cnt = %d\n", d, cnt );
Kurt A. O'Hearn
committed
fflush( stdout );
Kurt A. O'Hearn
committed
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
}
fprintf( stdout," Dist communication for sending row info before waitany\n" );
Kurt A. O'Hearn
committed
fflush( stdout );
///////////////////////
for ( d = 0; d < count; ++d )
{
Kurt A. O'Hearn
committed
t_start = Get_Time( );
Kurt A. O'Hearn
committed
ret = MPI_Waitany( MAX_NT_NBRS, req, &index, stat );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
nbr = &system->my_nt_nbrs[index / 2];
cnt = 0;
for ( i = nbr->atoms_str; i < (nbr->atoms_str + nbr->atoms_cnt); ++i )
Kurt A. O'Hearn
committed
if ( index % 2 == 0 )
Kurt A. O'Hearn
committed
j_list[i] = (int *) malloc( sizeof(int) * row_nnz[i] );
for ( pj = 0; pj < row_nnz[i]; ++pj )
Kurt A. O'Hearn
committed
j_list[i][pj] = j_recv[index / 2][cnt];
cnt++;
}
}
Kurt A. O'Hearn
committed
else
Kurt A. O'Hearn
committed
val_list[i] = (real *) malloc( sizeof(real) * row_nnz[i] );
for ( pj = 0; pj < row_nnz[i]; ++pj )
Kurt A. O'Hearn
committed
val_list[i][pj] = val_recv[index / 2][cnt];
cnt++;
}
}
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
//////////////////////
fprintf( stdout, "Dist communication for sending row info worked\n" );
Kurt A. O'Hearn
committed
fflush( stdout );
//TODO: size?
X = (int *) malloc( sizeof(int) * (system->bigN + 1) );
pos_x = (int *) malloc( sizeof(int) * (system->bigN + 1) );
for ( i = 0; i < A_spar_patt->NT; ++i )
{
N = 0;
M = 0;
Kurt A. O'Hearn
committed
for ( k = 0; k <= system->bigN; ++k )
{
X[k] = 0;
pos_x[k] = 0;
}
/* find column indices of nonzeros (which will be the columns indices of the dense matrix) */
for ( pj = A_spar_patt->start[i]; pj < A_spar_patt->end[i]; ++pj )
{
j_temp = A_spar_patt->j[pj];
Kurt A. O'Hearn
committed
atom = &system->my_atoms[j_temp];
++N;
/* for each of those indices
Kurt A. O'Hearn
committed
* search through the row of full A of that index */
/* the case where the local matrix has that index's row */
Kurt A. O'Hearn
committed
if ( j_temp < A->NT )
Kurt A. O'Hearn
committed
for ( k = A->start[ j_temp ]; k < A->end[ j_temp ]; ++k )
{
/* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */
atom = &system->my_atoms[ A->j[k] ];
Kurt A. O'Hearn
committed
X[atom->orig_id] = 1;
}
}
/* the case where we communicated that index's row */
else
{
for ( k = 0; k < row_nnz[j_temp]; ++k )
{
/* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */
X[ j_list[j_temp][k] ] = 1;
}
}
}
/* enumerate the row indices from 0 to (# of nonzero rows - 1) for the dense matrix */
identity_pos = M;
Kurt A. O'Hearn
committed
atom = &system->my_atoms[ i ];
atom_pos = atom->orig_id;
for ( k = 0; k <= system->bigN; k++)
{
if ( X[k] != 0 )
{
Kurt A. O'Hearn
committed
pos_x[k] = M;
if ( k == atom_pos )
{
identity_pos = M;
}
++M;
}
}
/* allocate memory for NxM dense matrix */
dense_matrix = (real *) malloc( sizeof(real) * N * M );
/* fill in the entries of dense matrix */
Kurt A. O'Hearn
committed
for ( d_j = 0; d_j < N; ++d_j)
{
/* all rows are initialized to zero */
Kurt A. O'Hearn
committed
for ( d_i = 0; d_i < M; ++d_i )
{
dense_matrix[d_i * N + d_j] = 0.0;
}
/* change the value if any of the column indices is seen */
/* it is in the original list */
local_pos = A_spar_patt->j[ A_spar_patt->start[i] + d_j ];
Kurt A. O'Hearn
committed
if ( local_pos < 0 || local_pos >= system->N )
fprintf( stderr, "THE LOCAL POSITION OF THE ATOM IS NOT VALID, STOP THE EXECUTION\n" );
Kurt A. O'Hearn
committed
fflush( stderr );
}
/////////////////////////////
if ( local_pos < A->NT )
{
for ( d_i = A->start[local_pos]; d_i < A->end[local_pos]; ++d_i )
atom = &system->my_atoms[ A->j[d_i] ];
Kurt A. O'Hearn
committed
if ( pos_x[ atom->orig_id ] >= M || d_j >= N )
{
fprintf( stderr, "CANNOT MAP IT TO THE DENSE MATRIX, STOP THE EXECUTION, orig_id = %d, i = %d, j = %d, M = %d N = %d\n", atom->orig_id, pos_x[ atom->orig_id ], d_j, M, N );
fflush( stderr );
}
if ( X[ atom->orig_id ] == 1 )
dense_matrix[ pos_x[ atom->orig_id ] * N + d_j ] = A->val[d_i];
}
}
}
else
{
Kurt A. O'Hearn
committed
for ( d_i = 0; d_i < row_nnz[ local_pos ]; ++d_i )
Kurt A. O'Hearn
committed
if ( pos_x[ j_list[local_pos][d_i] ] >= M || d_j >= N )
{
fprintf( stderr, "CANNOT MAP IT TO THE DENSE MATRIX, STOP THE EXECUTION, %d %d\n", pos_x[ j_list[local_pos][d_i] ], d_j);
fflush( stderr );
}
if ( X[ j_list[local_pos][d_i] ] == 1 )
Kurt A. O'Hearn
committed
dense_matrix[ pos_x[ j_list[local_pos][d_i] ] * N + d_j ] = val_list[local_pos][d_i];
}
}
}
}
/* create the right hand side of the linear equation
Kurt A. O'Hearn
committed
* that is the full column of the identity matrix */
e_j = (real *) malloc( sizeof(real) * M );
Kurt A. O'Hearn
committed
//////////////////////
for ( k = 0; k < M; ++k )
{
e_j[k] = 0.0;
}
e_j[identity_pos] = 1.0;
/* Solve the overdetermined system AX = B through the least-squares problem:
Kurt A. O'Hearn
committed
* min ||B - AX||_2 */
m = M;
n = N;
nrhs = 1;
lda = N;
ldb = nrhs;
Kurt A. O'Hearn
committed
info = LAPACKE_dgels( LAPACK_ROW_MAJOR, 'N', m, n, nrhs, dense_matrix, lda,
e_j, ldb );
/* Check for the full rank */
if ( info > 0 )
{
Kurt A. O'Hearn
committed
fprintf( stderr, "The diagonal element %i of the triangular factor ", info );
fprintf( stderr, "of A is zero, so that A does not have full rank;\n" );
fprintf( stderr, "the least squares solution could not be computed.\n" );
exit( INVALID_INPUT );
}
/* accumulate the resulting vector to build A_app_inv */
A_app_inv->start[i] = A_spar_patt->start[i];
A_app_inv->end[i] = A_spar_patt->end[i];
for ( k = A_app_inv->start[i]; k < A_app_inv->end[i]; ++k)
A_app_inv->j[k] = A_spar_patt->j[k];
A_app_inv->val[k] = e_j[k - A_spar_patt->start[i]];
}
free( dense_matrix );
free( e_j );
}
free( pos_x);
free( X );
Kurt A. O'Hearn
committed
/////////////////////
Kurt A. O'Hearn
committed
ret = MPI_Reduce( &t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM,
MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
if ( system->my_rank == MASTER_NODE )
{
data->timing.cm_solver_comm += total_comm / nprocs;
}
Kurt A. O'Hearn
committed
return Get_Time( ) - start;
Kurt A. O'Hearn
committed
#else
real sparse_approx_inverse( reax_system const * const system,
simulation_data * const data,
storage * const workspace, mpi_datatypes * const mpi_data,
sparse_matrix * const A, sparse_matrix * const A_spar_patt,
sparse_matrix * const A_app_inv, int nprocs )
Kurt A. O'Hearn
committed
int i, k, pj, j_temp, push, ret;
Kurt A. O'Hearn
committed
int N, M, d_i, d_j, mark;
int local_pos, atom_pos, identity_pos;
int *X, *q;
int size_e, size_dense;
int cnt;
int *row_nnz;
int **j_list;
int d;
int flag1, flag2;
int *j_send, *j_recv1, *j_recv2;
int size_send, size_recv1, size_recv2;
Kurt A. O'Hearn
committed
real *e_j, *dense_matrix;
real **val_list;
Kurt A. O'Hearn
committed
real *val_send, *val_recv1, *val_recv2;
Kurt A. O'Hearn
committed
real start, t_start, t_comm, total_comm;
reax_atom *atom;
mpi_out_data *out_bufs;
const neighbor_proc *nbr1, *nbr2;
MPI_Request req1, req2, req3, req4;
MPI_Status stat1, stat2, stat3, stat4;
lapack_int m, n, nrhs, lda, ldb, info;
Kurt A. O'Hearn
committed
start = Get_Time( );
Kurt A. O'Hearn
committed
t_comm = 0.0;
Kurt A. O'Hearn
committed
if ( A_app_inv->allocated == FALSE )
Kurt A. O'Hearn
committed
Allocate_Matrix( A_app_inv, A_spar_patt->n, system->local_cap, A_spar_patt->m,
SYM_FULL_MATRIX );
}
else /* if ( A_app_inv->m < A_spar_patt->m ) */
Kurt A. O'Hearn
committed
{
Deallocate_Matrix( A_app_inv );
Kurt A. O'Hearn
committed
Allocate_Matrix( A_app_inv, A_spar_patt->n, system->local_cap, A_spar_patt->m,
SYM_FULL_MATRIX );
Kurt A. O'Hearn
committed
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
X = NULL;
j_send = NULL;
val_send = NULL;
j_recv1 = NULL;
j_recv2 = NULL;
val_recv1 = NULL;
val_recv2 = NULL;
size_send = 0;
size_recv1 = 0;
size_recv2 = 0;
e_j = NULL;
dense_matrix = NULL;
size_e = 0;
size_dense = 0;
row_nnz = smalloc( sizeof(int) * system->total_cap,
"sparse_approx_inverse::row_nnz" );
j_list = smalloc( sizeof(int *) * system->N,
"sparse_approx_inverse::j_list" );
val_list = smalloc( sizeof(real *) * system->N,
"sparse_approx_inverse::val_list" );
for ( i = 0; i < system->total_cap; ++i )
{
row_nnz[i] = 0;
}
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
/* mark the atoms that already have their row stored in the local matrix */
for ( i = 0; i < system->n; ++i )
Kurt A. O'Hearn
committed
row_nnz[i] = A->end[i] - A->start[i];
Kurt A. O'Hearn
committed
/* Announce the nnz's in each row that will be communicated later */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
Kurt A. O'Hearn
committed
Dist( system, mpi_data, row_nnz, INT_PTR_TYPE, MPI_INT );
Kurt A. O'Hearn
committed
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
out_bufs = mpi_data->out_buffers;
/* use a Dist-like approach to send the row information */
for ( d = 0; d < 3; ++d)
Kurt A. O'Hearn
committed
flag1 = 0;
flag2 = 0;
cnt = 0;
/* initiate recvs */
nbr1 = &system->my_nbrs[2 * d];
if ( nbr1->atoms_cnt )
Kurt A. O'Hearn
committed
cnt = 0;
/* calculate the total data that will be received */
for( i = nbr1->atoms_str; i < (nbr1->atoms_str + nbr1->atoms_cnt); ++i )
{
cnt += row_nnz[i];
}
/* initiate Irecv */
if( cnt )
{
flag1 = 1;
if ( size_recv1 < cnt )
Kurt A. O'Hearn
committed
if ( size_recv1 )
{
sfree( j_recv1, "sparse_approx_inverse::j_recv1" );
sfree( val_recv1, "sparse_approx_inverse::val_recv1" );
}
Kurt A. O'Hearn
committed
size_recv1 = cnt * SAFE_ZONE;
Kurt A. O'Hearn
committed
j_recv1 = smalloc( sizeof(int) * size_recv1,
"sparse_approx_inverse::j_recv1" );
val_recv1 = smalloc( sizeof(real) * size_recv1,
"sparse_approx_inverse::val_recv1" );
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Irecv( j_recv1, cnt, MPI_INT, nbr1->rank,
2 * d + 1, mpi_data->comm_mesh3D, &req1 );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Irecv( val_recv1, cnt, MPI_DOUBLE, nbr1->rank,
2 * d + 1, mpi_data->comm_mesh3D, &req2 );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
}
}
nbr2 = &system->my_nbrs[2 * d + 1];
if ( nbr2->atoms_cnt )
{
/* calculate the total data that will be received */
cnt = 0;
for( i = nbr2->atoms_str; i < (nbr2->atoms_str + nbr2->atoms_cnt); ++i )
{
cnt += row_nnz[i];
}
/* initiate Irecv */
if( cnt )
{
flag2 = 1;
if ( size_recv2 < cnt )
Kurt A. O'Hearn
committed
if ( size_recv2 )
{
sfree( j_recv2, "sparse_approx_inverse::j_recv2" );
sfree( val_recv2, "sparse_approx_inverse::val_recv2" );
}
size_recv2 = cnt * SAFE_ZONE;
j_recv2 = smalloc( sizeof(int) * size_recv2,
"sparse_approx_inverse::j_recv2" );
val_recv2 = smalloc( sizeof(real) * size_recv2,
"sparse_approx_inverse::val_recv2" );
}
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Irecv( j_recv2, cnt, MPI_INT, nbr2->rank,
2 * d, mpi_data->comm_mesh3D, &req3 );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Irecv( val_recv2, cnt, MPI_DOUBLE, nbr2->rank,
2 * d, mpi_data->comm_mesh3D, &req4 );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
}
}
/* send both messages in dimension d */
if ( out_bufs[2 * d].cnt )
{
cnt = 0;
for ( i = 0; i < out_bufs[2 * d].cnt; ++i )
{
cnt += row_nnz[ out_bufs[2 * d].index[i] ];
}
if ( cnt > 0 )
{
if ( size_send < cnt )
{
if ( size_send )
{
sfree( j_send, "sparse_approx_inverse::j_send" );
sfree( val_send, "sparse_approx_inverse::val_send" );
}
size_send = cnt * SAFE_ZONE;
j_send = smalloc( sizeof(int) * size_send,
"sparse_approx_inverse::j_send" );
val_send = smalloc( sizeof(real) * size_send,
"sparse_approx_inverse::j_send" );
}
cnt = 0;
for ( i = 0; i < out_bufs[2 * d].cnt; ++i )
{
if ( out_bufs[2 * d].index[i] < A->n )
{
for ( pj = A->start[ out_bufs[2 * d].index[i] ]; pj < A->end[ out_bufs[2 * d].index[i] ]; ++pj )
atom = &system->my_atoms[ A->j[pj] ];
Kurt A. O'Hearn
committed
j_send[cnt] = atom->orig_id;
val_send[cnt] = A->val[pj];
Kurt A. O'Hearn
committed
cnt++;
Kurt A. O'Hearn
committed
}
else
{
for ( pj = 0; pj < row_nnz[ out_bufs[2 * d].index[i] ]; ++pj )
Kurt A. O'Hearn
committed
j_send[cnt] = j_list[ out_bufs[2 * d].index[i] ][pj];
val_send[cnt] = val_list[ out_bufs[2 * d].index[i] ][pj];
cnt++;
Kurt A. O'Hearn
committed
}
}
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Send( j_send, cnt, MPI_INT, nbr1->rank,
2 * d, mpi_data->comm_mesh3D );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Send( val_send, cnt, MPI_DOUBLE, nbr1->rank,
2 * d, mpi_data->comm_mesh3D );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
}
}
if ( out_bufs[2 * d + 1].cnt )
{
cnt = 0;
for ( i = 0; i < out_bufs[2 * d + 1].cnt; ++i )
{
cnt += row_nnz[ out_bufs[2 * d + 1].index[i] ];
}
if ( cnt > 0 )
{
if ( size_send < cnt )
{
if ( size_send )
{
sfree( j_send, "sparse_approx_inverse::j_send" );
sfree( val_send, "sparse_approx_inverse::j_send" );
}
size_send = cnt * SAFE_ZONE;
j_send = smalloc( sizeof(int) * size_send,
"sparse_approx_inverse::j_send" );
val_send = smalloc( sizeof(real) * size_send,
"sparse_approx_inverse::val_send" );
}
cnt = 0;
for ( i = 0; i < out_bufs[2 * d + 1].cnt; ++i )
{
if ( out_bufs[2 * d + 1].index[i] < A->n )
{
for ( pj = A->start[ out_bufs[2 * d + 1].index[i] ]; pj < A->end[ out_bufs[2 * d + 1].index[i] ]; ++pj )
atom = &system->my_atoms[ A->j[pj] ];
Kurt A. O'Hearn
committed
j_send[cnt] = atom->orig_id;
val_send[cnt] = A->val[pj];
Kurt A. O'Hearn
committed
cnt++;
Kurt A. O'Hearn
committed
}
else
{
for ( pj = 0; pj < row_nnz[ out_bufs[2 * d + 1].index[i] ]; ++pj )
Kurt A. O'Hearn
committed
j_send[cnt] = j_list[ out_bufs[2 * d + 1].index[i] ][pj];
val_send[cnt] = val_list[ out_bufs[2 * d + 1].index[i] ][pj];
cnt++;
Kurt A. O'Hearn
committed
}
}
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Send( j_send, cnt, MPI_INT, nbr2->rank,
2 * d + 1, mpi_data->comm_mesh3D );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Send( val_send, cnt, MPI_DOUBLE, nbr2->rank,
2 * d + 1, mpi_data->comm_mesh3D );
Kurt A. O'Hearn
committed
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
}
}
if ( flag1 )
{
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Wait( &req1, &stat1 );
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Wait( &req2, &stat2 );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
cnt = 0;
for ( i = nbr1->atoms_str; i < (nbr1->atoms_str + nbr1->atoms_cnt); ++i )
{
j_list[i] = smalloc( sizeof(int) * row_nnz[i],
"sparse_approx_inverse::j_list[i]" );
val_list[i] = smalloc( sizeof(real) * row_nnz[i],
"sparse_approx_inverse::val_list[i]" );
Kurt A. O'Hearn
committed
for ( pj = 0; pj < row_nnz[i]; ++pj )
{
j_list[i][pj] = j_recv1[cnt];
val_list[i][pj] = val_recv1[cnt];
cnt++;
Kurt A. O'Hearn
committed
}
}
if ( flag2 )
{
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Wait( &req3, &stat3 );
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Wait( &req4, &stat4 );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
cnt = 0;
for ( i = nbr2->atoms_str; i < (nbr2->atoms_str + nbr2->atoms_cnt); ++i )
{
j_list[i] = smalloc( sizeof(int) * row_nnz[i],
"sparse_approx_inverse::j_list[i]" );
val_list[i] = smalloc( sizeof(real) * row_nnz[i],
"sparse_approx_inverse::val_list[i]" );
for ( pj = 0; pj < row_nnz[i]; ++pj )
{
j_list[i][pj] = j_recv2[cnt];
val_list[i][pj] = val_recv2[cnt];
cnt++;
}
}
Kurt A. O'Hearn
committed
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
sfree( j_send, "sparse_approx_inverse::j_send" );
sfree( val_send, "sparse_approx_inverse::val_send" );
sfree( j_recv1, "sparse_approx_inverse::j_recv1" );
sfree( j_recv2, "sparse_approx_inverse::j_recv2" );
sfree( val_recv1, "sparse_approx_inverse::val_recv1" );
sfree( val_recv2, "sparse_approx_inverse::val_recv2" );
X = smalloc( sizeof(int) * (system->bigN + 1),
"sparse_approx_inverse::X" );
//size of q should be equal to the maximum possible cardinalty
//of the set formed by neighbors of neighbors of an atom
//i.e, maximum number of rows of dense matrix
//for water systems, this number is 34000
//for silica systems, it is 12000
q = smalloc( sizeof(int) * 50000,
"sparse_approx_inverse::q" );
for ( i = 0; i <= system->bigN; ++i )
{
X[i] = -1;
}
for ( i = 0; i < A_spar_patt->n; ++i )
{
N = 0;
M = 0;
push = 0;
mark = i + system->bigN;
/* find column indices of nonzeros (which will be the columns indices of the dense matrix) */
for ( pj = A_spar_patt->start[i]; pj < A_spar_patt->end[i]; ++pj )
{
j_temp = A_spar_patt->j[pj];
Kurt A. O'Hearn
committed
atom = &system->my_atoms[j_temp];
++N;
Kurt A. O'Hearn
committed
/* for each of those indices
* search through the row of full A of that index */
Kurt A. O'Hearn
committed
/* the case where the local matrix has that index's row */
if( j_temp < A->n )
{
for ( k = A->start[ j_temp ]; k < A->end[ j_temp ]; ++k )
{
/* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */
atom = &system->my_atoms[ A->j[k] ];
Kurt A. O'Hearn
committed
X[atom->orig_id] = mark;
q[push++] = atom->orig_id;
}
}
Kurt A. O'Hearn
committed
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
/* the case where we communicated that index's row */
else
{
for ( k = 0; k < row_nnz[j_temp]; ++k )
{
/* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */
X[ j_list[j_temp][k] ] = mark;
q[push++] = j_list[j_temp][k];
}
}
}
/* enumerate the row indices from 0 to (# of nonzero rows - 1) for the dense matrix */
identity_pos = M;
atom = &system->my_atoms[ i ];
atom_pos = atom->orig_id;
for ( k = 0; k < push; k++)
{
if ( X[ q[k] ] == mark )
{
X[ q[k] ] = M;
++M;
}
}
identity_pos = X[atom_pos];
/* allocate memory for NxM dense matrix */
if ( size_dense < N * M )
{
if ( size_dense )
{
sfree( dense_matrix, "sparse_approx_inverse::dense_matrix" );
}
size_dense = N * M * SAFE_ZONE;
dense_matrix = smalloc( sizeof(real) * size_dense,
"sparse_approx_inverse::dense_matrix" );
}
/* fill in the entries of dense matrix */
for ( d_j = 0; d_j < N; ++d_j)
{
/* all rows are initialized to zero */
for ( d_i = 0; d_i < M; ++d_i )
{
dense_matrix[d_i * N + d_j] = 0.0;
}
/* change the value if any of the column indices is seen */
/* it is in the original list */
local_pos = A_spar_patt->j[ A_spar_patt->start[i] + d_j ];
Kurt A. O'Hearn
committed
if ( local_pos < A->n )
{
for ( d_i = A->start[local_pos]; d_i < A->end[local_pos]; ++d_i )
{
atom = &system->my_atoms[ A->j[d_i] ];
dense_matrix[ X[ atom->orig_id ] * N + d_j ] = A->val[d_i];
Kurt A. O'Hearn
committed
}
}
else
{
for ( d_i = 0; d_i < row_nnz[ local_pos ]; ++d_i )
{
dense_matrix[ X[ j_list[local_pos][d_i] ] * N + d_j ] = val_list[local_pos][d_i];
}
}
}
Kurt A. O'Hearn
committed
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
/* create the right hand side of the linear equation
* that is the full column of the identity matrix */
if ( size_e < M )
{
if ( size_e )
{
sfree( e_j, "sparse_approx_inverse::e_j" );
}
size_e = M * SAFE_ZONE;
e_j = smalloc( sizeof(real) * size_e, "sparse_approx_inverse::e_j" );
}
for ( k = 0; k < M; ++k )
{
e_j[k] = 0.0;
}
e_j[identity_pos] = 1.0;
/* Solve the overdetermined system AX = B through the least-squares problem:
* min ||B - AX||_2 */
m = M;
n = N;
nrhs = 1;
lda = N;
ldb = nrhs;
info = LAPACKE_dgels( LAPACK_ROW_MAJOR, 'N', m, n, nrhs, dense_matrix, lda,
e_j, ldb );
/* Check for the full rank */
if ( info > 0 )
{
fprintf( stderr, "[ERROR] The diagonal element %i of the triangular factor ", info );
fprintf( stderr, "of A is zero, so that A does not have full rank;\n" );
fprintf( stderr, "the least squares solution could not be computed.\n" );
MPI_Abort( MPI_COMM_WORLD, RUNTIME_ERROR );
}
/* accumulate the resulting vector to build A_app_inv */
A_app_inv->start[i] = A_spar_patt->start[i];
A_app_inv->end[i] = A_spar_patt->end[i];
for ( k = A_app_inv->start[i]; k < A_app_inv->end[i]; ++k)
Kurt A. O'Hearn
committed
{
A_app_inv->j[k] = A_spar_patt->j[k];
A_app_inv->val[k] = e_j[k - A_spar_patt->start[i]];
Kurt A. O'Hearn
committed
}
}
sfree( dense_matrix, "sparse_approx_inverse::dense_matrix" );
sfree( e_j, "sparse_approx_inverse::e_j" );
sfree( X, "sparse_approx_inverse::X" );
/*for ( i = 0; i < system->N; ++i )
{
sfree( j_list[i], "sparse_approx_inverse::j_list" );
sfree( val_list[i], "sparse_approx_inverse::val_list" );
}
sfree( j_list, "sparse_approx_inverse::j_list" );
sfree( val_list, "sparse_approx_inverse::val_list" );*/
sfree( row_nnz, "sparse_approx_inverse::row_nnz" );
Kurt A. O'Hearn
committed
ret = MPI_Reduce( &t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM,
MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
if ( system->my_rank == MASTER_NODE )
{
data->timing.cm_solver_comm += total_comm / nprocs;
}
Kurt A. O'Hearn
committed
return Get_Time( ) - start;
Kurt A. O'Hearn
committed
}
#endif
#endif
Kurt A. O'Hearn
committed
/* Apply left-sided preconditioning while solving M^{-1}AX = M^{-1}B
*
* system:
* workspace: data struct containing matrices and vectors, stored in CSR
* control: data struct containing parameters
* data: struct containing timing simulation data (including performance data)
* y: vector to which to apply preconditioning,
* specific to internals of iterative solver being used
* x (output): preconditioned vector
* fresh_pre: parameter indicating if this is a newly computed (fresh) preconditioner
* side: used in determining how to apply preconditioner if the preconditioner is
* factorized as M = M_{1}M_{2} (e.g., incomplete LU, A \approx LU)
*
* Assumptions:
* Matrices have non-zero diagonals
* Each row of a matrix has at least one non-zero (i.e., no rows with all zeros) */
static void dual_apply_preconditioner( reax_system const * const system,
storage const * const workspace, control_params const * const control,
simulation_data * const data, mpi_datatypes * const mpi_data,
rvec2 const * const y, rvec2 * const x, int fresh_pre, int side )
Kurt A. O'Hearn
committed
{
// int i, si;
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
/* no preconditioning */
if ( control->cm_solver_pre_comp_type == NONE_PC )
{
if ( x != y )
{
Kurt A. O'Hearn
committed
Vector_Copy_rvec2( x, y, system->n );
Kurt A. O'Hearn
committed
}
}
else
{
switch ( side )
{
case LEFT:
switch ( control->cm_solver_pre_app_type )
{
case TRI_SOLVE_PA:
switch ( control->cm_solver_pre_comp_type )
{
case JACOBI_PC:
Kurt A. O'Hearn
committed
dual_jacobi_app( workspace->Hdia_inv, y, x, system->n );
Kurt A. O'Hearn
committed
break;
// case ICHOLT_PC:
// case ILUT_PC:
// case ILUTP_PC:
// tri_solve( workspace->L, y, x, workspace->L->n, LOWER );
// break;
case SAI_PC:
#if defined(NEUTRAL_TERRITORY)
Kurt A. O'Hearn
committed
Dual_Sparse_MatVec( system, control, data, mpi_data, &workspace->H_app_inv,
y, H->NT, x );
Kurt A. O'Hearn
committed
#else
Kurt A. O'Hearn
committed
Dual_Sparse_MatVec( system, control, data, mpi_data, &workspace->H_app_inv,
y, system->n, x );
Kurt A. O'Hearn
committed
#endif
break;
default:
fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
break;
case TRI_SOLVE_LEVEL_SCHED_PA:
switch ( control->cm_solver_pre_comp_type )
{
case JACOBI_PC:
Kurt A. O'Hearn
committed
dual_jacobi_app( workspace->Hdia_inv, y, x, system->n );
Kurt A. O'Hearn
committed
break;
// case ICHOLT_PC:
// case ILUT_PC:
// case ILUTP_PC:
// tri_solve_level_sched( (static_storage *) workspace,
// workspace->L, y, x, workspace->L->n, LOWER, fresh_pre );
// break;
case SAI_PC:
#if defined(NEUTRAL_TERRITORY)
Kurt A. O'Hearn
committed
Dual_Sparse_MatVec( system, control, data, mpi_data, &workspace->H_app_inv,
y, H->NT, x );
Kurt A. O'Hearn
committed
#else
Kurt A. O'Hearn
committed
Dual_Sparse_MatVec( system, control, data, mpi_data, &workspace->H_app_inv,
y, system->n, x );
Kurt A. O'Hearn
committed
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
#endif
break;
default:
fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
break;
case TRI_SOLVE_GC_PA:
switch ( control->cm_solver_pre_comp_type )
{
case JACOBI_PC:
case SAI_PC:
fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
exit( INVALID_INPUT );
break;
// case ICHOLT_PC:
// case ILUT_PC:
// case ILUTP_PC:
// for ( i = 0; i < workspace->H->n; ++i )
// {
// workspace->y_p[i] = y[i];
// }
//
// permute_vector( workspace, workspace->y_p, workspace->H->n, FALSE, LOWER );
// tri_solve_level_sched( (static_storage *) workspace,
// workspace->L, workspace->y_p, x, workspace->L->n, LOWER, fresh_pre );
// break;
default:
fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
break;
case JACOBI_ITER_PA:
switch ( control->cm_solver_pre_comp_type )
{
case JACOBI_PC:
case SAI_PC:
fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
exit( INVALID_INPUT );
break;
// case ICHOLT_PC:
// case ILUT_PC:
// case ILUTP_PC:
// // construct D^{-1}_L
// if ( fresh_pre == TRUE )
// {
// for ( i = 0; i < workspace->L->n; ++i )
// {
// si = workspace->L->start[i + 1] - 1;
// workspace->Dinv_L[i] = 1.0 / workspace->L->val[si];
// }
// }
//
// jacobi_iter( workspace, workspace->L, workspace->Dinv_L,
// y, x, LOWER, control->cm_solver_pre_app_jacobi_iters );
// break;
default:
fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
break;
default:
fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
break;
case RIGHT:
switch ( control->cm_solver_pre_app_type )
{
case TRI_SOLVE_PA:
switch ( control->cm_solver_pre_comp_type )
{
case JACOBI_PC:
case SAI_PC:
if ( x != y )
{
Kurt A. O'Hearn
committed
Vector_Copy_rvec2( x, y, system->n );
Kurt A. O'Hearn
committed
}
break;
// case ICHOLT_PC:
// case ILUT_PC:
// case ILUTP_PC:
// tri_solve( workspace->U, y, x, workspace->U->n, UPPER );
// break;
default:
fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
break;
case TRI_SOLVE_LEVEL_SCHED_PA:
switch ( control->cm_solver_pre_comp_type )
{
case JACOBI_PC:
case SAI_PC:
if ( x != y )
{
Kurt A. O'Hearn
committed
Vector_Copy_rvec2( x, y, system->n );
Kurt A. O'Hearn
committed
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
}
break;
// case ICHOLT_PC:
// case ILUT_PC:
// case ILUTP_PC:
// tri_solve_level_sched( (static_storage *) workspace,
// workspace->U, y, x, workspace->U->n, UPPER, fresh_pre );
// break;
default:
fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
break;
case TRI_SOLVE_GC_PA:
switch ( control->cm_solver_pre_comp_type )
{
case JACOBI_PC:
case SAI_PC:
fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
exit( INVALID_INPUT );
break;
// case ICHOLT_PC:
// case ILUT_PC:
// case ILUTP_PC:
// tri_solve_level_sched( (static_storage *) workspace,
// workspace->U, y, x, workspace->U->n, UPPER, fresh_pre );
// permute_vector( workspace, x, workspace->H->n, TRUE, UPPER );
// break;
default:
fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
break;
case JACOBI_ITER_PA:
switch ( control->cm_solver_pre_comp_type )
{
case JACOBI_PC:
case SAI_PC:
fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
exit( INVALID_INPUT );
break;
// case ICHOLT_PC:
// case ILUT_PC:
// case ILUTP_PC:
Loading
Loading full blame...