Newer
Older
/* initiate recvs */
Kurt A. O'Hearn
committed
nbr = &system->my_nt_nbrs[d];
if ( nbr->atoms_cnt )
Kurt A. O'Hearn
committed
/* calculate the total data that will be received */
Kurt A. O'Hearn
committed
for ( i = nbr->atoms_str; i < (nbr->atoms_str + nbr->atoms_cnt); ++i )
Kurt A. O'Hearn
committed
cnt += row_nnz[i];
Kurt A. O'Hearn
committed
/* initiate Irecv */
if ( cnt )
Kurt A. O'Hearn
committed
count += 2;
Kurt A. O'Hearn
committed
j_recv[d] = (int *) malloc( sizeof(int) * cnt );
val_recv[d] = (real *) malloc( sizeof(real) * cnt );
Kurt A. O'Hearn
committed
fprintf( stdout,"Dist communication receive phase direction %d will receive %d\n", d, cnt);
fflush( stdout );
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Irecv( j_recv + d, cnt, MPI_INT, nbr->receive_rank, d, comm, &req[2 * d] );
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Irecv( val_recv + d, cnt, MPI_DOUBLE, nbr->receive_rank, d, comm, &req[2 * d + 1] );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
}
/////////////////////
for( d = 0; d < 6; ++d)
{
nbr = &system->my_nt_nbrs[d];
/* send both messages in dimension d */
Kurt A. O'Hearn
committed
if ( out_bufs[d].cnt )
Kurt A. O'Hearn
committed
for ( i = 0; i < out_bufs[d].cnt; ++i )
Kurt A. O'Hearn
committed
cnt += A->end[ out_bufs[d].index[i] ] - A->start[ out_bufs[d].index[i] ];
if ( out_bufs[d].index[i] < 0 || out_bufs[d].index[i] >= A->n )
Kurt A. O'Hearn
committed
fprintf( stdout, "INDEXING ERROR %d > %d\n", out_bufs[d].index[i], A->n );
fflush( stdout );
Kurt A. O'Hearn
committed
// row_nnz[ out_bufs[d].index[i] ];
Kurt A. O'Hearn
committed
fprintf( stdout,"Dist communication send phase direction %d should send %d\n", d, cnt);
fflush( stdout );
Kurt A. O'Hearn
committed
if ( cnt )
Kurt A. O'Hearn
committed
j_send = (int *) malloc( sizeof(int) * cnt );
val_send = (real *) malloc( sizeof(real) * cnt );
Kurt A. O'Hearn
committed
cnt = 0;
for ( i = 0; i < out_bufs[d].cnt; ++i )
Kurt A. O'Hearn
committed
for ( pj = A->start[ out_bufs[d].index[i] ]; pj < A->end[ out_bufs[d].index[i] ]; ++pj )
atom = &system->my_atoms[ A->j[pj] ];
Kurt A. O'Hearn
committed
j_send[cnt] = atom->orig_id;
val_send[cnt] = A->val[pj];
cnt++;
}
}
Kurt A. O'Hearn
committed
fprintf( stdout,"Dist communication send phase direction %d will send %d\n", d, cnt );
fflush( stdout );
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Send( j_send, cnt, MPI_INT, nbr->rank, d, comm );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
fprintf( stdout,"Dist communication send phase direction %d cnt = %d\n", d, cnt);
fflush( stdout );
Kurt A. O'Hearn
committed
ret = MPI_Send( val_send, cnt, MPI_DOUBLE, nbr->rank, d, comm );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
fprintf( stdout,"Dist communication send phase direction %d cnt = %d\n", d, cnt);
fflush( stdout );
Kurt A. O'Hearn
committed
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
}
fprintf( stdout," Dist communication for sending row info before waitany\n");
fflush( stdout );
///////////////////////
for ( d = 0; d < count; ++d )
{
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Waitany( MAX_NT_NBRS, req, &index, stat);
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
nbr = &system->my_nt_nbrs[index / 2];
cnt = 0;
for ( i = nbr->atoms_str; i < (nbr->atoms_str + nbr->atoms_cnt); ++i )
Kurt A. O'Hearn
committed
if ( index % 2 == 0 )
Kurt A. O'Hearn
committed
j_list[i] = (int *) malloc( sizeof(int) * row_nnz[i] );
for ( pj = 0; pj < row_nnz[i]; ++pj )
Kurt A. O'Hearn
committed
j_list[i][pj] = j_recv[index / 2][cnt];
cnt++;
}
}
Kurt A. O'Hearn
committed
else
Kurt A. O'Hearn
committed
val_list[i] = (real *) malloc( sizeof(real) * row_nnz[i] );
for ( pj = 0; pj < row_nnz[i]; ++pj )
Kurt A. O'Hearn
committed
val_list[i][pj] = val_recv[index / 2][cnt];
cnt++;
}
}
Kurt A. O'Hearn
committed
}
Kurt A. O'Hearn
committed
//////////////////////
fprintf( stdout," wow wow wow, Dist communication for sending row info worked\n");
fflush( stdout );
//TODO: size?
X = (int *) malloc( sizeof(int) * (system->bigN + 1) );
pos_x = (int *) malloc( sizeof(int) * (system->bigN + 1) );
for ( i = 0; i < A_spar_patt->NT; ++i )
{
N = 0;
M = 0;
Kurt A. O'Hearn
committed
for ( k = 0; k <= system->bigN; ++k )
{
X[k] = 0;
pos_x[k] = 0;
}
/* find column indices of nonzeros (which will be the columns indices of the dense matrix) */
for ( pj = A_spar_patt->start[i]; pj < A_spar_patt->end[i]; ++pj )
{
j_temp = A_spar_patt->j[pj];
Kurt A. O'Hearn
committed
atom = &system->my_atoms[j_temp];
++N;
/* for each of those indices
Kurt A. O'Hearn
committed
* search through the row of full A of that index */
/* the case where the local matrix has that index's row */
Kurt A. O'Hearn
committed
if ( j_temp < A->NT )
Kurt A. O'Hearn
committed
for ( k = A->start[ j_temp ]; k < A->end[ j_temp ]; ++k )
{
/* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */
atom = &system->my_atoms[ A->j[k] ];
Kurt A. O'Hearn
committed
X[atom->orig_id] = 1;
}
}
/* the case where we communicated that index's row */
else
{
for ( k = 0; k < row_nnz[j_temp]; ++k )
{
/* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */
X[ j_list[j_temp][k] ] = 1;
}
}
}
/* enumerate the row indices from 0 to (# of nonzero rows - 1) for the dense matrix */
identity_pos = M;
Kurt A. O'Hearn
committed
atom = &system->my_atoms[ i ];
atom_pos = atom->orig_id;
for ( k = 0; k <= system->bigN; k++)
{
if ( X[k] != 0 )
{
Kurt A. O'Hearn
committed
pos_x[k] = M;
if ( k == atom_pos )
{
identity_pos = M;
}
++M;
}
}
/* allocate memory for NxM dense matrix */
dense_matrix = (real *) malloc( sizeof(real) * N * M );
/* fill in the entries of dense matrix */
Kurt A. O'Hearn
committed
for ( d_j = 0; d_j < N; ++d_j)
{
/* all rows are initialized to zero */
Kurt A. O'Hearn
committed
for ( d_i = 0; d_i < M; ++d_i )
{
dense_matrix[d_i * N + d_j] = 0.0;
}
/* change the value if any of the column indices is seen */
/* it is in the original list */
local_pos = A_spar_patt->j[ A_spar_patt->start[i] + d_j ];
Kurt A. O'Hearn
committed
if ( local_pos < 0 || local_pos >= system->N )
Kurt A. O'Hearn
committed
fprintf( stderr, "THE LOCAL POSITION OF THE ATOM IS NOT VALID, STOP THE EXECUTION\n");
fflush( stderr );
}
/////////////////////////////
if ( local_pos < A->NT )
{
for ( d_i = A->start[local_pos]; d_i < A->end[local_pos]; ++d_i )
atom = &system->my_atoms[ A->j[d_i] ];
Kurt A. O'Hearn
committed
if ( pos_x[ atom->orig_id ] >= M || d_j >= N )
{
fprintf( stderr, "CANNOT MAP IT TO THE DENSE MATRIX, STOP THE EXECUTION, orig_id = %d, i = %d, j = %d, M = %d N = %d\n", atom->orig_id, pos_x[ atom->orig_id ], d_j, M, N );
fflush( stderr );
}
if ( X[ atom->orig_id ] == 1 )
dense_matrix[ pos_x[ atom->orig_id ] * N + d_j ] = A->val[d_i];
}
}
}
else
{
Kurt A. O'Hearn
committed
for ( d_i = 0; d_i < row_nnz[ local_pos ]; ++d_i )
Kurt A. O'Hearn
committed
if ( pos_x[ j_list[local_pos][d_i] ] >= M || d_j >= N )
{
fprintf( stderr, "CANNOT MAP IT TO THE DENSE MATRIX, STOP THE EXECUTION, %d %d\n", pos_x[ j_list[local_pos][d_i] ], d_j);
fflush( stderr );
}
if ( X[ j_list[local_pos][d_i] ] == 1 )
Kurt A. O'Hearn
committed
dense_matrix[ pos_x[ j_list[local_pos][d_i] ] * N + d_j ] = val_list[local_pos][d_i];
}
}
}
}
/* create the right hand side of the linear equation
Kurt A. O'Hearn
committed
* that is the full column of the identity matrix */
e_j = (real *) malloc( sizeof(real) * M );
Kurt A. O'Hearn
committed
//////////////////////
for ( k = 0; k < M; ++k )
{
e_j[k] = 0.0;
}
e_j[identity_pos] = 1.0;
/* Solve the overdetermined system AX = B through the least-squares problem:
Kurt A. O'Hearn
committed
* min ||B - AX||_2 */
m = M;
n = N;
nrhs = 1;
lda = N;
ldb = nrhs;
Kurt A. O'Hearn
committed
info = LAPACKE_dgels( LAPACK_ROW_MAJOR, 'N', m, n, nrhs, dense_matrix, lda,
e_j, ldb );
/* Check for the full rank */
if ( info > 0 )
{
Kurt A. O'Hearn
committed
fprintf( stderr, "The diagonal element %i of the triangular factor ", info );
fprintf( stderr, "of A is zero, so that A does not have full rank;\n" );
fprintf( stderr, "the least squares solution could not be computed.\n" );
exit( INVALID_INPUT );
}
/* accumulate the resulting vector to build A_app_inv */
(*A_app_inv)->start[i] = A_spar_patt->start[i];
(*A_app_inv)->end[i] = A_spar_patt->end[i];
Kurt A. O'Hearn
committed
for ( k = (*A_app_inv)->start[i]; k < (*A_app_inv)->end[i]; ++k)
(*A_app_inv)->j[k] = A_spar_patt->j[k];
(*A_app_inv)->val[k] = e_j[k - A_spar_patt->start[i]];
}
free( dense_matrix );
free( e_j );
}
free( pos_x);
free( X );
Kurt A. O'Hearn
committed
/////////////////////
Kurt A. O'Hearn
committed
ret = MPI_Reduce( &t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM,
MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
if ( system->my_rank == MASTER_NODE )
{
data->timing.cm_solver_comm += total_comm / nprocs;
}
Kurt A. O'Hearn
committed
return Get_Time( ) - start;
Kurt A. O'Hearn
committed
#else
real sparse_approx_inverse( reax_system const * const system,
simulation_data * const data,
storage * const workspace, mpi_datatypes * const mpi_data,
sparse_matrix * const A, sparse_matrix * const A_spar_patt,
sparse_matrix **A_app_inv, int nprocs )
Kurt A. O'Hearn
committed
int i, k, pj, j_temp, push, ret;
Kurt A. O'Hearn
committed
int N, M, d_i, d_j, mark;
int local_pos, atom_pos, identity_pos;
int *X, *q;
int size_e, size_dense;
int cnt;
int *row_nnz;
int **j_list;
int d;
int flag1, flag2;
int *j_send, *j_recv1, *j_recv2;
int size_send, size_recv1, size_recv2;
Kurt A. O'Hearn
committed
real *e_j, *dense_matrix;
real **val_list;
Kurt A. O'Hearn
committed
real *val_send, *val_recv1, *val_recv2;
Kurt A. O'Hearn
committed
real start, t_start, t_comm, total_comm;
reax_atom *atom;
mpi_out_data *out_bufs;
const neighbor_proc *nbr1, *nbr2;
MPI_Request req1, req2, req3, req4;
MPI_Status stat1, stat2, stat3, stat4;
lapack_int m, n, nrhs, lda, ldb, info;
Kurt A. O'Hearn
committed
start = Get_Time( );
Kurt A. O'Hearn
committed
t_comm = 0.0;
if ( *A_app_inv == NULL)
Kurt A. O'Hearn
committed
Allocate_Matrix( A_app_inv, A_spar_patt->n, system->local_cap, A_spar_patt->m,
SYM_FULL_MATRIX );
}
else /* if ( (*A_app_inv)->m < A_spar_patt->m ) */
{
Deallocate_Matrix( *A_app_inv );
Allocate_Matrix( A_app_inv, A_spar_patt->n, system->local_cap, A_spar_patt->m,
SYM_FULL_MATRIX );
Kurt A. O'Hearn
committed
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
X = NULL;
j_send = NULL;
val_send = NULL;
j_recv1 = NULL;
j_recv2 = NULL;
val_recv1 = NULL;
val_recv2 = NULL;
size_send = 0;
size_recv1 = 0;
size_recv2 = 0;
e_j = NULL;
dense_matrix = NULL;
size_e = 0;
size_dense = 0;
row_nnz = smalloc( sizeof(int) * system->total_cap,
"sparse_approx_inverse::row_nnz" );
j_list = smalloc( sizeof(int *) * system->N,
"sparse_approx_inverse::j_list" );
val_list = smalloc( sizeof(real *) * system->N,
"sparse_approx_inverse::val_list" );
for ( i = 0; i < system->total_cap; ++i )
{
row_nnz[i] = 0;
}
Kurt A. O'Hearn
committed
Kurt A. O'Hearn
committed
/* mark the atoms that already have their row stored in the local matrix */
for ( i = 0; i < system->n; ++i )
Kurt A. O'Hearn
committed
row_nnz[i] = A->end[i] - A->start[i];
Kurt A. O'Hearn
committed
/* Announce the nnz's in each row that will be communicated later */
Kurt A. O'Hearn
committed
t_start = Get_Time( );
Kurt A. O'Hearn
committed
Dist( system, mpi_data, row_nnz, INT_PTR_TYPE, MPI_INT );
Kurt A. O'Hearn
committed
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
comm = mpi_data->comm_mesh3D;
out_bufs = mpi_data->out_buffers;
/* use a Dist-like approach to send the row information */
for ( d = 0; d < 3; ++d)
Kurt A. O'Hearn
committed
flag1 = 0;
flag2 = 0;
cnt = 0;
/* initiate recvs */
nbr1 = &system->my_nbrs[2 * d];
if ( nbr1->atoms_cnt )
Kurt A. O'Hearn
committed
cnt = 0;
/* calculate the total data that will be received */
for( i = nbr1->atoms_str; i < (nbr1->atoms_str + nbr1->atoms_cnt); ++i )
{
cnt += row_nnz[i];
}
/* initiate Irecv */
if( cnt )
{
flag1 = 1;
if ( size_recv1 < cnt )
Kurt A. O'Hearn
committed
if ( size_recv1 )
{
sfree( j_recv1, "sparse_approx_inverse::j_recv1" );
sfree( val_recv1, "sparse_approx_inverse::val_recv1" );
}
Kurt A. O'Hearn
committed
size_recv1 = cnt * SAFE_ZONE;
Kurt A. O'Hearn
committed
j_recv1 = smalloc( sizeof(int) * size_recv1,
"sparse_approx_inverse::j_recv1" );
val_recv1 = smalloc( sizeof(real) * size_recv1,
"sparse_approx_inverse::val_recv1" );
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Irecv( j_recv1, cnt, MPI_INT, nbr1->rank, 2 * d + 1, comm, &req1 );
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Irecv( val_recv1, cnt, MPI_DOUBLE, nbr1->rank, 2 * d + 1, comm, &req2 );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
}
}
nbr2 = &system->my_nbrs[2 * d + 1];
if ( nbr2->atoms_cnt )
{
/* calculate the total data that will be received */
cnt = 0;
for( i = nbr2->atoms_str; i < (nbr2->atoms_str + nbr2->atoms_cnt); ++i )
{
cnt += row_nnz[i];
}
/* initiate Irecv */
if( cnt )
{
flag2 = 1;
if ( size_recv2 < cnt )
Kurt A. O'Hearn
committed
if ( size_recv2 )
{
sfree( j_recv2, "sparse_approx_inverse::j_recv2" );
sfree( val_recv2, "sparse_approx_inverse::val_recv2" );
}
size_recv2 = cnt * SAFE_ZONE;
j_recv2 = smalloc( sizeof(int) * size_recv2,
"sparse_approx_inverse::j_recv2" );
val_recv2 = smalloc( sizeof(real) * size_recv2,
"sparse_approx_inverse::val_recv2" );
}
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Irecv( j_recv2, cnt, MPI_INT, nbr2->rank, 2 * d, comm, &req3 );
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Irecv( val_recv2, cnt, MPI_DOUBLE, nbr2->rank, 2 * d, comm, &req4 );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
}
}
/* send both messages in dimension d */
if ( out_bufs[2 * d].cnt )
{
cnt = 0;
for ( i = 0; i < out_bufs[2 * d].cnt; ++i )
{
cnt += row_nnz[ out_bufs[2 * d].index[i] ];
}
if ( cnt > 0 )
{
if ( size_send < cnt )
{
if ( size_send )
{
sfree( j_send, "sparse_approx_inverse::j_send" );
sfree( val_send, "sparse_approx_inverse::val_send" );
}
size_send = cnt * SAFE_ZONE;
j_send = smalloc( sizeof(int) * size_send,
"sparse_approx_inverse::j_send" );
val_send = smalloc( sizeof(real) * size_send,
"sparse_approx_inverse::j_send" );
}
cnt = 0;
for ( i = 0; i < out_bufs[2 * d].cnt; ++i )
{
if ( out_bufs[2 * d].index[i] < A->n )
{
for ( pj = A->start[ out_bufs[2 * d].index[i] ]; pj < A->end[ out_bufs[2 * d].index[i] ]; ++pj )
atom = &system->my_atoms[ A->j[pj] ];
Kurt A. O'Hearn
committed
j_send[cnt] = atom->orig_id;
val_send[cnt] = A->val[pj];
Kurt A. O'Hearn
committed
cnt++;
Kurt A. O'Hearn
committed
}
else
{
for ( pj = 0; pj < row_nnz[ out_bufs[2 * d].index[i] ]; ++pj )
Kurt A. O'Hearn
committed
j_send[cnt] = j_list[ out_bufs[2 * d].index[i] ][pj];
val_send[cnt] = val_list[ out_bufs[2 * d].index[i] ][pj];
cnt++;
Kurt A. O'Hearn
committed
}
}
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Send( j_send, cnt, MPI_INT, nbr1->rank, 2 * d, comm );
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Send( val_send, cnt, MPI_DOUBLE, nbr1->rank, 2 * d, comm );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
}
}
if ( out_bufs[2 * d + 1].cnt )
{
cnt = 0;
for ( i = 0; i < out_bufs[2 * d + 1].cnt; ++i )
{
cnt += row_nnz[ out_bufs[2 * d + 1].index[i] ];
}
if ( cnt > 0 )
{
if ( size_send < cnt )
{
if ( size_send )
{
sfree( j_send, "sparse_approx_inverse::j_send" );
sfree( val_send, "sparse_approx_inverse::j_send" );
}
size_send = cnt * SAFE_ZONE;
j_send = smalloc( sizeof(int) * size_send,
"sparse_approx_inverse::j_send" );
val_send = smalloc( sizeof(real) * size_send,
"sparse_approx_inverse::val_send" );
}
cnt = 0;
for ( i = 0; i < out_bufs[2 * d + 1].cnt; ++i )
{
if ( out_bufs[2 * d + 1].index[i] < A->n )
{
for ( pj = A->start[ out_bufs[2 * d + 1].index[i] ]; pj < A->end[ out_bufs[2 * d + 1].index[i] ]; ++pj )
atom = &system->my_atoms[ A->j[pj] ];
Kurt A. O'Hearn
committed
j_send[cnt] = atom->orig_id;
val_send[cnt] = A->val[pj];
Kurt A. O'Hearn
committed
cnt++;
Kurt A. O'Hearn
committed
}
else
{
for ( pj = 0; pj < row_nnz[ out_bufs[2 * d + 1].index[i] ]; ++pj )
Kurt A. O'Hearn
committed
j_send[cnt] = j_list[ out_bufs[2 * d + 1].index[i] ][pj];
val_send[cnt] = val_list[ out_bufs[2 * d + 1].index[i] ][pj];
cnt++;
Kurt A. O'Hearn
committed
}
}
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Send( j_send, cnt, MPI_INT, nbr2->rank, 2 * d + 1, comm );
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Send( val_send, cnt, MPI_DOUBLE, nbr2->rank, 2 * d + 1, comm );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
}
}
if ( flag1 )
{
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Wait( &req1, &stat1 );
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Wait( &req2, &stat2 );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
cnt = 0;
for ( i = nbr1->atoms_str; i < (nbr1->atoms_str + nbr1->atoms_cnt); ++i )
{
j_list[i] = smalloc( sizeof(int) * row_nnz[i],
"sparse_approx_inverse::j_list[i]" );
val_list[i] = smalloc( sizeof(real) * row_nnz[i],
"sparse_approx_inverse::val_list[i]" );
Kurt A. O'Hearn
committed
for ( pj = 0; pj < row_nnz[i]; ++pj )
{
j_list[i][pj] = j_recv1[cnt];
val_list[i][pj] = val_recv1[cnt];
cnt++;
Kurt A. O'Hearn
committed
}
}
if ( flag2 )
{
Kurt A. O'Hearn
committed
t_start = Get_Time( );
ret = MPI_Wait( &req3, &stat3 );
Check_MPI_Error( ret, __FILE__, __LINE__ );
ret = MPI_Wait( &req4, &stat4 );
Check_MPI_Error( ret, __FILE__, __LINE__ );
t_comm += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
cnt = 0;
for ( i = nbr2->atoms_str; i < (nbr2->atoms_str + nbr2->atoms_cnt); ++i )
{
j_list[i] = smalloc( sizeof(int) * row_nnz[i],
"sparse_approx_inverse::j_list[i]" );
val_list[i] = smalloc( sizeof(real) * row_nnz[i],
"sparse_approx_inverse::val_list[i]" );
for ( pj = 0; pj < row_nnz[i]; ++pj )
{
j_list[i][pj] = j_recv2[cnt];
val_list[i][pj] = val_recv2[cnt];
cnt++;
}
}
Kurt A. O'Hearn
committed
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
sfree( j_send, "sparse_approx_inverse::j_send" );
sfree( val_send, "sparse_approx_inverse::val_send" );
sfree( j_recv1, "sparse_approx_inverse::j_recv1" );
sfree( j_recv2, "sparse_approx_inverse::j_recv2" );
sfree( val_recv1, "sparse_approx_inverse::val_recv1" );
sfree( val_recv2, "sparse_approx_inverse::val_recv2" );
X = smalloc( sizeof(int) * (system->bigN + 1),
"sparse_approx_inverse::X" );
//size of q should be equal to the maximum possible cardinalty
//of the set formed by neighbors of neighbors of an atom
//i.e, maximum number of rows of dense matrix
//for water systems, this number is 34000
//for silica systems, it is 12000
q = smalloc( sizeof(int) * 50000,
"sparse_approx_inverse::q" );
for ( i = 0; i <= system->bigN; ++i )
{
X[i] = -1;
}
for ( i = 0; i < A_spar_patt->n; ++i )
{
N = 0;
M = 0;
push = 0;
mark = i + system->bigN;
/* find column indices of nonzeros (which will be the columns indices of the dense matrix) */
for ( pj = A_spar_patt->start[i]; pj < A_spar_patt->end[i]; ++pj )
{
j_temp = A_spar_patt->j[pj];
Kurt A. O'Hearn
committed
atom = &system->my_atoms[j_temp];
++N;
Kurt A. O'Hearn
committed
/* for each of those indices
* search through the row of full A of that index */
Kurt A. O'Hearn
committed
/* the case where the local matrix has that index's row */
if( j_temp < A->n )
{
for ( k = A->start[ j_temp ]; k < A->end[ j_temp ]; ++k )
{
/* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */
atom = &system->my_atoms[ A->j[k] ];
Kurt A. O'Hearn
committed
X[atom->orig_id] = mark;
q[push++] = atom->orig_id;
}
}
Kurt A. O'Hearn
committed
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
/* the case where we communicated that index's row */
else
{
for ( k = 0; k < row_nnz[j_temp]; ++k )
{
/* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */
X[ j_list[j_temp][k] ] = mark;
q[push++] = j_list[j_temp][k];
}
}
}
/* enumerate the row indices from 0 to (# of nonzero rows - 1) for the dense matrix */
identity_pos = M;
atom = &system->my_atoms[ i ];
atom_pos = atom->orig_id;
for ( k = 0; k < push; k++)
{
if ( X[ q[k] ] == mark )
{
X[ q[k] ] = M;
++M;
}
}
identity_pos = X[atom_pos];
/* allocate memory for NxM dense matrix */
if ( size_dense < N * M )
{
if ( size_dense )
{
sfree( dense_matrix, "sparse_approx_inverse::dense_matrix" );
}
size_dense = N * M * SAFE_ZONE;
dense_matrix = smalloc( sizeof(real) * size_dense,
"sparse_approx_inverse::dense_matrix" );
}
/* fill in the entries of dense matrix */
for ( d_j = 0; d_j < N; ++d_j)
{
/* all rows are initialized to zero */
for ( d_i = 0; d_i < M; ++d_i )
{
dense_matrix[d_i * N + d_j] = 0.0;
}
/* change the value if any of the column indices is seen */
/* it is in the original list */
local_pos = A_spar_patt->j[ A_spar_patt->start[i] + d_j ];
Kurt A. O'Hearn
committed
if ( local_pos < A->n )
{
for ( d_i = A->start[local_pos]; d_i < A->end[local_pos]; ++d_i )
{
atom = &system->my_atoms[ A->j[d_i] ];
dense_matrix[ X[ atom->orig_id ] * N + d_j ] = A->val[d_i];
Kurt A. O'Hearn
committed
}
}
else
{
for ( d_i = 0; d_i < row_nnz[ local_pos ]; ++d_i )
{
dense_matrix[ X[ j_list[local_pos][d_i] ] * N + d_j ] = val_list[local_pos][d_i];
}
}
}
Kurt A. O'Hearn
committed
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
/* create the right hand side of the linear equation
* that is the full column of the identity matrix */
if ( size_e < M )
{
if ( size_e )
{
sfree( e_j, "sparse_approx_inverse::e_j" );
}
size_e = M * SAFE_ZONE;
e_j = smalloc( sizeof(real) * size_e, "sparse_approx_inverse::e_j" );
}
for ( k = 0; k < M; ++k )
{
e_j[k] = 0.0;
}
e_j[identity_pos] = 1.0;
/* Solve the overdetermined system AX = B through the least-squares problem:
* min ||B - AX||_2 */
m = M;
n = N;
nrhs = 1;
lda = N;
ldb = nrhs;
info = LAPACKE_dgels( LAPACK_ROW_MAJOR, 'N', m, n, nrhs, dense_matrix, lda,
e_j, ldb );
/* Check for the full rank */
if ( info > 0 )
{
fprintf( stderr, "[ERROR] The diagonal element %i of the triangular factor ", info );
fprintf( stderr, "of A is zero, so that A does not have full rank;\n" );
fprintf( stderr, "the least squares solution could not be computed.\n" );
MPI_Abort( MPI_COMM_WORLD, RUNTIME_ERROR );
}
/* accumulate the resulting vector to build A_app_inv */
(*A_app_inv)->start[i] = A_spar_patt->start[i];
(*A_app_inv)->end[i] = A_spar_patt->end[i];
for ( k = (*A_app_inv)->start[i]; k < (*A_app_inv)->end[i]; ++k)
{
(*A_app_inv)->j[k] = A_spar_patt->j[k];
(*A_app_inv)->val[k] = e_j[k - A_spar_patt->start[i]];
Kurt A. O'Hearn
committed
}
}
sfree( dense_matrix, "sparse_approx_inverse::dense_matrix" );
sfree( e_j, "sparse_approx_inverse::e_j" );
sfree( X, "sparse_approx_inverse::X" );
/*for ( i = 0; i < system->N; ++i )
{
sfree( j_list[i], "sparse_approx_inverse::j_list" );
sfree( val_list[i], "sparse_approx_inverse::val_list" );
}
sfree( j_list, "sparse_approx_inverse::j_list" );
sfree( val_list, "sparse_approx_inverse::val_list" );*/
sfree( row_nnz, "sparse_approx_inverse::row_nnz" );
Kurt A. O'Hearn
committed
ret = MPI_Reduce( &t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM,
MASTER_NODE, MPI_COMM_WORLD );
Check_MPI_Error( ret, __FILE__, __LINE__ );
Kurt A. O'Hearn
committed
if ( system->my_rank == MASTER_NODE )
{
data->timing.cm_solver_comm += total_comm / nprocs;
}
Kurt A. O'Hearn
committed
return Get_Time( ) - start;
Kurt A. O'Hearn
committed
}
#endif
#endif
static void apply_preconditioner( const reax_system * const system,
const storage * const workspace,
const control_params * const control,
mpi_datatypes * const mpi_data,
const real * const y, real * const x,
const int fresh_pre, const int side )
{
// int i, si;
Kurt A. O'Hearn
committed
real t_start, t_pa, t_comm;
Kurt A. O'Hearn
committed
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
/* no preconditioning */
if ( control->cm_solver_pre_comp_type == NONE_PC )
{
if ( x != y )
{
Vector_Copy( x, y, system->n );
}
}
else
{
switch ( side )
{
case LEFT:
switch ( control->cm_solver_pre_app_type )
{
case TRI_SOLVE_PA:
switch ( control->cm_solver_pre_comp_type )
{
case JACOBI_PC:
jacobi_app( workspace->Hdia_inv, y, x, system->n );
break;
// case ICHOLT_PC:
// case ILUT_PC:
// case ILUTP_PC:
// tri_solve( workspace->L, y, x, workspace->L->n, LOWER );
// break;
case SAI_PC:
t_comm += Sparse_MatVec_Comm_Part1( system, control, mpi_data,
y, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
t_start = Get_Time( );
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec_local( &workspace->H_app_inv, y, x, H->NT );
#else
Sparse_MatVec_local( &workspace->H_app_inv, y, x, system->n );
#endif
Kurt A. O'Hearn
committed
t_pa += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
/* no comm part2 because x is only local portion */
break;
default:
fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
break;
case TRI_SOLVE_LEVEL_SCHED_PA:
switch ( control->cm_solver_pre_comp_type )
{
case JACOBI_PC:
jacobi_app( workspace->Hdia_inv, y, x, system->n );
break;
// case ICHOLT_PC:
// case ILUT_PC:
// case ILUTP_PC:
// tri_solve_level_sched( (static_storage *) workspace,
// workspace->L, y, x, workspace->L->n, LOWER, fresh_pre );
// break;
case SAI_PC:
t_comm += Sparse_MatVec_Comm_Part1( system, control, mpi_data,
y, REAL_PTR_TYPE, MPI_DOUBLE );
Kurt A. O'Hearn
committed
t_start = Get_Time( );
Kurt A. O'Hearn
committed
#if defined(NEUTRAL_TERRITORY)
Sparse_MatVec_local( &workspace->H_app_inv, y, x, H->NT );
#else
Sparse_MatVec_local( &workspace->H_app_inv, y, x, system->n );
#endif
Kurt A. O'Hearn
committed
t_pa += Get_Time( ) - t_start;
Kurt A. O'Hearn
committed
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
/* no comm part2 because x is only local portion */
break;
default:
fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
break;
case TRI_SOLVE_GC_PA:
switch ( control->cm_solver_pre_comp_type )
{
case JACOBI_PC:
case SAI_PC:
fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
exit( INVALID_INPUT );
break;
// case ICHOLT_PC:
// case ILUT_PC:
// case ILUTP_PC:
// for ( i = 0; i < workspace->H->n; ++i )
// {
// workspace->y_p[i] = y[i];
// }
//
// permute_vector( workspace, workspace->y_p, workspace->H->n, FALSE, LOWER );
// tri_solve_level_sched( (static_storage *) workspace,
// workspace->L, workspace->y_p, x, workspace->L->n, LOWER, fresh_pre );
// break;
default:
fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
break;
case JACOBI_ITER_PA:
switch ( control->cm_solver_pre_comp_type )
{
case JACOBI_PC:
case SAI_PC:
fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
exit( INVALID_INPUT );
break;
// case ICHOLT_PC:
// case ILUT_PC:
// case ILUTP_PC:
// // construct D^{-1}_L
// if ( fresh_pre == TRUE )
// {
// for ( i = 0; i < workspace->L->n; ++i )
// {
// si = workspace->L->start[i + 1] - 1;
// workspace->Dinv_L[i] = 1.0 / workspace->L->val[si];
// }
// }
//
// jacobi_iter( workspace, workspace->L, workspace->Dinv_L,
// y, x, LOWER, control->cm_solver_pre_app_jacobi_iters );
// break;
default:
fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
break;
default:
fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
break;