Newer
Older
/*----------------------------------------------------------------------
PuReMD - Purdue ReaxFF Molecular Dynamics Program
Copyright (2010) Purdue University
Hasan Metin Aktulga, haktulga@cs.purdue.edu
Joseph Fogarty, jcfogart@mail.usf.edu
Sagar Pandit, pandit@usf.edu
Ananth Y Grama, ayg@cs.purdue.edu
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
See the GNU General Public License for more details:
<http://www.gnu.org/licenses/>.
----------------------------------------------------------------------*/
Kurt A. O'Hearn
committed
#include "reax_types.h"
#include "allocate.h"
#include "basic_comm.h"
#include "io_tools.h"
int compare_matrix_entry(const void *v1, const void *v2)
{
return ((sparse_matrix_entry *)v1)->j - ((sparse_matrix_entry *)v2)->j;
}
void Sort_Matrix_Rows( sparse_matrix *A )
{
int i, si, ei;
for ( i = 0; i < A->n; ++i )
{
si = A->start[i];
ei = A->end[i];
qsort( &(A->entries[si]), ei - si,
sizeof(sparse_matrix_entry), compare_matrix_entry );
}
void Calculate_Droptol( sparse_matrix *A, real *droptol, real dtol )
{
int i, j, k;
real val;
/* init droptol to 0 - not necessary for an upper-triangular A */
for ( i = 0; i < A->n; ++i )
/* calculate sqaure of the norm of each row */
for ( i = 0; i < A->n; ++i )
{
val = A->entries[A->start[i]].val; // diagonal entry
droptol[i] += val * val;
// only within my block
for ( k = A->start[i] + 1; A->entries[k].j < A->n; ++k )
{
j = A->entries[k].j;
val = A->entries[k].val;
droptol[i] += val * val;
droptol[j] += val * val;
}
/* calculate local droptol for each row */
//fprintf( stderr, "droptol: " );
for ( i = 0; i < A->n; ++i )
{
droptol[i] = SQRT( droptol[i] ) * dtol;
//fprintf( stderr, "%f\n", droptol[i] );
}
//fprintf( stderr, "\n" );
}
int Estimate_LU_Fill( sparse_matrix *A, real *droptol )
{
int i, j, pj;
int fillin;
real val;
fillin = 0;
for ( i = 0; i < A->n; ++i )
for ( pj = A->start[i] + 1; A->entries[pj].j < A->n; ++pj )
{
j = A->entries[pj].j;
val = A->entries[pj].val;
Kurt A. O'Hearn
committed
if ( FABS(val) > droptol[i] )
sparse_matrix_entry tmp[1000];
int i, j, pj, k1, k2, tmptop, Utop;
real val, dval;
int *Ltop;
Kurt A. O'Hearn
committed
Ltop = (int*) smalloc( A->n * sizeof(int), "ICHOLT::Ltop" );
for ( i = A->n - 1; i >= 0; --i )
{
U->start[i] = Utop;
tmptop = 0;
for ( pj = A->end[i] - 1; A->entries[pj].j >= A->n; --pj ); // skip ghosts
for ( ; pj > A->start[i]; --pj )
{
j = A->entries[pj].j;
val = A->entries[pj].val;
fprintf( stderr, "i: %d, j: %d val=%f ", i, j, val );
//fprintf( stdout, "%d %d %24.16f\n", 6540-i, 6540-j, val );
//fprintf( stdout, "%d %d %24.16f\n", 6540-j, 6540-i, val );
Kurt A. O'Hearn
committed
if ( FABS(val) > droptol[i] )
{
k1 = tmptop - 1;
k2 = U->start[j] + 1;
while ( k1 >= 0 && k2 < U->end[j] )
{
if ( tmp[k1].j < U->entries[k2].j )
}
// U matrix is upper triangular
val /= U->entries[U->start[j]].val;
fprintf( stderr, " newval=%f", val );
tmp[tmptop].j = j;
tmp[tmptop].val = val;
tmptop++;
}
fprintf( stderr, "\n" );
}
//fprintf( stderr, "i = %d - tmptop = %d\n", i, tmptop );
// compute the ith diagonal in U
dval = A->entries[A->start[i]].val;
//fprintf( stdout, "%d %d %24.16f\n", 6540-i, 6540-i, dval );
for ( k1 = 0; k1 < tmptop; ++k1 )
Kurt A. O'Hearn
committed
//if( FABS(tmp[k1].val) > droptol[i] )
dval = SQRT(dval);
// keep the diagonal in any case
U->entries[Utop].j = i;
U->entries[Utop].val = dval;
Utop++;
fprintf(stderr, "row%d: droptol=%.15f val=%.15f\n", i, droptol[i], dval);
for ( k1 = tmptop - 1; k1 >= 0; --k1 )
{
// apply the dropping rule once again
Kurt A. O'Hearn
committed
if ( FABS(tmp[k1].val) > droptol[i] / dval )
{
U->entries[Utop].j = tmp[k1].j;
U->entries[Utop].val = tmp[k1].val;
Utop++;
Ltop[tmp[k1].j]++;
fprintf( stderr, "%d(%.15f)\n", tmp[k1].j, tmp[k1].val );
}
}
U->end[i] = Utop;
//fprintf( stderr, "i = %d - Utop = %d\n", i, Utop );
fprintf( stderr, "nnz(U): %d\n", Utop );
for ( i = 0; i < U->n; ++i )
{
fprintf( stderr, "row%d: ", i );
for ( pj = U->start[i]; pj < U->end[i]; ++pj )
// transpose matrix U into L
L->start[0] = L->end[0] = 0;
for ( i = 1; i < L->n; ++i )
{
L->start[i] = L->end[i] = L->start[i - 1] + Ltop[i - 1] + 1;
//fprintf( stderr, "i=%d L->start[i]=%d\n", i, L->start[i] );
for ( pj = U->start[i]; pj < U->end[i]; ++pj )
{
j = U->entries[pj].j;
L->entries[L->end[j]].j = i;
L->entries[L->end[j]].val = U->entries[pj].val;
L->end[j]++;
}
fprintf( stderr, "nnz(L): %d\n", L->end[L->n - 1] );
for ( i = 0; i < L->n; ++i )
{
fprintf( stderr, "row%d: ", i );
for ( pj = L->start[i]; pj < L->end[i]; ++pj )
void Init_MatVec( reax_system *system, simulation_data *data,
control_params *control, storage *workspace, mpi_datatypes *mpi_data )
// if( (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 ||
Kurt A. O'Hearn
committed
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
// workspace->L == NULL )
// {
//// Print_Linear_System( system, control, workspace, data->step );
// Sort_Matrix_Rows( workspace->H );
// fprintf( stderr, "H matrix sorted\n" );
//
// Calculate_Droptol( workspace->H, workspace->droptol, control->droptol );
// fprintf( stderr, "drop tolerances calculated\n" );
//
// if( workspace->L == NULL )
// {
// fillin = Estimate_LU_Fill( workspace->H, workspace->droptol );
//
// if( Allocate_Matrix( &(workspace->L), workspace->H->cap, fillin ) == 0 ||
// Allocate_Matrix( &(workspace->U), workspace->H->cap, fillin ) == 0 )
// {
// fprintf( stderr, "not enough memory for LU matrices. terminating.\n" );
// MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY );
// }
//
// workspace->L->n = workspace->H->n;
// workspace->U->n = workspace->H->n;
//
//#if defined(DEBUG_FOCUS)
// fprintf( stderr, "p%d: n=%d, fillin = %d\n",x
// system->my_rank, workspace->L->n, fillin );
// fprintf( stderr, "p%d: allocated memory: L = U = %ldMB\n",
// system->my_rank,fillin*sizeof(sparse_matrix_entry)/(1024*1024) );
//#endif
// }
//
// ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U );
//#if defined(DEBUG_FOCUS)
// fprintf( stderr, "p%d: icholt finished\n", system->my_rank );
//// sprintf( fname, "%s.L%d.out", control->sim_name, data->step );
//// Print_Sparse_Matrix2( workspace->L, fname );
//// Print_Sparse_Matrix( U );
//#endif
// }
for ( i = 0; i < system->n; ++i )
{
atom = &( system->my_atoms[i] );
/* initialize diagonal inverse preconditioner vectors */
workspace->Hdia_inv[i] = 1. / system->reax_param.sbp[ atom->type ].eta;
/* linear extrapolation for s and for t */
// newQEq: no extrapolation!
//workspace->s[i] = 2 * atom->s[0] - atom->s[1]; //0;
//workspace->t[i] = 2 * atom->t[0] - atom->t[1]; //0;
//workspace->x[i][0] = 2 * atom->s[0] - atom->s[1]; //0;
//workspace->x[i][1] = 2 * atom->t[0] - atom->t[1]; //0;
/* quadratic extrapolation for s and t */
// workspace->s[i] = atom->s[2] + 3 * ( atom->s[0] - atom->s[1] );
// workspace->t[i] = atom->t[2] + 3 * ( atom->t[0] - atom->t[1] );
//workspace->x[i][0] = atom->s[2] + 3 * ( atom->s[0] - atom->s[1] );
workspace->x[i][1] = atom->t[2] + 3 * ( atom->t[0] - atom->t[1] );
/* cubic extrapolation for s and t */
workspace->x[i][0] = 4 * (atom->s[0] + atom->s[2]) - (6 * atom->s[1] + atom->s[3]);
//workspace->x[i][1] = 4*(atom->t[0]+atom->t[2])-(6*atom->t[1]+atom->t[3]);
Kurt A. O'Hearn
committed
// fprintf(stderr, "i=%d s=%f t=%f\n", i, workspace->s[i], workspace->t[i]);
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
/* initialize solution vectors for linear solves in charge method */
switch ( control->charge_method )
{
case QEQ_CM:
for ( i = 0; i < system->n; ++i )
{
atom = &( system->my_atoms[i] );
workspace->b_s[i] = -system->reax_param.sbp[ atom->type ].chi;
workspace->b_t[i] = -1.0;
workspace->b[i][0] = -system->reax_param.sbp[ atom->type ].chi;
workspace->b[i][1] = -1.0;
}
break;
case EE_CM:
for ( i = 0; i < system->n; ++i )
{
atom = &( system->my_atoms[i] );
workspace->b_s[i] = -system->reax_param.sbp[ atom->type ].chi;
//TODO: check if unused (redundant)
workspace->b[i][0] = -system->reax_param.sbp[ atom->type ].chi;
}
if ( system->my_rank == 0 )
{
workspace->b_s[system->n] = control->cm_q_net;
workspace->b[system->n][0] = control->cm_q_net;
}
break;
case ACKS2_CM:
for ( i = 0; i < system->n; ++i )
{
atom = &( system->my_atoms[i] );
workspace->b_s[i] = -system->reax_param.sbp[ atom->type ].chi;
//TODO: check if unused (redundant)
workspace->b[i][0] = -system->reax_param.sbp[ atom->type ].chi;
}
if ( system->my_rank == 0 )
{
workspace->b_s[system->n] = control->cm_q_net;
workspace->b[system->n][0] = control->cm_q_net;
}
for ( i = system->n + 1; i < system->N_cm; ++i )
{
atom = &( system->my_atoms[i] );
workspace->b_s[i] = 0.0;
//TODO: check if unused (redundant)
workspace->b[i][0] = 0.0;
}
if ( system->my_rank == 0 )
{
workspace->b_s[system->n] = control->cm_q_net;
workspace->b[system->n][0] = control->cm_q_net;
}
break;
default:
fprintf( stderr, "Unknown charge method type. Terminating...\n" );
exit( INVALID_INPUT );
break;
}
void Calculate_Charges( reax_system *system, storage *workspace,
Kurt A. O'Hearn
committed
int i, scale;
real u;//, s_sum, t_sum;
rvec2 my_sum, all_sum;
reax_atom *atom;
real *q;
scale = sizeof(real) / sizeof(void);
Kurt A. O'Hearn
committed
q = (real*) smalloc( system->N * sizeof(real), "Calculate_Charges::q" );
//s_sum = Parallel_Vector_Acc(workspace->s, system->n, mpi_data->world);
//t_sum = Parallel_Vector_Acc(workspace->t, system->n, mpi_data->world);
my_sum[0] = my_sum[1] = 0;
for ( i = 0; i < system->n; ++i )
{
my_sum[0] += workspace->x[i][0];
my_sum[1] += workspace->x[i][1];
}
Kurt A. O'Hearn
committed
#if defined(DEBUG)
fprintf( stderr, "Host : my_sum[0]: %f and %f \n", my_sum[0], my_sum[1] );
Kurt A. O'Hearn
committed
#endif
MPI_Allreduce( &my_sum, &all_sum, 2, MPI_DOUBLE, MPI_SUM, mpi_data->world );
u = all_sum[0] / all_sum[1];
Kurt A. O'Hearn
committed
#if defined(DEBUG)
fprintf( stderr, "Host : u: %f \n", u );
Kurt A. O'Hearn
committed
#endif
for ( i = 0; i < system->n; ++i )
{
atom = &( system->my_atoms[i] );
/* compute charge based on s & t */
//atom->q = workspace->s[i] - u * workspace->t[i];
q[i] = atom->q = workspace->x[i][0] - u * workspace->x[i][1];
/* backup s & t */
atom->s[3] = atom->s[2];
atom->s[2] = atom->s[1];
atom->s[1] = atom->s[0];
//atom->s[0] = workspace->s[i];
atom->s[0] = workspace->x[i][0];
atom->t[3] = atom->t[2];
atom->t[2] = atom->t[1];
atom->t[1] = atom->t[0];
//atom->t[0] = workspace->t[i];
atom->t[0] = workspace->x[i][1];
}
Dist( system, mpi_data, q, MPI_DOUBLE, scale, real_packer );
for ( i = system->n; i < system->N; ++i )
Kurt A. O'Hearn
committed
sfree( q, "Calculate_Charges::q" );
Kurt A. O'Hearn
committed
void QEq( reax_system *system, control_params *control, simulation_data *data,
storage *workspace, output_controls *out_control,
mpi_datatypes *mpi_data )
int s_matvecs, t_matvecs;
Init_MatVec( system, data, control, workspace, mpi_data );
//if( data->step == 50010 ) {
// Print_Linear_System( system, control, workspace, data->step );
// }
fprintf( stderr, "p%d: initialized qEq\n", system->my_rank );
//Print_Linear_System( system, control, workspace, data->step );
s_matvecs = dual_CG( system, workspace, &workspace->H, workspace->b,
control->cm_solver_q_err, workspace->x, mpi_data, out_control->log, data );
t_matvecs = 0;
//fprintf (stderr, "Host: First CG complated with iterations: %d \n", s_matvecs);
//s_matvecs = CG(system, workspace, workspace->H, workspace->b_s, //newQEq sCG
// control->cm_solver_q_err, workspace->s, mpi_data, out_control->log );
//s_matvecs = PCG( system, workspace, workspace->H, workspace->b_s,
// control->cm_solver_q_err, workspace->L, workspace->U, workspace->s,
fprintf( stderr, "p%d: first CG completed\n", system->my_rank );
//t_matvecs = CG(system, workspace, workspace->H, workspace->b_t, //newQEq sCG
// control->cm_solver_q_err, workspace->t, mpi_data, out_control->log );
//t_matvecs = PCG( system, workspace, workspace->H, workspace->b_t,
// control->cm_solver_q_err, workspace->L, workspace->U, workspace->t,
fprintf( stderr, "p%d: second CG completed\n", system->my_rank );
fprintf( stderr, "p%d: computed charges\n", system->my_rank );
//Print_Charges( system );
if ( system->my_rank == MASTER_NODE )
{
data->timing.s_matvecs += s_matvecs;
data->timing.t_matvecs += t_matvecs;
}