Skip to content
Snippets Groups Projects
charges.c 51.3 KiB
Newer Older
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
/*----------------------------------------------------------------------
  SerialReax - Reax Force Field Simulator
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
  Copyright (2010) Purdue University
  Hasan Metin Aktulga, haktulga@cs.purdue.edu
  Joseph Fogarty, jcfogart@mail.usf.edu
  Sagar Pandit, pandit@usf.edu
  Ananth Y Grama, ayg@cs.purdue.edu
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
  published by the Free Software Foundation; either version 2 of
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
  the License, or (at your option) any later version.
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
  See the GNU General Public License for more details:
  <http://www.gnu.org/licenses/>.
  ----------------------------------------------------------------------*/

Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#include "allocate.h"
#include "list.h"
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
#include "print_utils.h"
#if defined(HAVE_SUPERLU_MT)
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

static void Extrapolate_Charges_QEq( const reax_system * const system,
        const control_params * const control,
        simulation_data * const data, static_storage * const workspace )
{
    int i;
    real s_tmp, t_tmp;

    /* extrapolation for s & t */
    //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer
    #pragma omp parallel for schedule(static) \
        default(none) private(i, s_tmp, t_tmp)
#endif
    for ( i = 0; i < system->N_cm; ++i )
    {
        // no extrapolation
        //s_tmp = workspace->s[0][i];
        //t_tmp = workspace->t[0][i];

        // linear
        //s_tmp = 2 * workspace->s[0][i] - workspace->s[1][i];
        //t_tmp = 2 * workspace->t[0][i] - workspace->t[1][i];

        // quadratic
        //s_tmp = workspace->s[2][i] + 3 * (workspace->s[0][i]-workspace->s[1][i]);
        t_tmp = workspace->t[2][i] + 3 * (workspace->t[0][i] - workspace->t[1][i]);

        // cubic
        s_tmp = 4 * (workspace->s[0][i] + workspace->s[2][i]) -
                (6 * workspace->s[1][i] + workspace->s[3][i] );
        //t_tmp = 4 * (workspace->t[0][i] + workspace->t[2][i]) -
        //  (6 * workspace->t[1][i] + workspace->t[3][i] );

        // 4th order
        //s_tmp = 5 * (workspace->s[0][i] - workspace->s[3][i]) +
        //  10 * (-workspace->s[1][i] + workspace->s[2][i] ) + workspace->s[4][i];
        //t_tmp = 5 * (workspace->t[0][i] - workspace->t[3][i]) +
        //  10 * (-workspace->t[1][i] + workspace->t[2][i] ) + workspace->t[4][i];

        workspace->s[4][i] = workspace->s[3][i];
        workspace->s[3][i] = workspace->s[2][i];
        workspace->s[2][i] = workspace->s[1][i];
        workspace->s[1][i] = workspace->s[0][i];
        workspace->s[0][i] = s_tmp;

        workspace->t[4][i] = workspace->t[3][i];
        workspace->t[3][i] = workspace->t[2][i];
        workspace->t[2][i] = workspace->t[1][i];
        workspace->t[1][i] = workspace->t[0][i];
        workspace->t[0][i] = t_tmp;
    }
}


static void Extrapolate_Charges_EE( const reax_system * const system,
        const control_params * const control,
        simulation_data * const data, static_storage * const workspace )
{
    int i;
    real s_tmp;

    /* extrapolation for s */
    //TODO: good candidate for vectorization, avoid moving data with head pointer and circular buffer
    #pragma omp parallel for schedule(static) \
        default(none) private(i, s_tmp)
    for ( i = 0; i < system->N_cm; ++i )
    {
        // no extrapolation
        //s_tmp = workspace->s[0][i];

        // linear
        //s_tmp = 2 * workspace->s[0][i] - workspace->s[1][i];

        // quadratic
        //s_tmp = workspace->s[2][i] + 3 * (workspace->s[0][i]-workspace->s[1][i]);

        // cubic
        s_tmp = 4 * (workspace->s[0][i] + workspace->s[2][i]) -
                (6 * workspace->s[1][i] + workspace->s[3][i] );

        // 4th order
        //s_tmp = 5 * (workspace->s[0][i] - workspace->s[3][i]) +
        //  10 * (-workspace->s[1][i] + workspace->s[2][i] ) + workspace->s[4][i];

        workspace->s[4][i] = workspace->s[3][i];
        workspace->s[3][i] = workspace->s[2][i];
        workspace->s[2][i] = workspace->s[1][i];
        workspace->s[1][i] = workspace->s[0][i];
        workspace->s[0][i] = s_tmp;
    }
}


/* Compute preconditioner for QEq
static void Compute_Preconditioner_QEq( const reax_system * const system,
        const control_params * const control,
        simulation_data * const data, static_storage * const workspace,
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
{
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

    if ( control->cm_domain_sparsify_enabled == TRUE )
#if defined(TEST_MAT)
    Hptr = create_test_mat( );
#endif

    time = Get_Time( );
    if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA )
    {
        setup_graph_coloring( control, workspace, Hptr, &workspace->H_full, &workspace->H_p );
        Sort_Matrix_Rows( workspace->H_p );
        Hptr = workspace->H_p;
    }
    data->timing.cm_sort_mat_rows += Get_Timing_Info( time );

    switch ( control->cm_solver_pre_comp_type )
    {
        case DIAG_PC:
            data->timing.cm_solver_pre_comp +=
                diag_pre_comp( Hptr, workspace->Hdia_inv );
            break;
        case ICHOLT_PC:
            data->timing.cm_solver_pre_comp +=
                ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U );
            break;
        case ILU_PAR_PC:
            data->timing.cm_solver_pre_comp +=
                ILU_PAR( Hptr, control->cm_solver_pre_comp_sweeps, workspace->L, workspace->U );
            break;

        case ILUT_PAR_PC:
            data->timing.cm_solver_pre_comp +=
                ILUT_PAR( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps,
                        workspace->L, workspace->U );
            break;
#if defined(HAVE_SUPERLU_MT)
            data->timing.cm_solver_pre_comp +=
                SuperLU_Factorize( Hptr, workspace->L, workspace->U );
            fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" );
            exit( INVALID_INPUT );
#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL)
            data->timing.cm_solver_pre_comp +=
                sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full,
                        &workspace->H_app_inv );
#else
            fprintf( stderr, "LAPACKE support disabled. Re-compile before enabling. Terminating...\n" );
            exit( INVALID_INPUT );
#endif
        default:
            fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" );
            exit( INVALID_INPUT );
            break;
#define SIZE (1000)
    char fname[SIZE];

    if ( control->cm_solver_pre_comp_type != NONE_PC && 
            control->cm_solver_pre_comp_type != DIAG_PC )
    {
        fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) );

#if defined(DEBUG_FOCUS)
        snprintf( fname, SIZE + 10, "%s.L%d.out", control->sim_name, data->step );
        Print_Sparse_Matrix2( workspace->L, fname, NULL );
        snprintf( fname, SIZE + 10, "%s.U%d.out", control->sim_name, data->step );
        Print_Sparse_Matrix2( workspace->U, fname, NULL );
#undef SIZE
/* Compute preconditioner for EE
//static void Compute_Preconditioner_EE( const reax_system * const system,
//        const control_params * const control,
//        simulation_data * const data, static_storage * const workspace,
//{
//    int i, top;
//    static real * ones = NULL, * x = NULL, * y = NULL;
//    sparse_matrix *Hptr;
//
//    Hptr = workspace->H_EE;
//
//#if defined(TEST_MAT)
//    Hptr = create_test_mat( );
//#endif
//
//    if ( ones == NULL )
//    {
//        ones = (real*) smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::ones" );
//        x = (real*) smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::x" );
//        y = (real*) smalloc( system->N * sizeof(real), "Compute_Preconditioner_EE::y" );
//
//        for ( i = 0; i < system->N; ++i )
//        {
//            ones[i] = 1.0;
//        }
//    }
//
//    switch ( control->cm_solver_pre_comp_type )
//    {
//    case DIAG_PC:
//        data->timing.cm_solver_pre_comp +=
//            diag_pre_comp( Hptr, workspace->Hdia_inv );
//        break;
//
//    case ICHOLT_PC:
//        data->timing.cm_solver_pre_comp +=
//            ICHOLT( Hptr, workspace->droptol, workspace->L_EE, workspace->U_EE );
//        break;
//
//    case ILU_PAR_PC:
//        data->timing.cm_solver_pre_comp +=
//            ILU_PAR( Hptr, control->cm_solver_pre_comp_sweeps, workspace->L_EE, workspace->U_EE );
//        break;
//
//    case ILUT_PAR_PC:
//        data->timing.cm_solver_pre_comp +=
//            ILUT_PAR( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps,
//                    workspace->L_EE, workspace->U_EE );
//        break;
//
//    case ILU_SUPERLU_MT_PC:
//#if defined(HAVE_SUPERLU_MT)
//        data->timing.cm_solver_pre_comp +=
//            SuperLU_Factorize( Hptr, workspace->L_EE, workspace->U_EE );
//#else
//        fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" );
//        exit( INVALID_INPUT );
//#endif
//        break;
//
//    default:
//        fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" );
//        exit( INVALID_INPUT );
//        break;
//    }
//
//    if ( control->cm_solver_pre_comp_type != DIAG_PC )
//    {
//        switch ( control->cm_solver_pre_app_type )
//        {
//            case TRI_SOLVE_PA:
//                tri_solve( workspace->L_EE, ones, x, workspace->L_EE->n, LOWER );
//                Transpose_I( workspace->U_EE );
//                tri_solve( workspace->U_EE, ones, y, workspace->U_EE->n, LOWER );
//                Transpose_I( workspace->U_EE );
//
//                memcpy( workspace->L->start, workspace->L_EE->start, sizeof(unsigned int) * (system->N + 1) );
//                memcpy( workspace->L->j, workspace->L_EE->j, sizeof(unsigned int) * workspace->L_EE->start[workspace->L_EE->n] );
//                memcpy( workspace->L->val, workspace->L_EE->val, sizeof(real) * workspace->L_EE->start[workspace->L_EE->n] );
//
//                top = workspace->L->start[system->N];
//                for ( i = 0; i < system->N; ++i )
//                {
//                    workspace->L->j[top] = i;
//                    workspace->L->val[top] = x[i];
//                    ++top;
//                }
//
//                workspace->L->j[top] = system->N_cm - 1;
//                workspace->L->val[top] = 1.0;
//                ++top;
//
//                workspace->L->start[system->N_cm] = top;
//
//                top = 0;
//                for ( i = 0; i < system->N; ++i )
//                {
//                    workspace->U->start[i] = top;
//                    memcpy( workspace->U->j + top, workspace->U_EE->j + workspace->U_EE->start[i],
//                            sizeof(unsigned int) * (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]) );
//                    memcpy( workspace->U->val + top, workspace->U_EE->val + workspace->U_EE->start[i],
//                            sizeof(real) * (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]) );
//                    top += (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]);
//
//                    workspace->U->j[top] = system->N_cm - 1;
//                    workspace->U->val[top] = y[i];
//                    ++top;
//                }
//
//                workspace->U->start[system->N_cm - 1] = top;
//
//                workspace->U->j[top] = system->N_cm - 1;
//                workspace->U->val[top] = -Dot( x, y, system->N );
//                ++top;
//
//                workspace->U->start[system->N_cm] = top;
//                break;
//
//            case TRI_SOLVE_LEVEL_SCHED_PA:
//                tri_solve_level_sched( workspace->L_EE, ones, x, workspace->L_EE->n, LOWER, TRUE );
//                Transpose_I( workspace->U_EE );
//                tri_solve_level_sched( workspace->U_EE, ones, y, workspace->U_EE->n, LOWER, TRUE );
//                Transpose_I( workspace->U_EE );
//
//                memcpy( workspace->L->start, workspace->L_EE->start, sizeof(unsigned int) * (system->N + 1) );
//                memcpy( workspace->L->j, workspace->L_EE->j, sizeof(unsigned int) * workspace->L_EE->start[workspace->L_EE->n] );
//                memcpy( workspace->L->val, workspace->L_EE->val, sizeof(real) * workspace->L_EE->start[workspace->L_EE->n] );
//
//                top = workspace->L->start[system->N];
//                for ( i = 0; i < system->N; ++i )
//                {
//                    workspace->L->j[top] = i;
//                    workspace->L->val[top] = x[i];
//                    ++top;
//                }
//
//                workspace->L->j[top] = system->N_cm - 1;
//                workspace->L->val[top] = 1.0;
//                ++top;
//
//                workspace->L->start[system->N_cm] = top;
//
//                top = 0;
//                for ( i = 0; i < system->N; ++i )
//                {
//                    workspace->U->start[i] = top;
//                    memcpy( workspace->U->j + top, workspace->U_EE->j + workspace->U_EE->start[i],
//                            sizeof(unsigned int) * (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]) );
//                    memcpy( workspace->U->val + top, workspace->U_EE->val + workspace->U_EE->start[i],
//                            sizeof(real) * (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]) );
//                    top += (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]);
//
//                    workspace->U->j[top] = system->N_cm - 1;
//                    workspace->U->val[top] = y[i];
//                    ++top;
//                }
//
//                workspace->U->start[system->N_cm - 1] = top;
//
//                workspace->U->j[top] = system->N_cm - 1;
//                workspace->U->val[top] = -Dot( x, y, system->N );
//                ++top;
//
//                workspace->U->start[system->N_cm] = top;
//                break;
//
//            //TODO: add Jacobi iter, etc.?
//            default:
//                tri_solve( workspace->L_EE, ones, x, workspace->L_EE->n, LOWER );
//                Transpose_I( workspace->U_EE );
//                tri_solve( workspace->U_EE, ones, y, workspace->U_EE->n, LOWER );
//                Transpose_I( workspace->U_EE );
//
//                memcpy( workspace->L->start, workspace->L_EE->start, sizeof(unsigned int) * (system->N + 1) );
//                memcpy( workspace->L->j, workspace->L_EE->j, sizeof(unsigned int) * workspace->L_EE->start[workspace->L_EE->n] );
//                memcpy( workspace->L->val, workspace->L_EE->val, sizeof(real) * workspace->L_EE->start[workspace->L_EE->n] );
//
//                top = workspace->L->start[system->N];
//                for ( i = 0; i < system->N; ++i )
//                {
//                    workspace->L->j[top] = i;
//                    workspace->L->val[top] = x[i];
//                    ++top;
//                }
//
//                workspace->L->j[top] = system->N_cm - 1;
//                workspace->L->val[top] = 1.0;
//                ++top;
//
//                workspace->L->start[system->N_cm] = top;
//
//                top = 0;
//                for ( i = 0; i < system->N; ++i )
//                {
//                    workspace->U->start[i] = top;
//                    memcpy( workspace->U->j + top, workspace->U_EE->j + workspace->U_EE->start[i],
//                            sizeof(unsigned int) * (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]) );
//                    memcpy( workspace->U->val + top, workspace->U_EE->val + workspace->U_EE->start[i],
//                            sizeof(real) * (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]) );
//                    top += (workspace->U_EE->start[i + 1] - workspace->U_EE->start[i]);
//
//                    workspace->U->j[top] = system->N_cm - 1;
//                    workspace->U->val[top] = y[i];
//                    ++top;
//                }
//
//                workspace->U->start[system->N_cm - 1] = top;
//
//                workspace->U->j[top] = system->N_cm - 1;
//                workspace->U->val[top] = -Dot( x, y, system->N );
//                ++top;
//
//                workspace->U->start[system->N_cm] = top;
//                break;
//        }
//    }
//
//#if defined(DEBUG)
//#define SIZE (1000)
//    char fname[SIZE];
//
//    if ( control->cm_solver_pre_comp_type != DIAG_PC )
//    {
//        fprintf( stderr, "condest = %f\n", condest(workspace->L) );
//
//#if defined(DEBUG_FOCUS)
//        snprintf( fname, SIZE + 10, "%s.L%d.out", control->sim_name, data->step );
//        Print_Sparse_Matrix2( workspace->L, fname, NULL );
//        snprintf( fname, SIZE + 10, "%s.U%d.out", control->sim_name, data->step );
//        Print_Sparse_Matrix2( workspace->U, fname, NULL );
//
//        fprintf( stderr, "icholt-" );
//        snprintf( fname, SIZE + 10, "%s.L%d.out", control->sim_name, data->step );
//        Print_Sparse_Matrix2( workspace->L, fname, NULL );
//        Print_Sparse_Matrix( U );
//#endif
//    }
//#undef SIZE
//#endif
//}


/* Compute preconditioner for EE
 */
static void Compute_Preconditioner_EE( const reax_system * const system,
        const control_params * const control,
        simulation_data * const data, static_storage * const workspace,
    if ( control->cm_domain_sparsify_enabled == TRUE )
    {
        Hptr = workspace->H_sp;
    }
    else
    {
        Hptr = workspace->H;
    }
#if defined(TEST_MAT)
    Hptr = create_test_mat( );
#endif

    if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
            control->cm_solver_pre_comp_type == ILU_PAR_PC ||
            control->cm_solver_pre_comp_type == ILUT_PAR_PC )
    {
        Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0;
    }

    time = Get_Time( );
    if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA )
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed
    {
        setup_graph_coloring( control, workspace, Hptr, &workspace->H_full, &workspace->H_p );
        Sort_Matrix_Rows( workspace->H_p );
        Hptr = workspace->H_p;
    data->timing.cm_sort_mat_rows += Get_Timing_Info( time );
    
    switch ( control->cm_solver_pre_comp_type )
    {
        case DIAG_PC:
            data->timing.cm_solver_pre_comp +=
                diag_pre_comp( Hptr, workspace->Hdia_inv );
            break;
        case ICHOLT_PC:
            data->timing.cm_solver_pre_comp +=
                ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U );
            break;
        case ILU_PAR_PC:
            data->timing.cm_solver_pre_comp +=
                ILU_PAR( Hptr, control->cm_solver_pre_comp_sweeps, workspace->L, workspace->U );
            break;

        case ILUT_PAR_PC:
            data->timing.cm_solver_pre_comp +=
                ILUT_PAR( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps,
                        workspace->L, workspace->U );
            break;
#if defined(HAVE_SUPERLU_MT)
            data->timing.cm_solver_pre_comp +=
                SuperLU_Factorize( Hptr, workspace->L, workspace->U );
            fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" );
            exit( INVALID_INPUT );
#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL)
            data->timing.cm_solver_pre_comp +=
                sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full,
                        &workspace->H_app_inv );
#else
            fprintf( stderr, "LAPACKE support disabled. Re-compile before enabling. Terminating...\n" );
            exit( INVALID_INPUT );
#endif
        default:
            fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" );
            exit( INVALID_INPUT );
            break;
    if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA )
    {
        if ( control->cm_domain_sparsify_enabled == TRUE )
        {
            Hptr = workspace->H_sp;
        }
        else
        {
            Hptr = workspace->H;
        }
    }

    if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
            control->cm_solver_pre_comp_type == ILU_PAR_PC ||
            control->cm_solver_pre_comp_type == ILUT_PAR_PC )
    {
        Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0;
    }
#define SIZE (1000)
    char fname[SIZE];

    if ( control->cm_solver_pre_comp_type != NONE_PC && 
            control->cm_solver_pre_comp_type != DIAG_PC )
        fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) );
        snprintf( fname, SIZE + 10, "%s.L%d.out", control->sim_name, data->step );
        Print_Sparse_Matrix2( workspace->L, fname, NULL );
        snprintf( fname, SIZE + 10, "%s.U%d.out", control->sim_name, data->step );
        Print_Sparse_Matrix2( workspace->U, fname, NULL );
#undef SIZE
/* Compute preconditioner for ACKS2
 */
static void Compute_Preconditioner_ACKS2( const reax_system * const system,
        const control_params * const control,
        simulation_data * const data, static_storage * const workspace,
    sparse_matrix *Hptr;

    if ( control->cm_domain_sparsify_enabled == TRUE )
    {
        Hptr = workspace->H_sp;
    }
    else
    {
        Hptr = workspace->H;
    }

#if defined(TEST_MAT)
    Hptr = create_test_mat( );
#endif

    if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
            control->cm_solver_pre_comp_type == ILU_PAR_PC ||
            control->cm_solver_pre_comp_type == ILUT_PAR_PC )
    {
        Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0;
        Hptr->val[Hptr->start[system->N_cm] - 1] = 1.0;
    }

    time = Get_Time( );
    if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA )
    {
        setup_graph_coloring( control, workspace, Hptr, &workspace->H_full, &workspace->H_p );
        Sort_Matrix_Rows( workspace->H_p );
        Hptr = workspace->H_p;
    }
    data->timing.cm_sort_mat_rows += Get_Timing_Info( time );
        case DIAG_PC:
            data->timing.cm_solver_pre_comp +=
                diag_pre_comp( Hptr, workspace->Hdia_inv );
            break;
        case ICHOLT_PC:
            data->timing.cm_solver_pre_comp +=
                ICHOLT( Hptr, workspace->droptol, workspace->L, workspace->U );
            break;
        case ILU_PAR_PC:
            data->timing.cm_solver_pre_comp +=
                ILU_PAR( Hptr, control->cm_solver_pre_comp_sweeps, workspace->L, workspace->U );
            break;

        case ILUT_PAR_PC:
            data->timing.cm_solver_pre_comp +=
                ILUT_PAR( Hptr, workspace->droptol, control->cm_solver_pre_comp_sweeps,
                        workspace->L, workspace->U );
            break;
            data->timing.cm_solver_pre_comp +=
                SuperLU_Factorize( Hptr, workspace->L, workspace->U );
            fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" );
            exit( INVALID_INPUT );
#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL)
            data->timing.cm_solver_pre_comp +=
                sparse_approx_inverse( workspace->H_full, workspace->H_spar_patt_full,
                        &workspace->H_app_inv );
#else
            fprintf( stderr, "LAPACKE support disabled. Re-compile before enabling. Terminating...\n" );
            exit( INVALID_INPUT );
#endif
        default:
            fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" );
            exit( INVALID_INPUT );
            break;
    if ( control->cm_solver_pre_app_type == TRI_SOLVE_GC_PA )
    {
        if ( control->cm_domain_sparsify_enabled == TRUE )
        {
            Hptr = workspace->H_sp;
        }
        else
        {
            Hptr = workspace->H;
        }
    }

    if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
            control->cm_solver_pre_comp_type == ILU_PAR_PC ||
            control->cm_solver_pre_comp_type == ILUT_PAR_PC )
    {
        Hptr->val[Hptr->start[system->N + 1] - 1] = 0.0;
        Hptr->val[Hptr->start[system->N_cm] - 1] = 0.0;
    }
#define SIZE (1000)
    char fname[SIZE];

    if ( control->cm_solver_pre_comp_type != NONE_PC || 
            control->cm_solver_pre_comp_type != DIAG_PC )
    {
        fprintf( stderr, "condest = %f\n", condest(workspace->L, workspace->U) );

#if defined(DEBUG_FOCUS)
        snprintf( fname, SIZE + 10, "%s.L%d.out", control->sim_name, data->step );
        Print_Sparse_Matrix2( workspace->L, fname, NULL );
        snprintf( fname, SIZE + 10, "%s.U%d.out", control->sim_name, data->step );
        Print_Sparse_Matrix2( workspace->U, fname, NULL );
#undef SIZE
static void Setup_Preconditioner_QEq( const reax_system * const system,
        const control_params * const control,
        simulation_data * const data, static_storage * const workspace,
    if ( control->cm_domain_sparsify_enabled == TRUE )
    {
        Hptr = workspace->H_sp;
    }
    else
    {
        Hptr = workspace->H;
    }

    /* sort H needed for SpMV's in linear solver, H or H_sp needed for preconditioning */
    time = Get_Time( );
    Sort_Matrix_Rows( workspace->H );
    if ( control->cm_domain_sparsify_enabled == TRUE )
    {
        Sort_Matrix_Rows( workspace->H_sp );
    }
    data->timing.cm_sort_mat_rows += Get_Timing_Info( time );

    switch ( control->cm_solver_pre_comp_type )
    {
        case NONE_PC:
            break;

        case DIAG_PC:
            if ( workspace->Hdia_inv == NULL )
                workspace->Hdia_inv = (real *) scalloc( Hptr->n, sizeof( real ),
                        "Setup_Preconditioner_QEq::workspace->Hdiv_inv" );
        case ICHOLT_PC:
            Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol );
            fillin = Estimate_LU_Fill( Hptr, workspace->droptol );
                if ( Allocate_Matrix( &(workspace->L), Hptr->n, fillin ) == FAILURE ||
                        Allocate_Matrix( &(workspace->U), Hptr->n, fillin ) == FAILURE )
                {
                    fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
                    exit( INSUFFICIENT_MEMORY );
                }
        case ILU_PAR_PC:
            if ( workspace->L == NULL )
                /* factors have sparsity pattern as H */
                if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == FAILURE ||
                        Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == FAILURE )
                {
                    fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
                    exit( INSUFFICIENT_MEMORY );
                }
Kurt A. O'Hearn's avatar
Kurt A. O'Hearn committed

        case ILUT_PAR_PC:
            Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol );
                /* TODO: safest storage estimate is ILU(0) (same as lower triangular portion of H), could improve later */
                if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == FAILURE ||
                        Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == FAILURE )
                {
                    fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
                    exit( INSUFFICIENT_MEMORY );
                }
        case ILU_SUPERLU_MT_PC:
            if ( workspace->L == NULL )
                /* factors have sparsity pattern as H */
                if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == FAILURE ||
                        Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == FAILURE )
                {
                    fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
                    exit( INSUFFICIENT_MEMORY );
                }
            setup_sparse_approx_inverse( Hptr, &workspace->H_full, &workspace->H_spar_patt,
                    &workspace->H_spar_patt_full, &workspace->H_app_inv,
                    control->cm_solver_pre_comp_sai_thres );
        default:
            fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" );
            exit( INVALID_INPUT );
            break;
/* Setup routines before computing the preconditioner for EE
static void Setup_Preconditioner_EE( const reax_system * const system,
        const control_params * const control,
        simulation_data * const data, static_storage * const workspace,
    if ( control->cm_domain_sparsify_enabled == TRUE )
    {
        Hptr = workspace->H_sp;
    }
    else
    {
        Hptr = workspace->H;
    }

    /* sorted H needed for SpMV's in linear solver, H or H_sp needed for preconditioning */
    time = Get_Time( );
    Sort_Matrix_Rows( workspace->H );
    if ( control->cm_domain_sparsify_enabled == TRUE )
    {
        Sort_Matrix_Rows( workspace->H_sp );
    }
    data->timing.cm_sort_mat_rows += Get_Timing_Info( time );
    if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
            control->cm_solver_pre_comp_type == ILU_PAR_PC ||
            control->cm_solver_pre_comp_type == ILUT_PAR_PC )
    {
        Hptr->val[Hptr->start[system->N + 1] - 1] = 1.0;
    }
    switch ( control->cm_solver_pre_comp_type )
    {
        case NONE_PC:
            break;

        case DIAG_PC:
            if ( workspace->Hdia_inv == NULL )
                workspace->Hdia_inv = (real *) scalloc( system->N_cm, sizeof( real ),
                        "Setup_Preconditioner_QEq::workspace->Hdiv_inv" );
        case ICHOLT_PC:
            Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol );
            fillin = Estimate_LU_Fill( Hptr, workspace->droptol );
                if ( Allocate_Matrix( &(workspace->L), system->N_cm, fillin + system->N_cm ) == FAILURE ||
                        Allocate_Matrix( &(workspace->U), system->N_cm, fillin + system->N_cm ) == FAILURE )
                {
                    fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
                    exit( INSUFFICIENT_MEMORY );
                }
        case ILU_PAR_PC:
            if ( workspace->L == NULL )
            {
                /* factors have sparsity pattern as H */
                if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == FAILURE ||
                        Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == FAILURE )
                {
                    fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
                    exit( INSUFFICIENT_MEMORY );
                }
            }
            else
        case ILUT_PAR_PC:
            Calculate_Droptol( Hptr, workspace->droptol, control->cm_solver_pre_comp_droptol );
            if ( workspace->L == NULL )
            {
                /* TODO: safest storage estimate is ILU(0) (same as lower triangular portion of H), could improve later */
                if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == FAILURE ||
                        Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == FAILURE )
                {
                    fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
                    exit( INSUFFICIENT_MEMORY );
                }
            }
            else
        case ILU_SUPERLU_MT_PC:
            if ( workspace->L == NULL )
                /* factors have sparsity pattern as H */
                if ( Allocate_Matrix( &(workspace->L), Hptr->n, Hptr->m ) == FAILURE ||
                        Allocate_Matrix( &(workspace->U), Hptr->n, Hptr->m ) == FAILURE )
                {
                    fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" );
                    exit( INSUFFICIENT_MEMORY );
                }
            setup_sparse_approx_inverse( Hptr, &workspace->H_full, &workspace->H_spar_patt,
                    &workspace->H_spar_patt_full, &workspace->H_app_inv,