From 6a2464f4160d9459e680db44745ff3c6c5ef7b37 Mon Sep 17 00:00:00 2001
From: Abdullah Alperen <alperena@msu.edu>
Date: Mon, 29 Jan 2018 14:49:06 -0500
Subject: [PATCH] Fix SAI preconditioner setup and computation bugs

---
 sPuReMD/src/charges.c |   6 +-
 sPuReMD/src/init_md.c |   2 +
 sPuReMD/src/lin_alg.c | 631 +++++++++++++++++++++---------------------
 sPuReMD/src/lin_alg.h |   2 +-
 sPuReMD/src/mytypes.h |   2 +-
 5 files changed, 328 insertions(+), 315 deletions(-)

diff --git a/sPuReMD/src/charges.c b/sPuReMD/src/charges.c
index 237faa55..9d2742cd 100644
--- a/sPuReMD/src/charges.c
+++ b/sPuReMD/src/charges.c
@@ -862,7 +862,7 @@ static void Setup_Preconditioner_QEq( const reax_system * const system,
 
         case SAI_PC:
             Setup_Sparsity_Pattern( Hptr, control->cm_solver_pre_comp_sai_thres,
-                    workspace->H_spar_patt );
+                    &workspace->H_spar_patt );
             break;
 
         default:
@@ -1014,7 +1014,7 @@ static void Setup_Preconditioner_EE( const reax_system * const system,
 
         case SAI_PC:
             Setup_Sparsity_Pattern( Hptr, control->cm_solver_pre_comp_sai_thres,
-                    workspace->H_spar_patt );
+                    &workspace->H_spar_patt );
             break;
 
         default:
@@ -1168,7 +1168,7 @@ static void Setup_Preconditioner_ACKS2( const reax_system * const system,
 
         case SAI_PC:
             Setup_Sparsity_Pattern( Hptr, control->cm_solver_pre_comp_sai_thres,
-                    workspace->H_spar_patt );
+                    &workspace->H_spar_patt );
             break;
 
         default:
diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c
index fc9c46ba..d392aa3c 100644
--- a/sPuReMD/src/init_md.c
+++ b/sPuReMD/src/init_md.c
@@ -321,6 +321,8 @@ void Init_Workspace( reax_system *system, control_params *control,
     workspace->H = NULL;
     workspace->H_sp = NULL;
     workspace->L = NULL;
+    workspace->H_spar_patt = NULL;
+    workspace->H_app_inv = NULL;
     workspace->U = NULL;
     workspace->Hdia_inv = NULL;
     if ( control->cm_solver_pre_comp_type == ICHOLT_PC ||
diff --git a/sPuReMD/src/lin_alg.c b/sPuReMD/src/lin_alg.c
index b59d8f5c..ccdfce52 100644
--- a/sPuReMD/src/lin_alg.c
+++ b/sPuReMD/src/lin_alg.c
@@ -20,17 +20,18 @@
   ----------------------------------------------------------------------*/
 
 #include "lin_alg.h"
-
 #include "allocate.h"
 #include "tool_box.h"
 #include "vector.h"
 
+#include "print_utils.h"
+
 /* Intel MKL */
 #if defined(HAVE_LAPACKE_MKL)
-  #include "mkl.h"
+#include "mkl.h"
 /* reference LAPACK */
 #elif defined(HAVE_LAPACKE)
-  #include "lapacke.h"
+#include "lapacke.h"
 #endif
 
 typedef struct
@@ -194,7 +195,7 @@ void Sort_Matrix_Rows( sparse_matrix * const A )
  *   A has non-zero diagonals
  *   Each row of A has at least one non-zero (i.e., no rows with all zeros) */
 static void compute_full_sparse_matrix( const sparse_matrix * const A,
-                                        sparse_matrix ** A_full )
+        sparse_matrix ** A_full )
 {
     int count, i, pj;
     sparse_matrix *A_t;
@@ -207,6 +208,7 @@ static void compute_full_sparse_matrix( const sparse_matrix * const A,
             exit( INSUFFICIENT_MEMORY );
         }
     }
+
     if ( Allocate_Matrix( &A_t, A->n, A->m ) == FAILURE )
     {
         fprintf( stderr, "not enough memory for full A. terminating.\n" );
@@ -219,6 +222,7 @@ static void compute_full_sparse_matrix( const sparse_matrix * const A,
     count = 0;
     for ( i = 0; i < A->n; ++i )
    {
+
         (*A_full)->start[i] = count;
 
         /* A: symmetric, lower triangular portion only stored */
@@ -253,7 +259,7 @@ static void compute_full_sparse_matrix( const sparse_matrix * const A,
  *   A has non-zero diagonals
  *   Each row of A has at least one non-zero (i.e., no rows with all zeros) */
 void Setup_Sparsity_Pattern( const sparse_matrix * const A,
-                             const real filter, sparse_matrix * A_spar_patt )
+        const real filter, sparse_matrix ** A_spar_patt )
 {
     int i, pj, size;
     real min, max, threshold, val;
@@ -261,18 +267,18 @@ void Setup_Sparsity_Pattern( const sparse_matrix * const A,
     min = 0.0;
     max = 0.0;
 
-    if ( A_spar_patt == NULL )
+    if ( *A_spar_patt == NULL )
     {
-        if ( Allocate_Matrix( &A_spar_patt, A->n, A->m ) == FAILURE )
+        if ( Allocate_Matrix( A_spar_patt, A->n, A->m ) == FAILURE )
         {
             fprintf( stderr, "[SAI] Not enough memory for preconditioning matrices. terminating.\n" );
             exit( INSUFFICIENT_MEMORY );
         }
     }
-    else if ( (A_spar_patt->m) < (A->m) )
+    else if ( ((*A_spar_patt)->m) < (A->m) )
     {
-        Deallocate_Matrix( A_spar_patt );
-        if ( Allocate_Matrix( &A_spar_patt, A->n, A->m ) == FAILURE )
+        Deallocate_Matrix( *A_spar_patt );
+        if ( Allocate_Matrix( A_spar_patt, A->n, A->m ) == FAILURE )
         {
             fprintf( stderr, "[SAI] Not enough memory for preconditioning matrices. terminating.\n" );
             exit( INSUFFICIENT_MEMORY );
@@ -304,46 +310,45 @@ void Setup_Sparsity_Pattern( const sparse_matrix * const A,
         }
     }
 
-    threshold = min + ( max - min ) * filter;
-
+    threshold = min + (max-min)*(1.0-filter);
     // calculate the nnz of the sparsity pattern
-//    for ( size = 0, i = 0; i < A->n; ++i )
-//    {
-//        for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj )
-//        {
-//            if ( threshold <= A->val[pj] )
-//                size++;
-//        }
-//    }
-//
-//    if ( Allocate_Matrix( &A_spar_patt, A->n, size ) == NULL )
-//    {
-//        fprintf( stderr, "[SAI] Not enough memory for preconditioning matrices. terminating.\n" );
-//        exit( INSUFFICIENT_MEMORY );
-//    }
-
-    //A_spar_patt->start[A_spar_patt->n] = size;
+    //    for ( size = 0, i = 0; i < A->n; ++i )
+    //    {
+    //        for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj )
+    //        {
+    //            if ( threshold <= A->val[pj] )
+    //                size++;
+    //        }
+    //    }
+    //
+    //    if ( Allocate_Matrix( A_spar_patt, A->n, size ) == NULL )
+    //    {
+    //        fprintf( stderr, "[SAI] Not enough memory for preconditioning matrices. terminating.\n" );
+    //        exit( INSUFFICIENT_MEMORY );
+    //    }
+
+    //(*A_spar_patt)->start[(*A_spar_patt)->n] = size;
 
     // fill the sparsity pattern
     for ( size = 0, i = 0; i < A->n; ++i )
     {
-        A_spar_patt->start[i] = size;
+        (*A_spar_patt)->start[i] = size;
 
         for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj )
         {
-            if ( threshold <= A->val[pj] )
+            if ( ( A->val[pj] >= threshold )  || ( A->j[pj]==i ) )
             {
-                A_spar_patt->val[size] = A->val[pj];
-                A_spar_patt->j[size] = A->j[pj];
+                (*A_spar_patt)->val[size] = A->val[pj];
+                (*A_spar_patt)->j[size] = A->j[pj];
                 size++;
             }
         }
     }
-    A_spar_patt->start[A->n] = size;
+    (*A_spar_patt)->start[A->n] = size;
 }
 
 void Calculate_Droptol( const sparse_matrix * const A,
-                        real * const droptol, const real dtol )
+        real * const droptol, const real dtol )
 {
     int i, j, k;
     real val;
@@ -353,13 +358,13 @@ void Calculate_Droptol( const sparse_matrix * const A,
 #endif
 
 #ifdef _OPENMP
-    #pragma omp parallel default(none) private(i, j, k, val, tid), shared(droptol_local, stderr)
+#pragma omp parallel default(none) private(i, j, k, val, tid), shared(droptol_local, stderr)
 #endif
     {
 #ifdef _OPENMP
         tid = omp_get_thread_num();
 
-        #pragma omp master
+#pragma omp master
         {
             /* keep b_local for program duration to avoid allocate/free
              * overhead per Sparse_MatVec call*/
@@ -373,7 +378,7 @@ void Calculate_Droptol( const sparse_matrix * const A,
             }
         }
 
-        #pragma omp barrier
+#pragma omp barrier
 #endif
 
         /* init droptol to 0 */
@@ -387,12 +392,12 @@ void Calculate_Droptol( const sparse_matrix * const A,
         }
 
 #ifdef _OPENMP
-        #pragma omp barrier
+#pragma omp barrier
 #endif
 
         /* calculate sqaure of the norm of each row */
 #ifdef _OPENMP
-        #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
         for ( i = 0; i < A->n; ++i )
         {
@@ -420,9 +425,9 @@ void Calculate_Droptol( const sparse_matrix * const A,
         }
 
 #ifdef _OPENMP
-        #pragma omp barrier
+#pragma omp barrier
 
-        #pragma omp for schedule(static)
+#pragma omp for schedule(static)
         for ( i = 0; i < A->n; ++i )
         {
             droptol[i] = 0.0;
@@ -432,13 +437,13 @@ void Calculate_Droptol( const sparse_matrix * const A,
             }
         }
 
-        #pragma omp barrier
+#pragma omp barrier
 #endif
 
         /* calculate local droptol for each row */
         //fprintf( stderr, "droptol: " );
 #ifdef _OPENMP
-        #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
         for ( i = 0; i < A->n; ++i )
         {
@@ -460,7 +465,7 @@ int Estimate_LU_Fill( const sparse_matrix * const A, const real * const droptol
     fillin = 0;
 
 #ifdef _OPENMP
-    #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
     default(none) private(i, pj, val) reduction(+: fillin)
 #endif
     for ( i = 0; i < A->n; ++i )
@@ -482,7 +487,7 @@ int Estimate_LU_Fill( const sparse_matrix * const A, const real * const droptol
 
 #if defined(HAVE_SUPERLU_MT)
 real SuperLU_Factorize( const sparse_matrix * const A,
-                        sparse_matrix * const L, sparse_matrix * const U )
+        sparse_matrix * const L, sparse_matrix * const U )
 {
     unsigned int i, pj, count, *Ltop, *Utop, r;
     sparse_matrix *A_t;
@@ -511,10 +516,10 @@ real SuperLU_Factorize( const sparse_matrix * const A,
     /* Default parameters to control factorization. */
 #ifdef _OPENMP
     //TODO: set as global parameter and use
-    #pragma omp parallel \
+#pragma omp parallel \
     default(none) shared(nprocs)
     {
-        #pragma omp master
+#pragma omp master
         {
             /* SuperLU_MT spawns threads internally, so set and pass parameter */
             nprocs = omp_get_num_threads();
@@ -601,7 +606,7 @@ real SuperLU_Factorize( const sparse_matrix * const A,
     xa[i] = count;
 
     dCompRow_to_CompCol( A->n, A->n, 2 * A->start[A->n] - A->n, a, asub, xa,
-                         &at, &atsub, &xat );
+            &at, &atsub, &xat );
 
     for ( i = 0; i < (2 * A->start[A->n] - A->n); ++i )
         fprintf( stderr, "%6d", asub[i] );
@@ -656,8 +661,8 @@ real SuperLU_Factorize( const sparse_matrix * const A,
        Apply perm_c to the columns of original A to form AC.
        ------------------------------------------------------------*/
     pdgstrf_init( nprocs, fact, trans, refact, panel_size, relax,
-                  u, usepr, drop_tol, perm_c, perm_r,
-                  work, lwork, &A_S, &AC_S, &superlumt_options, &Gstat );
+            u, usepr, drop_tol, perm_c, perm_r,
+            work, lwork, &A_S, &AC_S, &superlumt_options, &Gstat );
 
     for ( i = 0; i < ((NCPformat*)AC_S.Store)->nnz; ++i )
         fprintf( stderr, "%6.1f", ((real*)(((NCPformat*)AC_S.Store)->nzval))[i] );
@@ -787,7 +792,7 @@ real diag_pre_comp( const sparse_matrix * const H, real * const Hdia_inv )
     start = Get_Time( );
 
 #ifdef _OPENMP
-    #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
     default(none) private(i)
 #endif
     for ( i = 0; i < H->n; ++i )
@@ -808,7 +813,7 @@ real diag_pre_comp( const sparse_matrix * const H, real * const Hdia_inv )
 
 /* Incomplete Cholesky factorization with dual thresholding */
 real ICHOLT( const sparse_matrix * const A, const real * const droptol,
-             sparse_matrix * const L, sparse_matrix * const U )
+        sparse_matrix * const L, sparse_matrix * const U )
 {
     int *tmp_j;
     real *tmp_val;
@@ -961,7 +966,7 @@ real ICHOLT( const sparse_matrix * const A, const real * const droptol,
  * SIAM J. Sci. Comp. */
 #if defined(TESTING)
 real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
-                sparse_matrix * const U_t, sparse_matrix * const U )
+        sparse_matrix * const U_t, sparse_matrix * const U )
 {
     unsigned int i, j, k, pj, x = 0, y = 0, ei_x, ei_y;
     real *D, *D_inv, sum, start;
@@ -980,7 +985,7 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     }
 
 #ifdef _OPENMP
-    #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
     default(none) shared(D_inv, D) private(i)
 #endif
     for ( i = 0; i < A->n; ++i )
@@ -996,7 +1001,7 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
      * transformation DAD, where D = D(1./SQRT(D(A))) */
     memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) );
 #ifdef _OPENMP
-    #pragma omp parallel for schedule(guided) \
+#pragma omp parallel for schedule(guided) \
     default(none) shared(DAD, D_inv, D) private(i, pj)
 #endif
     for ( i = 0; i < A->n; ++i )
@@ -1022,7 +1027,7 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     {
         /* for each nonzero */
 #ifdef _OPENMP
-        #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
         default(none) shared(DAD, stderr) private(sum, ei_x, ei_y, k) firstprivate(x, y)
 #endif
         for ( j = 0; j < A->start[A->n]; ++j )
@@ -1077,7 +1082,7 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
                     fprintf( stderr, "Numeric breakdown in ICHOL_PAR. Terminating.\n");
 #if defined(DEBUG_FOCUS)
                     fprintf( stderr, "A(%5d,%5d) = %10.3f\n",
-                             k - 1, A->entries[j].j, A->entries[j].val );
+                            k - 1, A->entries[j].j, A->entries[j].val );
                     fprintf( stderr, "sum = %10.3f\n", sum);
 #endif
                     exit(NUMERIC_BREAKDOWN);
@@ -1097,7 +1102,7 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
      * since DAD \approx U^{T}U, so
      * D^{-1}DADD^{-1} = A \approx D^{-1}U^{T}UD^{-1} */
 #ifdef _OPENMP
-    #pragma omp parallel for schedule(guided) \
+#pragma omp parallel for schedule(guided) \
     default(none) shared(D_inv) private(i, pj)
 #endif
     for ( i = 0; i < A->n; ++i )
@@ -1140,7 +1145,7 @@ real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
  * sweeps: number of loops over non-zeros for computation
  * L / U: factorized triangular matrices (A \approx LU), CSR format */
 real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
-              sparse_matrix * const L, sparse_matrix * const U )
+        sparse_matrix * const L, sparse_matrix * const U )
 {
     unsigned int i, j, k, pj, x, y, ei_x, ei_y;
     real *D, *D_inv, sum, start;
@@ -1157,7 +1162,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     }
 
 #ifdef _OPENMP
-    #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
     default(none) shared(D, D_inv) private(i)
 #endif
     for ( i = 0; i < A->n; ++i )
@@ -1171,7 +1176,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
      * transformation DAD, where D = D(1./SQRT(abs(D(A)))) */
     memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) );
 #ifdef _OPENMP
-    #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
     default(none) shared(DAD, D) private(i, pj)
 #endif
     for ( i = 0; i < A->n; ++i )
@@ -1199,7 +1204,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
 
     /* L has unit diagonal, by convention */
 #ifdef _OPENMP
-    #pragma omp parallel for schedule(static) default(none) private(i)
+#pragma omp parallel for schedule(static) default(none) private(i)
 #endif
     for ( i = 0; i < A->n; ++i )
     {
@@ -1210,7 +1215,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     {
         /* for each nonzero in L */
 #ifdef _OPENMP
-        #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
         default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum)
 #endif
         for ( j = 0; j < DAD->start[DAD->n]; ++j )
@@ -1262,7 +1267,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
         }
 
 #ifdef _OPENMP
-        #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
         default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum)
 #endif
         for ( j = 0; j < DAD->start[DAD->n]; ++j )
@@ -1315,7 +1320,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
      * since DAD \approx LU, then
      * D^{-1}DADD^{-1} = A \approx D^{-1}LUD^{-1} */
 #ifdef _OPENMP
-    #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
     default(none) shared(DAD, D_inv) private(i, pj)
 #endif
     for ( i = 0; i < DAD->n; ++i )
@@ -1355,7 +1360,7 @@ real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
  * sweeps: number of loops over non-zeros for computation
  * L / U: factorized triangular matrices (A \approx LU), CSR format */
 real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
-               const unsigned int sweeps, sparse_matrix * const L, sparse_matrix * const U )
+        const unsigned int sweeps, sparse_matrix * const L, sparse_matrix * const U )
 {
     unsigned int i, j, k, pj, x, y, ei_x, ei_y, Ltop, Utop;
     real *D, *D_inv, sum, start;
@@ -1379,7 +1384,7 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
     }
 
 #ifdef _OPENMP
-    #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
     default(none) shared(D, D_inv) private(i)
 #endif
     for ( i = 0; i < A->n; ++i )
@@ -1392,7 +1397,7 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
      * transformation DAD, where D = D(1./SQRT(D(A))) */
     memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) );
 #ifdef _OPENMP
-    #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
     default(none) shared(DAD, D) private(i, pj)
 #endif
     for ( i = 0; i < A->n; ++i )
@@ -1420,7 +1425,7 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
 
     /* L has unit diagonal, by convention */
 #ifdef _OPENMP
-    #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
     default(none) private(i) shared(L_temp)
 #endif
     for ( i = 0; i < A->n; ++i )
@@ -1432,7 +1437,7 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
     {
         /* for each nonzero in L */
 #ifdef _OPENMP
-        #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
         default(none) shared(DAD, L_temp, U_temp) private(j, k, x, y, ei_x, ei_y, sum)
 #endif
         for ( j = 0; j < DAD->start[DAD->n]; ++j )
@@ -1484,7 +1489,7 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
         }
 
 #ifdef _OPENMP
-        #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
         default(none) shared(DAD, L_temp, U_temp) private(j, k, x, y, ei_x, ei_y, sum)
 #endif
         for ( j = 0; j < DAD->start[DAD->n]; ++j )
@@ -1537,7 +1542,7 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
      * since DAD \approx LU, then
      * D^{-1}DADD^{-1} = A \approx D^{-1}LUD^{-1} */
 #ifdef _OPENMP
-    #pragma omp parallel for schedule(static) \
+#pragma omp parallel for schedule(static) \
     default(none) shared(DAD, L_temp, U_temp, D_inv) private(i, pj)
 #endif
     for ( i = 0; i < DAD->n; ++i )
@@ -1611,16 +1616,16 @@ real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
 
 #if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL)
 real Sparse_Approx_Inverse( const sparse_matrix * const A,
-                            const sparse_matrix * const A_spar_patt,
-                            sparse_matrix ** A_app_inv )
+        const sparse_matrix * const A_spar_patt,
+        sparse_matrix ** A_app_inv )
 {
     int i, k, pj, j_temp, identity_pos;
     int N, M, d_i, d_j;
     lapack_int m, n, nrhs, lda, ldb, info;
     int *pos_x, *pos_y;
     real start;
     real *e_j, *dense_matrix;
-    sparse_matrix *A_full, *A_spar_patt_full;
+    sparse_matrix *A_full = NULL, *A_spar_patt_full = NULL;
     char *X, *Y;
 
     start = Get_Time( );
@@ -1642,24 +1648,24 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A,
     compute_full_sparse_matrix( A, &A_full );
     compute_full_sparse_matrix( A_spar_patt, &A_spar_patt_full );
 
     // A_app_inv will be the same as A_spar_patt_full except the val array
     if ( Allocate_Matrix( A_app_inv, A_spar_patt_full->n, A_spar_patt_full->m ) == FAILURE )
     {
         fprintf( stderr, "not enough memory for approximate inverse matrix. terminating.\n" );
         exit( INSUFFICIENT_MEMORY );
     }
+
     (*A_app_inv)->start[(*A_app_inv)->n] = A_spar_patt_full->start[A_spar_patt_full->n];
 
     // For each row of full A_spar_patt
     for ( i = 0; i < A_spar_patt_full->n; ++i )
     {
-        // N = A_spar_patt_full->start[i + 1] - A_spar_patt_full->start[i];
         N = 0;
         M = 0;
 
         // find column indices of nonzeros (which will be the columns indices of the dense matrix)
         for ( pj = A_spar_patt_full->start[i]; pj < A_spar_patt_full->start[i + 1]; ++pj )
         {
             j_temp = A_spar_patt_full->j[pj];
 
             Y[j_temp] = 1;
@@ -1691,7 +1701,7 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A,
         }
 
         // allocate memory for NxM dense matrix
-        smalloc( sizeof(real) * N * M,
+        dense_matrix = (real *) smalloc( sizeof(real) * N * M,
                 "Sparse_Approx_Inverse::dense_matrix" );
 
         // fill in the entries of dense matrix
@@ -1702,22 +1712,22 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A,
             {
                 dense_matrix[d_i * N + d_j] = 0.0;
             }
-
             // change the value if any of the column indices is seen
             for ( d_j = A_full->start[pos_x[d_i]];
-                    d_j < A_full->start[pos_x[d_i + 1]]; ++d_j )
+                    d_j < A_full->start[pos_x[d_i] + 1]; ++d_j )
             {
                 if ( Y[A_full->j[d_j]] == 1 )
                 {
-                    dense_matrix[d_i * N + pos_y[d_j]] = A_full->val[d_j];
+                    dense_matrix[d_i * N + pos_y[A_full->j[d_j]]] = A_full->val[d_j];
                 }
             }
+
         }
 
         /* create the right hand side of the linear equation
            that is the full column of the identity matrix*/
-        smalloc( sizeof(char) * M,
-                "Sparse_Approx_Inverse::M" );
+        e_j = (real *) smalloc( sizeof(real) * M,
+                "Sparse_Approx_Inverse::e_j" );
 
         for ( k = 0; k < M; ++k )
         {
@@ -1734,6 +1744,7 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A,
         /* Executable statements */
         // printf( "LAPACKE_dgels (row-major, high-level) Example Program Results\n" );
         /* Solve the equations A*X = B */
+
         info = LAPACKE_dgels( LAPACK_ROW_MAJOR, 'N', m, n, nrhs, dense_matrix, lda,
                 e_j, ldb );
         /* Check for the full rank */
@@ -1756,24 +1767,24 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A,
         }
 
         //empty variables that will be used next iteration
-        srealloc( dense_matrix, 0, "Sparse_Approx_Inverse::dense_matrix" );
-        srealloc( e_j, 0, "Sparse_Approx_Inverse::e_j"  );
-        for ( i = 0; i < A->n; ++i )
+        sfree( dense_matrix, "Sparse_Approx_Inverse::dense_matrix" );
+        sfree( e_j, "Sparse_Approx_Inverse::e_j"  );
+        for ( k = 0; k < A->n; ++k )
         {
-            X[i] = 0;
-            Y[i] = 0;
-            pos_x[i] = 0;
-            pos_y[i] = 0;
+            X[k] = 0;
+            Y[k] = 0;
+            pos_x[k] = 0;
+            pos_y[k] = 0;
         }
     }
 
-    // Deallocate?
+    // Deallocate
     Deallocate_Matrix( A_full );
     Deallocate_Matrix( A_spar_patt_full );
-    srealloc( X, 0, "Sparse_Approx_Inverse::X" );
-    srealloc( Y, 0, "Sparse_Approx_Inverse::Y" );
-    srealloc( pos_x, 0, "Sparse_Approx_Inverse::pos_x" );
-    srealloc( pos_y, 0, "Sparse_Approx_Inverse::pos_y" );
+    sfree( X, "Sparse_Approx_Inverse::X" );
+    sfree( Y, "Sparse_Approx_Inverse::Y" );
+    sfree( pos_x, "Sparse_Approx_Inverse::pos_x" );
+    sfree( pos_y, "Sparse_Approx_Inverse::pos_y" );
 
     return Get_Timing_Info( start );
 }
@@ -1786,7 +1797,7 @@ real Sparse_Approx_Inverse( const sparse_matrix * const A,
  *   x: vector
  *   b: vector (result) */
 static void Sparse_MatVec( const sparse_matrix * const A,
-                           const real * const x, real * const b )
+        const real * const x, real * const b )
 {
     int i, j, k, n, si, ei;
     real H;
@@ -1800,7 +1811,7 @@ static void Sparse_MatVec( const sparse_matrix * const A,
 #ifdef _OPENMP
     tid = omp_get_thread_num( );
 
-    #pragma omp single
+#pragma omp single
     {
         /* keep b_local for program duration to avoid allocate/free
          * overhead per Sparse_MatVec call*/
@@ -1815,7 +1826,7 @@ static void Sparse_MatVec( const sparse_matrix * const A,
 
     Vector_MakeZero( (real * const)b_local, omp_get_num_threads() * n );
 
-    #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
     for ( i = 0; i < n; ++i )
     {
@@ -1844,7 +1855,7 @@ static void Sparse_MatVec( const sparse_matrix * const A,
     }
 
 #ifdef _OPENMP
-    #pragma omp for schedule(static)
+#pragma omp for schedule(static)
     for ( i = 0; i < n; ++i )
     {
         for ( j = 0; j < omp_get_num_threads(); ++j )
@@ -1936,12 +1947,12 @@ void Transpose_I( sparse_matrix * const A )
  * N: dimensions of preconditioner and vectors (# rows in H)
  */
 static void diag_pre_app( const real * const Hdia_inv, const real * const y,
-                          real * const x, const int N )
+        real * const x, const int N )
 {
     unsigned int i;
 
 #ifdef _OPENMP
-    #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
     for ( i = 0; i < N; ++i )
     {
@@ -1962,13 +1973,13 @@ static void diag_pre_app( const real * const Hdia_inv, const real * const y,
  *   LU has non-zero diagonals
  *   Each row of LU has at least one non-zero (i.e., no rows with all zeros) */
 void tri_solve( const sparse_matrix * const LU, const real * const y,
-                real * const x, const int N, const TRIANGULARITY tri )
+        real * const x, const int N, const TRIANGULARITY tri )
 {
     int i, pj, j, si, ei;
     real val;
 
 #ifdef _OPENMP
-    #pragma omp single
+#pragma omp single
 #endif
     {
         if ( tri == LOWER )
@@ -2020,13 +2031,13 @@ void tri_solve( const sparse_matrix * const LU, const real * const y,
  *   LU has non-zero diagonals
  *   Each row of LU has at least one non-zero (i.e., no rows with all zeros) */
 void tri_solve_level_sched( const sparse_matrix * const LU,
-                            const real * const y, real * const x, const int N,
-                            const TRIANGULARITY tri, int find_levels )
+        const real * const y, real * const x, const int N,
+        const TRIANGULARITY tri, int find_levels )
 {
     int i, j, pj, local_row, local_level;
 
 #ifdef _OPENMP
-    #pragma omp single
+#pragma omp single
 #endif
     {
         if ( tri == LOWER )
@@ -2133,7 +2144,7 @@ void tri_solve_level_sched( const sparse_matrix * const LU,
         for ( i = 0; i < levels; ++i )
         {
 #ifdef _OPENMP
-            #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
             for ( j = level_rows_cnt[i]; j < level_rows_cnt[i + 1]; ++j )
             {
@@ -2153,7 +2164,7 @@ void tri_solve_level_sched( const sparse_matrix * const LU,
         for ( i = 0; i < levels; ++i )
         {
 #ifdef _OPENMP
-            #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
             for ( j = level_rows_cnt[i]; j < level_rows_cnt[i + 1]; ++j )
             {
@@ -2170,7 +2181,7 @@ void tri_solve_level_sched( const sparse_matrix * const LU,
     }
 
 #ifdef _OPENMP
-    #pragma omp single
+#pragma omp single
 #endif
     {
         /* save level info for re-use if performing repeated triangular solves via preconditioning */
@@ -2250,7 +2261,7 @@ static void compute_H_full( const sparse_matrix * const H )
 void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri )
 {
 #ifdef _OPENMP
-    #pragma omp parallel
+#pragma omp parallel
 #endif
     {
 #define MAX_COLOR (500)
@@ -2267,7 +2278,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri )
 #endif
 
 #ifdef _OPENMP
-        #pragma omp single
+#pragma omp single
 #endif
         {
             memset( color, 0, sizeof(unsigned int) * A->n );
@@ -2279,7 +2290,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri )
         if ( tri == LOWER )
         {
 #ifdef _OPENMP
-            #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
             for ( i = 0; i < A->n; ++i )
             {
@@ -2289,7 +2300,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri )
         else
         {
 #ifdef _OPENMP
-            #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
             for ( i = 0; i < A->n; ++i )
             {
@@ -2305,7 +2316,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri )
         }
 
 #ifdef _OPENMP
-        #pragma omp barrier
+#pragma omp barrier
 #endif
 
         while ( recolor_cnt > 0 )
@@ -2314,7 +2325,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri )
 
             /* color vertices */
 #ifdef _OPENMP
-            #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
             for ( i = 0; i < recolor_cnt; ++i )
             {
@@ -2342,14 +2353,14 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri )
             recolor_cnt_local = 0;
 
 #ifdef _OPENMP
-            #pragma omp single
+#pragma omp single
 #endif
             {
                 recolor_cnt = 0;
             }
 
 #ifdef _OPENMP
-            #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
             for ( i = 0; i < temp; ++i )
             {
@@ -2371,9 +2382,9 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri )
             conflict_cnt[tid + 1] = recolor_cnt_local;
 
 #ifdef _OPENMP
-            #pragma omp barrier
+#pragma omp barrier
 
-            #pragma omp master
+#pragma omp master
 #endif
             {
                 conflict_cnt[0] = 0;
@@ -2385,7 +2396,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri )
             }
 
 #ifdef _OPENMP
-            #pragma omp barrier
+#pragma omp barrier
 #endif
 
             /* copy thread-local conflicts into shared buffer */
@@ -2396,9 +2407,9 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri )
             }
 
 #ifdef _OPENMP
-            #pragma omp barrier
+#pragma omp barrier
 
-            #pragma omp single
+#pragma omp single
 #endif
             {
                 temp_ptr = to_color;
@@ -2421,7 +2432,7 @@ void graph_coloring( const sparse_matrix * const A, const TRIANGULARITY tri )
         //#endif
 
 #ifdef _OPENMP
-        #pragma omp barrier
+#pragma omp barrier
 #endif
     }
 }
@@ -2475,12 +2486,12 @@ void sort_colors( const unsigned int n, const TRIANGULARITY tri )
  * tri: coloring to triangular factor to use (lower/upper)
  */
 static void permute_vector( real * const x, const unsigned int n, const int invert_map,
-                            const TRIANGULARITY tri )
+        const TRIANGULARITY tri )
 {
     unsigned int i;
 
 #ifdef _OPENMP
-    #pragma omp single
+#pragma omp single
 #endif
     {
         if ( x_p == NULL )
@@ -2503,7 +2514,7 @@ static void permute_vector( real * const x, const unsigned int n, const int inve
     }
 
 #ifdef _OPENMP
-    #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
     for ( i = 0; i < n; ++i )
     {
@@ -2511,7 +2522,7 @@ static void permute_vector( real * const x, const unsigned int n, const int inve
     }
 
 #ifdef _OPENMP
-    #pragma omp single
+#pragma omp single
 #endif
     {
         memcpy( x, x_p, sizeof(real) * n );
@@ -2667,7 +2678,7 @@ sparse_matrix * setup_graph_coloring( sparse_matrix * const H )
     if ( color == NULL )
     {
 #ifdef _OPENMP
-        #pragma omp parallel
+#pragma omp parallel
         {
             num_thread = omp_get_num_threads();
         }
@@ -2719,15 +2730,15 @@ sparse_matrix * setup_graph_coloring( sparse_matrix * const H )
  * Note: Neumann series arises from series expansion of the inverse of
  * the coefficient matrix in the triangular system */
 void jacobi_iter( const sparse_matrix * const R, const real * const Dinv,
-                  const real * const b, real * const x, const TRIANGULARITY tri, const
-                  unsigned int maxiter )
+        const real * const b, real * const x, const TRIANGULARITY tri, const
+        unsigned int maxiter )
 {
     unsigned int i, k, si = 0, ei = 0, iter;
 
     iter = 0;
 
 #ifdef _OPENMP
-    #pragma omp single
+#pragma omp single
 #endif
     {
         if ( Dinv_b == NULL )
@@ -2760,7 +2771,7 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv,
 
     /* precompute and cache, as invariant in loop below */
 #ifdef _OPENMP
-    #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
     for ( i = 0; i < R->n; ++i )
     {
@@ -2771,7 +2782,7 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv,
     {
         // x_{k+1} = G*x_{k} + Dinv*b;
 #ifdef _OPENMP
-        #pragma omp for schedule(guided)
+#pragma omp for schedule(guided)
 #endif
         for ( i = 0; i < R->n; ++i )
         {
@@ -2799,7 +2810,7 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv,
         }
 
 #ifdef _OPENMP
-        #pragma omp single
+#pragma omp single
 #endif
         {
             rp3 = rp;
@@ -2827,7 +2838,7 @@ void jacobi_iter( const sparse_matrix * const R, const real * const Dinv,
  *   Matrices have non-zero diagonals
  *   Each row of a matrix has at least one non-zero (i.e., no rows with all zeros) */
 static void apply_preconditioner( const static_storage * const workspace, const control_params * const control,
-                                  const real * const y, real * const x, const int fresh_pre )
+        const real * const y, real * const x, const int fresh_pre )
 {
     int i, si;
 
@@ -2840,157 +2851,157 @@ static void apply_preconditioner( const static_storage * const workspace, const
     {
         switch ( control->cm_solver_pre_app_type )
         {
-        case TRI_SOLVE_PA:
-            switch ( control->cm_solver_pre_comp_type )
-            {
-            case DIAG_PC:
-                diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n );
-                break;
-            case ICHOLT_PC:
-            case ILU_PAR_PC:
-            case ILUT_PAR_PC:
-                tri_solve( workspace->L, y, x, workspace->L->n, LOWER );
-                tri_solve( workspace->U, x, x, workspace->U->n, UPPER );
-                break;
-            case SAI_PC:
-                //TODO: add code to compute SAI first
-                //                Sparse_MatVec( SAI, y, x );
-                break;
-            default:
-                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
-                exit( INVALID_INPUT );
-                break;
-            }
-            break;
-        case TRI_SOLVE_LEVEL_SCHED_PA:
-            switch ( control->cm_solver_pre_comp_type )
-            {
-            case DIAG_PC:
-                diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n );
-                break;
-            case ICHOLT_PC:
-            case ILU_PAR_PC:
-            case ILUT_PAR_PC:
-                tri_solve_level_sched( workspace->L, y, x, workspace->L->n, LOWER, fresh_pre );
-                tri_solve_level_sched( workspace->U, x, x, workspace->U->n, UPPER, fresh_pre );
-                break;
-            case SAI_PC:
-            //TODO: add code to compute SAI first
-            //                Sparse_MatVec( SAI, y, x );
-            default:
-                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
-                exit( INVALID_INPUT );
+            case TRI_SOLVE_PA:
+                switch ( control->cm_solver_pre_comp_type )
+                {
+                    case DIAG_PC:
+                        diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n );
+                        break;
+                    case ICHOLT_PC:
+                    case ILU_PAR_PC:
+                    case ILUT_PAR_PC:
+                        tri_solve( workspace->L, y, x, workspace->L->n, LOWER );
+                        tri_solve( workspace->U, x, x, workspace->U->n, UPPER );
+                        break;
+                    case SAI_PC:
+                        //TODO: add code to compute SAI first
+                        //                Sparse_MatVec( SAI, y, x );
+                        break;
+                    default:
+                        fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                        exit( INVALID_INPUT );
+                        break;
+                }
                 break;
-            }
-            break;
-        case TRI_SOLVE_GC_PA:
-            switch ( control->cm_solver_pre_comp_type )
-            {
-            case DIAG_PC:
-            case SAI_PC:
-                fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
-                exit( INVALID_INPUT );
+            case TRI_SOLVE_LEVEL_SCHED_PA:
+                switch ( control->cm_solver_pre_comp_type )
+                {
+                    case DIAG_PC:
+                        diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n );
+                        break;
+                    case ICHOLT_PC:
+                    case ILU_PAR_PC:
+                    case ILUT_PAR_PC:
+                        tri_solve_level_sched( workspace->L, y, x, workspace->L->n, LOWER, fresh_pre );
+                        tri_solve_level_sched( workspace->U, x, x, workspace->U->n, UPPER, fresh_pre );
+                        break;
+                    case SAI_PC:
+                        //TODO: add code to compute SAI first
+                        //                Sparse_MatVec( SAI, y, x );
+                    default:
+                        fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                        exit( INVALID_INPUT );
+                        break;
+                }
                 break;
-            case ICHOLT_PC:
-            case ILU_PAR_PC:
-            case ILUT_PAR_PC:
+            case TRI_SOLVE_GC_PA:
+                switch ( control->cm_solver_pre_comp_type )
+                {
+                    case DIAG_PC:
+                    case SAI_PC:
+                        fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                        exit( INVALID_INPUT );
+                        break;
+                    case ICHOLT_PC:
+                    case ILU_PAR_PC:
+                    case ILUT_PAR_PC:
 #ifdef _OPENMP
-                #pragma omp single
+#pragma omp single
 #endif
-            {
-                memcpy( y_p, y, sizeof(real) * workspace->H->n );
-            }
+                        {
+                            memcpy( y_p, y, sizeof(real) * workspace->H->n );
+                        }
 
-            permute_vector( y_p, workspace->H->n, FALSE, LOWER );
-            tri_solve_level_sched( workspace->L, y_p, x, workspace->L->n, LOWER, fresh_pre );
-            tri_solve_level_sched( workspace->U, x, x, workspace->U->n, UPPER, fresh_pre );
-            permute_vector( x, workspace->H->n, TRUE, UPPER );
-            break;
-            default:
-                fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
-                exit( INVALID_INPUT );
-                break;
-            }
-            break;
-        case JACOBI_ITER_PA:
-            switch ( control->cm_solver_pre_comp_type )
-            {
-            case DIAG_PC:
-            case SAI_PC:
-                fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
-                exit( INVALID_INPUT );
+                        permute_vector( y_p, workspace->H->n, FALSE, LOWER );
+                        tri_solve_level_sched( workspace->L, y_p, x, workspace->L->n, LOWER, fresh_pre );
+                        tri_solve_level_sched( workspace->U, x, x, workspace->U->n, UPPER, fresh_pre );
+                        permute_vector( x, workspace->H->n, TRUE, UPPER );
+                        break;
+                    default:
+                        fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                        exit( INVALID_INPUT );
+                        break;
+                }
                 break;
-            case ICHOLT_PC:
-            case ILU_PAR_PC:
-            case ILUT_PAR_PC:
+            case JACOBI_ITER_PA:
+                switch ( control->cm_solver_pre_comp_type )
+                {
+                    case DIAG_PC:
+                    case SAI_PC:
+                        fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" );
+                        exit( INVALID_INPUT );
+                        break;
+                    case ICHOLT_PC:
+                    case ILU_PAR_PC:
+                    case ILUT_PAR_PC:
 #ifdef _OPENMP
-                #pragma omp single
+#pragma omp single
 #endif
-            {
-                if ( Dinv_L == NULL )
-                {
-                    if ( (Dinv_L = (real*) malloc(sizeof(real) * workspace->L->n)) == NULL )
-                    {
-                        fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" );
-                        exit( INSUFFICIENT_MEMORY );
-                    }
-                }
-            }
+                        {
+                            if ( Dinv_L == NULL )
+                            {
+                                if ( (Dinv_L = (real*) malloc(sizeof(real) * workspace->L->n)) == NULL )
+                                {
+                                    fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" );
+                                    exit( INSUFFICIENT_MEMORY );
+                                }
+                            }
+                        }
 
-                /* construct D^{-1}_L */
-            if ( fresh_pre == TRUE )
-            {
+                        /* construct D^{-1}_L */
+                        if ( fresh_pre == TRUE )
+                        {
 #ifdef _OPENMP
-                #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
-                for ( i = 0; i < workspace->L->n; ++i )
-                {
-                    si = workspace->L->start[i + 1] - 1;
-                    Dinv_L[i] = 1. / workspace->L->val[si];
-                }
-            }
+                            for ( i = 0; i < workspace->L->n; ++i )
+                            {
+                                si = workspace->L->start[i + 1] - 1;
+                                Dinv_L[i] = 1. / workspace->L->val[si];
+                            }
+                        }
 
-            jacobi_iter( workspace->L, Dinv_L, y, x, LOWER, control->cm_solver_pre_app_jacobi_iters );
+                        jacobi_iter( workspace->L, Dinv_L, y, x, LOWER, control->cm_solver_pre_app_jacobi_iters );
 
 #ifdef _OPENMP
-            #pragma omp single
+#pragma omp single
 #endif
-            {
-                if ( Dinv_U == NULL )
-                {
-                    if ( (Dinv_U = (real*) malloc(sizeof(real) * workspace->U->n)) == NULL )
-                    {
-                        fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" );
-                        exit( INSUFFICIENT_MEMORY );
-                    }
-                }
-            }
+                        {
+                            if ( Dinv_U == NULL )
+                            {
+                                if ( (Dinv_U = (real*) malloc(sizeof(real) * workspace->U->n)) == NULL )
+                                {
+                                    fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" );
+                                    exit( INSUFFICIENT_MEMORY );
+                                }
+                            }
+                        }
 
-                /* construct D^{-1}_U */
-            if ( fresh_pre == TRUE )
-            {
+                        /* construct D^{-1}_U */
+                        if ( fresh_pre == TRUE )
+                        {
 #ifdef _OPENMP
-                #pragma omp for schedule(static)
+#pragma omp for schedule(static)
 #endif
-                for ( i = 0; i < workspace->U->n; ++i )
-                {
-                    si = workspace->U->start[i];
-                    Dinv_U[i] = 1. / workspace->U->val[si];
-                }
-            }
+                            for ( i = 0; i < workspace->U->n; ++i )
+                            {
+                                si = workspace->U->start[i];
+                                Dinv_U[i] = 1. / workspace->U->val[si];
+                            }
+                        }
 
-            jacobi_iter( workspace->U, Dinv_U, y, x, UPPER, control->cm_solver_pre_app_jacobi_iters );
-            break;
+                        jacobi_iter( workspace->U, Dinv_U, y, x, UPPER, control->cm_solver_pre_app_jacobi_iters );
+                        break;
+                    default:
+                        fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
+                        exit( INVALID_INPUT );
+                        break;
+                }
+                break;
             default:
                 fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
                 exit( INVALID_INPUT );
                 break;
-            }
-            break;
-        default:
-            fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" );
-            exit( INVALID_INPUT );
-            break;
 
         }
     }
@@ -2999,8 +3010,8 @@ static void apply_preconditioner( const static_storage * const workspace, const
 
 /* generalized minimal residual iterative solver for sparse linear systems */
 int GMRES( const static_storage * const workspace, const control_params * const control,
-           simulation_data * const data, const sparse_matrix * const H, const real * const b,
-           const real tol, real * const x, const int fresh_pre )
+        simulation_data * const data, const sparse_matrix * const H, const real * const b,
+        const real tol, real * const x, const int fresh_pre )
 {
     int i, j, k, itr, N, g_j, g_itr;
     real cc, tmp1, tmp2, temp, ret_temp, bnorm, time_start;
@@ -3008,7 +3019,7 @@ int GMRES( const static_storage * const workspace, const control_params * const
     N = H->n;
 
 #ifdef _OPENMP
-    #pragma omp parallel default(none) private(i, j, k, itr, bnorm, ret_temp) \
+#pragma omp parallel default(none) private(i, j, k, itr, bnorm, ret_temp) \
     shared(N, cc, tmp1, tmp2, temp, time_start, g_itr, g_j, stderr)
 #endif
     {
@@ -3016,14 +3027,14 @@ int GMRES( const static_storage * const workspace, const control_params * const
         itr = 0;
 
 #ifdef _OPENMP
-        #pragma omp master
+#pragma omp master
 #endif
         {
             time_start = Get_Time( );
         }
         bnorm = Norm( b, N );
 #ifdef _OPENMP
-        #pragma omp master
+#pragma omp master
 #endif
         {
             data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start );
@@ -3033,14 +3044,14 @@ int GMRES( const static_storage * const workspace, const control_params * const
         {
             /* apply preconditioner to residual */
 #ifdef _OPENMP
-            #pragma omp master
+#pragma omp master
 #endif
             {
                 time_start = Get_Time( );
             }
             apply_preconditioner( workspace, control, b, workspace->b_prc, fresh_pre );
 #ifdef _OPENMP
-            #pragma omp master
+#pragma omp master
 #endif
             {
                 data->timing.cm_solver_pre_app += Get_Timing_Info( time_start );
@@ -3052,14 +3063,14 @@ int GMRES( const static_storage * const workspace, const control_params * const
         {
             /* calculate r0 */
 #ifdef _OPENMP
-            #pragma omp master
+#pragma omp master
 #endif
             {
                 time_start = Get_Time( );
             }
             Sparse_MatVec( H, x, workspace->b_prm );
 #ifdef _OPENMP
-            #pragma omp master
+#pragma omp master
 #endif
             {
                 data->timing.cm_solver_spmv += Get_Timing_Info( time_start );
@@ -3068,14 +3079,14 @@ int GMRES( const static_storage * const workspace, const control_params * const
             if ( control->cm_solver_pre_comp_type == DIAG_PC )
             {
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     time_start = Get_Time( );
                 }
                 apply_preconditioner( workspace, control, workspace->b_prm, workspace->b_prm, FALSE );
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     data->timing.cm_solver_pre_app += Get_Timing_Info( time_start );
@@ -3085,14 +3096,14 @@ int GMRES( const static_storage * const workspace, const control_params * const
             if ( control->cm_solver_pre_comp_type == DIAG_PC )
             {
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     time_start = Get_Time( );
                 }
                 Vector_Sum( workspace->v[0], 1., workspace->b_prc, -1., workspace->b_prm, N );
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start );
@@ -3101,14 +3112,14 @@ int GMRES( const static_storage * const workspace, const control_params * const
             else
             {
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     time_start = Get_Time( );
                 }
                 Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N );
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start );
@@ -3118,15 +3129,15 @@ int GMRES( const static_storage * const workspace, const control_params * const
             if ( control->cm_solver_pre_comp_type != DIAG_PC )
             {
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     time_start = Get_Time( );
                 }
                 apply_preconditioner( workspace, control, workspace->v[0], workspace->v[0],
-                                      itr == 0 ? fresh_pre : FALSE );
+                        itr == 0 ? fresh_pre : FALSE );
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     data->timing.cm_solver_pre_app += Get_Timing_Info( time_start );
@@ -3134,14 +3145,14 @@ int GMRES( const static_storage * const workspace, const control_params * const
             }
 
 #ifdef _OPENMP
-            #pragma omp master
+#pragma omp master
 #endif
             {
                 time_start = Get_Time( );
             }
             ret_temp = Norm( workspace->v[0], N );
 #ifdef _OPENMP
-            #pragma omp single
+#pragma omp single
 #endif
             {
                 workspace->g[0] = ret_temp;
@@ -3149,7 +3160,7 @@ int GMRES( const static_storage * const workspace, const control_params * const
 
             Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N );
 #ifdef _OPENMP
-            #pragma omp master
+#pragma omp master
 #endif
             {
                 data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start );
@@ -3160,7 +3171,7 @@ int GMRES( const static_storage * const workspace, const control_params * const
             {
                 /* matvec */
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     time_start = Get_Time( );
@@ -3168,14 +3179,14 @@ int GMRES( const static_storage * const workspace, const control_params * const
                 Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] );
 
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     data->timing.cm_solver_spmv += Get_Timing_Info( time_start );
                 }
 
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     time_start = Get_Time( );
@@ -3183,7 +3194,7 @@ int GMRES( const static_storage * const workspace, const control_params * const
                 apply_preconditioner( workspace, control, workspace->v[j + 1], workspace->v[j + 1], FALSE );
 
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     data->timing.cm_solver_pre_app += Get_Timing_Info( time_start );
@@ -3193,7 +3204,7 @@ int GMRES( const static_storage * const workspace, const control_params * const
                 //                {
                 /* apply modified Gram-Schmidt to orthogonalize the new residual */
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     time_start = Get_Time( );
@@ -3203,7 +3214,7 @@ int GMRES( const static_storage * const workspace, const control_params * const
                     ret_temp = Dot( workspace->v[i], workspace->v[j + 1], N );
 
 #ifdef _OPENMP
-                    #pragma omp single
+#pragma omp single
 #endif
                     {
                         workspace->h[i][j] = ret_temp;
@@ -3213,7 +3224,7 @@ int GMRES( const static_storage * const workspace, const control_params * const
 
                 }
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start );
@@ -3260,31 +3271,31 @@ int GMRES( const static_storage * const workspace, const control_params * const
                 //                }
 
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     time_start = Get_Time( );
                 }
                 ret_temp = Norm( workspace->v[j + 1], N );
 #ifdef _OPENMP
-                #pragma omp single
+#pragma omp single
 #endif
                 {
                     workspace->h[j + 1][j] = ret_temp;
                 }
 
                 Vector_Scale( workspace->v[j + 1],
-                              1.0 / workspace->h[j + 1][j], workspace->v[j + 1], N );
+                        1.0 / workspace->h[j + 1][j], workspace->v[j + 1], N );
 
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start );
                 }
 
 #ifdef _OPENMP
-                #pragma omp master
+#pragma omp master
 #endif
                 {
                     time_start = Get_Time( );
@@ -3302,9 +3313,9 @@ int GMRES( const static_storage * const workspace, const control_params * const
                         }
 
                         tmp1 =  workspace->hc[i] * workspace->h[i][j] +
-                                workspace->hs[i] * workspace->h[i + 1][j];
+                            workspace->hs[i] * workspace->h[i + 1][j];
                         tmp2 = -workspace->hs[i] * workspace->h[i][j] +
-                               workspace->hc[i] * workspace->h[i + 1][j];
+                            workspace->hc[i] * workspace->h[i + 1][j];
 
                         workspace->h[i][j] = tmp1;
                         workspace->h[i + 1][j] = tmp2;
@@ -3343,13 +3354,13 @@ int GMRES( const static_storage * const workspace, const control_params * const
                 }
 
 #ifdef _OPENMP
-                #pragma omp barrier
+#pragma omp barrier
 #endif
             }
 
             /* solve Hy = g: H is now upper-triangular, do back-substitution */
 #ifdef _OPENMP
-            #pragma omp master
+#pragma omp master
 #endif
             {
                 time_start = Get_Time( );
@@ -3381,7 +3392,7 @@ int GMRES( const static_storage * const workspace, const control_params * const
             Vector_Add( x, 1., workspace->p, N );
 
 #ifdef _OPENMP
-            #pragma omp master
+#pragma omp master
 #endif
             {
                 data->timing.cm_solver_vector_ops += Get_Timing_Info( time_start );
@@ -3395,7 +3406,7 @@ int GMRES( const static_storage * const workspace, const control_params * const
         }
 
 #ifdef _OPENMP
-        #pragma omp master
+#pragma omp master
 #endif
         {
             g_itr = itr;
@@ -3414,9 +3425,9 @@ int GMRES( const static_storage * const workspace, const control_params * const
 
 
 int GMRES_HouseHolder( const static_storage * const workspace,
-                       const control_params * const control, simulation_data * const data,
-                       const sparse_matrix * const H, const real * const b, real tol,
-                       real * const x, const int fresh_pre )
+        const control_params * const control, simulation_data * const data,
+        const sparse_matrix * const H, const real * const b, real tol,
+        real * const x, const int fresh_pre )
 {
     int i, j, k, itr, N;
     real cc, tmp1, tmp2, temp, bnorm;
@@ -3623,7 +3634,7 @@ int CG( const static_storage * const workspace, const control_params * const con
     z = workspace->p;
 
 #ifdef _OPENMP
-    #pragma omp parallel default(none) private(i, tmp, alpha, beta, b_norm, r_norm, sig_old, sig_new) \
+#pragma omp parallel default(none) private(i, tmp, alpha, beta, b_norm, r_norm, sig_old, sig_new) \
     shared(itr, N, d, r, p, z)
 #endif
     {
@@ -3659,7 +3670,7 @@ int CG( const static_storage * const workspace, const control_params * const con
         }
 
 #ifdef _OPENMP
-        #pragma omp single
+#pragma omp single
 #endif
         itr = i;
     }
@@ -3676,8 +3687,8 @@ int CG( const static_storage * const workspace, const control_params * const con
 
 /* Steepest Descent */
 int SDM( const static_storage * const workspace, const control_params * const control,
-         const sparse_matrix * const H, const real * const b, const real tol,
-         real * const x, const int fresh_pre )
+        const sparse_matrix * const H, const real * const b, const real tol,
+        real * const x, const int fresh_pre )
 {
     int i, itr, N;
     real tmp, alpha, b_norm;
@@ -3686,7 +3697,7 @@ int SDM( const static_storage * const workspace, const control_params * const co
     N = H->n;
 
 #ifdef _OPENMP
-    #pragma omp parallel default(none) private(i, tmp, alpha, b_norm, sig) \
+#pragma omp parallel default(none) private(i, tmp, alpha, b_norm, sig) \
     shared(itr, N)
 #endif
     {
@@ -3710,7 +3721,7 @@ int SDM( const static_storage * const workspace, const control_params * const co
              * (Dot function has persistent state in the form
              * of a shared global variable for the OpenMP version) */
 #ifdef _OPENMP
-            #pragma omp barrier
+#pragma omp barrier
 #endif
 
             tmp = Dot( workspace->d, workspace->q, N );
@@ -3723,7 +3734,7 @@ int SDM( const static_storage * const workspace, const control_params * const co
         }
 
 #ifdef _OPENMP
-        #pragma omp single
+#pragma omp single
 #endif
         itr = i;
     }
diff --git a/sPuReMD/src/lin_alg.h b/sPuReMD/src/lin_alg.h
index 683b0c44..6caf7a1a 100644
--- a/sPuReMD/src/lin_alg.h
+++ b/sPuReMD/src/lin_alg.h
@@ -33,7 +33,7 @@ typedef enum
 void Sort_Matrix_Rows( sparse_matrix * const );
 
 void Setup_Sparsity_Pattern( const sparse_matrix * const, 
-        const real, sparse_matrix * );
+        const real, sparse_matrix ** );
 
 int Estimate_LU_Fill( const sparse_matrix * const, const real * const );
 
diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h
index a66758b1..51f24d40 100644
--- a/sPuReMD/src/mytypes.h
+++ b/sPuReMD/src/mytypes.h
@@ -607,7 +607,7 @@ typedef struct
     unsigned int cm_solver_pre_comp_refactor;
     real cm_solver_pre_comp_droptol;
     unsigned int cm_solver_pre_comp_sweeps;
-    unsigned int cm_solver_pre_comp_sai_thres;
+    real cm_solver_pre_comp_sai_thres;
     unsigned int cm_solver_pre_app_type;
     unsigned int cm_solver_pre_app_jacobi_iters;
 
-- 
GitLab