diff --git a/sPuReMD/src/GMRES.c b/sPuReMD/src/GMRES.c
index b4387244785fb56c1869d1f5937ba0c247ad6cde..c08019ff0d4f6d6b000782a8a59163d6ee81fea6 100644
--- a/sPuReMD/src/GMRES.c
+++ b/sPuReMD/src/GMRES.c
@@ -74,7 +74,7 @@ static void Sparse_MatVec( const sparse_matrix * const A,
     #pragma omp barrier
 #endif
 
-    #pragma omp for schedule(guided)
+    #pragma omp for schedule(static)
     for ( i = 0; i < n; ++i )
     {
         si = A->start[i];
@@ -101,7 +101,7 @@ static void Sparse_MatVec( const sparse_matrix * const A,
 #endif
     }
 
 #ifdef _OPENMP
-    #pragma omp for schedule(guided)
+    #pragma omp for schedule(static)
     for ( i = 0; i < n; ++i )
     {
         for ( j = 0; j < omp_get_num_threads(); ++j )
@@ -120,8 +120,8 @@ static void diag_pre_app( const real * const Hdia_inv, const real * const y,
 {
     unsigned int i;
 
-    #pragma omp parallel for schedule(guided) \
-        default(none) private(i)
+    #pragma omp parallel for schedule(static) \
+        default(none) private(i)
     for ( i = 0; i < N; ++i )
     {
         x[i] = y[i] * Hdia_inv[i];
@@ -198,6 +198,7 @@ static void tri_solve_level_sched( const sparse_matrix * const LU, const real *
     static int levels_L = 1, levels_U = 1;
     static unsigned int *row_levels_L = NULL, *level_rows_L = NULL, *level_rows_cnt_L = NULL;
     static unsigned int *row_levels_U = NULL, *level_rows_U = NULL, *level_rows_cnt_U = NULL;
+    static unsigned int *top = NULL;
     unsigned int *row_levels, *level_rows, *level_rows_cnt;
 
     if ( tri == LOWER )
@@ -218,8 +219,17 @@ static void tri_solve_level_sched( const sparse_matrix * const LU, const real *
     if ( row_levels == NULL || level_rows == NULL || level_rows_cnt == NULL )
     {
         if ( (row_levels = (unsigned int*) malloc((size_t)LU->n * sizeof(unsigned int))) == NULL
-                || (level_rows = (unsigned int*) malloc(MAX_ROWS_PER_LEVEL * (size_t)LU->n * sizeof(unsigned int))) == NULL
-                || (level_rows_cnt = (unsigned int*) malloc((size_t)LU->n * sizeof(unsigned int))) == NULL )
+                || (level_rows = (unsigned int*) malloc((size_t)LU->n * sizeof(unsigned int))) == NULL
+                || (level_rows_cnt = (unsigned int*) malloc((size_t)(LU->n + 1) * sizeof(unsigned int))) == NULL )
+        {
+            fprintf( stderr, "Not enough space for triangular solve via level scheduling. Terminating...\n" );
+            exit( INSUFFICIENT_MEMORY );
+        }
+    }
+
+    if ( top == NULL )
+    {
+        if ( (top = (unsigned int*) malloc((size_t)(LU->n + 1) * sizeof(unsigned int))) == NULL )
         {
             fprintf( stderr, "Not enough space for triangular solve via level scheduling. Terminating...\n" );
             exit( INSUFFICIENT_MEMORY );
@@ -229,52 +239,57 @@ static void tri_solve_level_sched( const sparse_matrix * const LU, const real *
     /* find levels (row dependencies in substitutions) */
     if ( find_levels )
     {
-        memset( row_levels, 0, LU->n * sizeof( unsigned int) );
-        memset( level_rows_cnt, 0, LU->n * sizeof( unsigned int) );
+        memset( row_levels, 0, LU->n * sizeof(unsigned int) );
+        memset( level_rows_cnt, 0, LU->n * sizeof(unsigned int) );
+        memset( top, 0, LU->n * sizeof(unsigned int) );
 
         if ( tri == LOWER )
         {
             for ( i = 0; i < LU->n; ++i )
             {
-                local_level = 0;
+                local_level = 1;
                 for ( pj = LU->start[i]; pj < LU->start[i + 1] - 1; ++pj )
                 {
                     local_level = MAX( local_level, row_levels[LU->j[pj]] + 1 );
                 }
 
-                levels = MAX( levels, local_level + 1 );
+                levels = MAX( levels, local_level );
                 row_levels[i] = local_level;
-                level_rows[local_level * MAX_ROWS_PER_LEVEL + level_rows_cnt[local_level]] = i;
                 ++level_rows_cnt[local_level];
-                if ( level_rows_cnt[local_level] >= MAX_ROWS_PER_LEVEL )
-                {
-                    fprintf( stderr, "Not enough space for triangular solve via level scheduling" );
-                    fprintf( stderr, " (MAX_ROWS_PER_LEVEL). Terminating...\n" );
-                    exit( INSUFFICIENT_MEMORY );
-                }
             }
+
+            printf("levels(L): %d\n", levels);
+            printf("NNZ(L): %d\n", LU->start[LU->n]);
         }
         else
         {
             for ( i = LU->n - 1; i >= 0; --i )
             {
-                local_level = 0;
+                local_level = 1;
                 for ( pj = LU->start[i] + 1; pj < LU->start[i + 1]; ++pj )
                 {
                     local_level = MAX( local_level, row_levels[LU->j[pj]] + 1 );
                 }
 
-                levels = MAX( levels, local_level + 1 );
+                levels = MAX( levels, local_level );
                 row_levels[i] = local_level;
-                level_rows[local_level * MAX_ROWS_PER_LEVEL + level_rows_cnt[local_level]] = i;
                 ++level_rows_cnt[local_level];
-                if ( level_rows_cnt[local_level] >= MAX_ROWS_PER_LEVEL )
-                {
-                    fprintf( stderr, "Not enough space for triangular solve via level scheduling" );
-                    fprintf( stderr, " (MAX_ROWS_PER_LEVEL). Terminating...\n" );
-                    exit( INSUFFICIENT_MEMORY );
-                }
             }
+
+            printf("levels(U): %d\n", levels);
+            printf("NNZ(U): %d\n", LU->start[LU->n]);
+        }
+
+        for ( i = 1; i < levels + 1; ++i )
+        {
+            level_rows_cnt[i] += level_rows_cnt[i - 1];
+            top[i] = level_rows_cnt[i];
+        }
+
+        for ( i = 0; i < LU->n; ++i )
+        {
+            level_rows[top[row_levels[i] - 1]] = i;
+            ++top[row_levels[i] - 1];
         }
     }
 
@@ -286,9 +301,9 @@ static void tri_solve_level_sched( const sparse_matrix * const LU, const real *
         for ( i = 0; i < levels; ++i )
         {
             #pragma omp for schedule(static)
-            for ( j = 0; j < level_rows_cnt[i]; ++j )
+            for ( j = level_rows_cnt[i]; j < level_rows_cnt[i + 1]; ++j )
             {
-                local_row = level_rows[i * MAX_ROWS_PER_LEVEL + j];
+                local_row = level_rows[j];
                 x[local_row] = y[local_row];
                 for ( pj = LU->start[local_row]; pj < LU->start[local_row + 1] - 1; ++pj )
                 {
@@ -304,9 +319,9 @@ static void tri_solve_level_sched( const sparse_matrix * const LU, const real *
         for ( i = 0; i < levels; ++i )
         {
             #pragma omp for schedule(static)
-            for ( j = 0; j < level_rows_cnt[i]; ++j )
+            for ( j = level_rows_cnt[i]; j < level_rows_cnt[i + 1]; ++j )
             {
-                local_row = level_rows[i * MAX_ROWS_PER_LEVEL + j];
+                local_row = level_rows[j];
                 x[local_row] = y[local_row];
                 for ( pj = LU->start[local_row] + 1; pj < LU->start[local_row + 1]; ++pj )
                 {
@@ -411,7 +426,7 @@ static void jacobi_iter( const sparse_matrix * const R, const real * const Dinv,
     #pragma omp barrier
 
     /* precompute and cache, as invariant in loop below */
-    #pragma omp for schedule(guided)
+    #pragma omp for schedule(static)
     for ( i = 0; i < R->n; ++i )
     {
         Dinv_b[i] = Dinv[i] * b[i];
@@ -432,7 +447,7 @@ static void jacobi_iter( const sparse_matrix * const R, const real * const Dinv,
 
         #pragma omp barrier
 
-        #pragma omp for schedule(guided)
+        #pragma omp for schedule(static)
         for ( i = 0; i < R->n; ++i )
        {
             if (tri == LOWER)
@@ -464,7 +479,7 @@ static void jacobi_iter( const sparse_matrix * const R, const real * const Dinv,
 
         #pragma omp barrier
 
-        #pragma omp for schedule(guided)
+        #pragma omp for schedule(static)
         for ( i = 0; i < R->n; ++i )
         {
 #ifdef _OPENMP
diff --git a/sPuReMD/src/QEq.c b/sPuReMD/src/QEq.c
index 5c5b47791b965846fddded0fa24e82b8c09ccb9c..514330bdb342caa65b3446f10cd463cabca75b43 100644
--- a/sPuReMD/src/QEq.c
+++ b/sPuReMD/src/QEq.c
@@ -198,7 +198,7 @@ static void Calculate_Droptol( const sparse_matrix * const A, real * const dropt
     #pragma omp barrier
 
     /* calculate sqaure of the norm of each row */
-    #pragma omp for schedule(guided)
+    #pragma omp for schedule(static)
     for ( i = 0; i < A->n; ++i )
     {
         for ( k = A->start[i]; k < A->start[i + 1] - 1; ++k )
@@ -226,7 +226,7 @@ static void Calculate_Droptol( const sparse_matrix * const A, real * const dropt
     #pragma omp barrier
 
 #ifdef _OPENMP
-    #pragma omp for schedule(guided)
+    #pragma omp for schedule(static)
     for ( i = 0; i < A->n; ++i )
     {
         droptol[i] = 0.0;
@@ -241,7 +241,7 @@ static void Calculate_Droptol( const sparse_matrix * const A, real * const dropt
 
     /* calculate local droptol for each row */
     //fprintf( stderr, "droptol: " );
-    #pragma omp for schedule(guided)
+    #pragma omp for schedule(static)
     for ( i = 0; i < A->n; ++i )
     {
         //fprintf( stderr, "%f-->", droptol[i] );
@@ -261,8 +261,8 @@ static int Estimate_LU_Fill( const sparse_matrix * const A, const real * const d
 
     fillin = 0;
 
-    #pragma omp parallel for schedule(guided) \
-        default(none) private(i, j, pj, val) reduction(+: fillin)
+    #pragma omp parallel for schedule(static) \
+        default(none) private(i, j, pj, val) reduction(+: fillin)
     for ( i = 0; i < A->n; ++i )
     {
         for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj )
@@ -587,7 +587,7 @@ static real diag_pre_comp( const reax_system * const system, real * const Hdia_i
 
     start = Get_Time( );
 
-    #pragma omp parallel for schedule(guided) \
+    #pragma omp parallel for schedule(static) \
         default(none) private(i)
     for ( i = 0; i < system->N; ++i )
     {
@@ -811,7 +811,7 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     for ( i = 0; i < sweeps; ++i )
     {
         /* for each nonzero */
-        #pragma omp parallel for schedule(guided) \
+        #pragma omp parallel for schedule(static) \
             default(none) shared(DAD, stderr) private(sum, ei_x, ei_y, k) firstprivate(x, y)
         for ( j = 0; j < A->start[A->n]; ++j )
         {
@@ -971,7 +971,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
         exit( INSUFFICIENT_MEMORY );
     }
 
-    #pragma omp parallel for schedule(guided) \
+    #pragma omp parallel for schedule(static) \
         default(none) shared(D, D_inv) private(i)
     for ( i = 0; i < A->n; ++i )
     {
@@ -982,7 +982,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     /* to get convergence, A must have unit diagonal, so apply
      * transformation DAD, where D = D(1./sqrt(D(A))) */
     memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) );
-    #pragma omp parallel for schedule(guided) \
+    #pragma omp parallel for schedule(static) \
         default(none) shared(DAD, D) private(i, pj)
     for ( i = 0; i < A->n; ++i )
     {
@@ -1008,8 +1008,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     memcpy( U->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) );
 
     /* L has unit diagonal, by convention */
-    #pragma omp parallel for schedule(guided) \
-        default(none) private(i)
+    #pragma omp parallel for schedule(static) default(none) private(i)
     for ( i = 0; i < A->n; ++i )
     {
         L->val[L->start[i + 1] - 1] = 1.0;
@@ -1018,8 +1017,8 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     for ( i = 0; i < sweeps; ++i )
     {
         /* for each nonzero in L */
-        #pragma omp parallel for schedule(guided) \
-            default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum)
+        #pragma omp parallel for schedule(static) \
+            default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum)
         for ( j = 0; j < DAD->start[DAD->n]; ++j )
         {
             sum = ZERO;
@@ -1068,7 +1067,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
             }
         }
 
-        #pragma omp parallel for schedule(guided) \
+        #pragma omp parallel for schedule(static) \
            default(none) shared(DAD) private(j, k, x, y, ei_x, ei_y, sum)
         for ( j = 0; j < DAD->start[DAD->n]; ++j )
         {
@@ -1119,7 +1118,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps,
     /* apply inverse transformation:
      * since DAD \approx LU, then
      * D^{-1}DADD^{-1} = A \approx D^{-1}LUD^{-1} */
-    #pragma omp parallel for schedule(guided) \
+    #pragma omp parallel for schedule(static) \
         default(none) shared(DAD, D_inv) private(i, pj)
     for ( i = 0; i < DAD->n; ++i )
     {
@@ -1181,7 +1180,7 @@ static real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
         exit( INSUFFICIENT_MEMORY );
     }
 
-    #pragma omp parallel for schedule(guided) \
+    #pragma omp parallel for schedule(static) \
         default(none) shared(D, D_inv) private(i)
     for ( i = 0; i < A->n; ++i )
     {
@@ -1192,7 +1191,7 @@ static real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
     /* to get convergence, A must have unit diagonal, so apply
      * transformation DAD, where D = D(1./sqrt(D(A))) */
     memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) );
-    #pragma omp parallel for schedule(guided) \
+    #pragma omp parallel for schedule(static) \
         default(none) shared(DAD, D) private(i, pj)
     for ( i = 0; i < A->n; ++i )
     {
@@ -1218,7 +1217,7 @@ static real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
     memcpy( U_temp->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) );
 
     /* L has unit diagonal, by convention */
-    #pragma omp parallel for schedule(guided) \
+    #pragma omp parallel for schedule(static) \
         default(none) private(i) shared(L_temp)
     for ( i = 0; i < A->n; ++i )
     {
@@ -1228,7 +1227,7 @@ static real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
     for ( i = 0; i < sweeps; ++i )
     {
         /* for each nonzero in L */
-        #pragma omp parallel for schedule(guided) \
+        #pragma omp parallel for schedule(static) \
            default(none) shared(DAD, L_temp, U_temp) private(j, k, x, y, ei_x, ei_y, sum)
         for ( j = 0; j < DAD->start[DAD->n]; ++j )
         {
@@ -1278,7 +1277,7 @@ static real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
             }
         }
 
-        #pragma omp parallel for schedule(guided) \
+        #pragma omp parallel for schedule(static) \
            default(none) shared(DAD, L_temp, U_temp) private(j, k, x, y, ei_x, ei_y, sum)
         for ( j = 0; j < DAD->start[DAD->n]; ++j )
        {
@@ -1329,7 +1328,7 @@ static real ILUT_PAR( const sparse_matrix * const A, const real * droptol,
     /* apply inverse transformation:
      * since DAD \approx LU, then
      * D^{-1}DADD^{-1} = A \approx D^{-1}LUD^{-1} */
-    #pragma omp parallel for schedule(guided) \
+    #pragma omp parallel for schedule(static) \
         default(none) shared(DAD, L_temp, U_temp, D_inv) private(i, pj)
     for ( i = 0; i < DAD->n; ++i )
     {
diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c
index 9d2aea4cf107a48d00113ebe1383f615f637f75a..7821462a05ac2c7224a1a86a96db26d72f15e920 100644
--- a/sPuReMD/src/init_md.c
+++ b/sPuReMD/src/init_md.c
@@ -538,7 +538,7 @@ void Init_Out_Controls(reax_system *system, control_params *control,
         strcpy( temp, control->sim_name );
         strcat( temp, ".log" );
         out_control->log = fopen( temp, "w" );
-        fprintf( out_control->log, "%-6s%10s%10s%10s%10s%10s%10s%10s%10s%10s%10s%10s%10s%10s%10s\n",
+        fprintf( out_control->log, "%-6s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n",
                  "step", "total", "neighbors", "init", "bonded",
                  "nonbonded", "QEq", "QEq Sort", "S iters", "Pre Comp", "Pre App",
                  "S spmv", "S vec ops", "S orthog", "S tsolve" );
diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h
index b225c508a54f14d86c59a64122f52c2f77babbda..9be7cf757d54471195b5afa892a08d2d6bb8ed53 100644
--- a/sPuReMD/src/mytypes.h
+++ b/sPuReMD/src/mytypes.h
@@ -81,17 +81,17 @@
 #define C_ele 332.06371
 //#define K_B 503.398008 // kcal/mol/K
 #define K_B 0.831687 // amu A^2 / ps^2 / K
-#define F_CONV 1e6 / 48.88821291 / 48.88821291 // --> amu A / ps^2
+#define F_CONV (1e6 / 48.88821291 / 48.88821291) // --> amu A / ps^2
 #define E_CONV 0.002391 // amu A^2 / ps^2 --> kcal/mol
 #define EV_to_KCALpMOL 14.400000 // ElectronVolt --> KCAL per MOLe
 #define KCALpMOL_to_EV 23.060549 // 23.020000//KCAL per MOLe --> ElectronVolt
 #define ECxA_to_DEBYE 4.803204 // elem. charge * angstrom -> debye conv
 #define CAL_to_JOULES 4.184000 // CALories --> JOULES
-#define JOULES_to_CAL 1/4.184000 // JOULES --> CALories
+#define JOULES_to_CAL (1/4.184000) // JOULES --> CALories
 #define AMU_to_GRAM 1.6605e-24
 #define ANG_to_CM 1.0e-8
 #define AVOGNR 6.0221367e23
-#define P_CONV 1.0e-24 * AVOGNR * JOULES_to_CAL
+#define P_CONV (1.0e-24 * AVOGNR * JOULES_to_CAL)
 
 #define MAX_STR 1024
 #define MAX_LINE 1024
@@ -115,7 +115,6 @@
 #define MAX_ITR 10
 #define RESTART 50
 
-#define MAX_ROWS_PER_LEVEL 10000 /* triangular solve using level scheduling */
 
 #define ZERO 0.000000000000000e+00
 #define ALMOST_ZERO 1e-10
diff --git a/sPuReMD/src/print_utils.c b/sPuReMD/src/print_utils.c
index 647b3701a391e75f8623a08fcfe1e6359e86f3ce..579ba6290a5c9812cd8b1342f96f959f597d6709 100644
--- a/sPuReMD/src/print_utils.c
+++ b/sPuReMD/src/print_utils.c
@@ -616,7 +616,7 @@ void Output_Results( reax_system *system, control_params *control,
             f_update = 1;
         else
             f_update = out_control->energy_update_freq;
-        fprintf( out_control->log, "%6d%10.2f%10.2f%10.2f%10.2f%10.2f%10.6f%10.6f%10.2f%10.6f%10.6f%10.6f%10.6f%10.6f%10.6f\n",
+        fprintf( out_control->log, "%6d %10.2f %10.2f %10.2f %10.2f %10.2f %10.4f %10.4f %10.2f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f\n",
                  data->step, t_elapsed / f_update,
                  data->timing.nbrs / f_update,
                  data->timing.init_forces / f_update,
diff --git a/sPuReMD/src/vector.c b/sPuReMD/src/vector.c
index f784ee11e73d4e2ce0b59fd00e67af6d025f903c..1525d94d45428b6eb19e4bf4b5e4ca1e6d97cfee 100644
--- a/sPuReMD/src/vector.c
+++ b/sPuReMD/src/vector.c
@@ -26,7 +26,7 @@ inline int Vector_isZero( const real * const v, const unsigned int k )
 {
     unsigned int i, ret = TRUE;
 
-    #pragma omp parallel for default(none) private(i) reduction(&&: ret) schedule(guided)
+    #pragma omp parallel for default(none) private(i) reduction(&&: ret) schedule(static)
     for ( i = 0; i < k; ++i )
     {
         if ( fabs( v[i] ) > ALMOST_ZERO )
@@ -65,7 +65,7 @@ inline void Vector_Scale( real * const dest, const real c, const real * const v,
 {
     unsigned int i;
 
-    #pragma omp parallel for default(none) private(i) schedule(guided)
+    #pragma omp parallel for default(none) private(i) schedule(static)
     for ( i = 0; i < k; ++i )
     {
         dest[i] = c * v[i];
@@ -78,7 +78,7 @@ inline void Vector_Sum( real * const dest, const real c, const real * const v, c
 {
     unsigned int i;
 
-    #pragma omp parallel for default(none) private(i) schedule(guided)
+    #pragma omp parallel for default(none) private(i) schedule(static)
     for ( i = 0; i < k; ++i )
     {
         dest[i] = c * v[i] + d * y[i];
@@ -90,7 +90,7 @@ inline void Vector_Add( real * const dest, const real c, const real * const v, c
 {
     unsigned int i;
 
-    #pragma omp parallel for default(none) private(i) schedule(guided)
+    #pragma omp parallel for default(none) private(i) schedule(static)
     for ( i = 0; i < k; ++i )
     {
         dest[i] += c * v[i];
@@ -117,7 +117,7 @@ inline real Dot( const real * const v1, const real * const v2, const unsigned in
     real ret = ZERO;
     unsigned int i;
 
-    #pragma omp parallel for default(none) private(i) reduction(+: ret) schedule(guided)
+    #pragma omp parallel for default(none) private(i) reduction(+: ret) schedule(static)
     for ( i = 0; i < k; ++i )
    {
         ret += v1[i] * v2[i];
@@ -132,7 +132,7 @@ inline real Norm( const real * const v1, const unsigned int k )
     real ret = ZERO;
     unsigned int i;
 
-    #pragma omp parallel for default(none) private(i) reduction(+: ret) schedule(guided)
+    #pragma omp parallel for default(none) private(i) reduction(+: ret) schedule(static)
     for ( i = 0; i < k; ++i )
     {
         ret += SQR( v1[i] );
diff --git a/tools/run_sim.py b/tools/run_sim.py
index 842beacfbf08a38c74065d97b0d638f87fcae19e..8bcf19bd89e14865d51da49dfdaf316e61e8e1c0 100644
--- a/tools/run_sim.py
+++ b/tools/run_sim.py
@@ -115,10 +115,7 @@ class TestCase():
                 print(stderr)
             else:
-                #TODO: fix
-                start = 0.
-                stop = 0.
-                self._process_result(fout, stop - start, param_dict)
+                self._process_result(fout, param_dict)
 
         fout.close()
 
         if path.exists(temp_file):
@@ -126,7 +123,8 @@ class TestCase():
         if path.exists(temp_dir):
             rmdir(temp_dir)
 
-    def _process_result(self, fout, time, param):
+    def _process_result(self, fout, param):
+        time = 0.
         qeq = 0.
         iters = 0.
         pre_comp = 0.
@@ -143,25 +141,35 @@ class TestCase():
                 line = line.split()
                 try:
                     qeq = qeq + float(line[6])
-                    iters = iters + float(line[7])
-                    pre_comp = pre_comp + float(line[8])
-                    pre_app = pre_app + float(line[9])
-                    spmv = spmv + float(line[10])
+                    iters = iters + float(line[8])
+                    pre_comp = pre_comp + float(line[9])
+                    pre_app = pre_app + float(line[10])
+                    spmv = spmv + float(line[11])
                     cnt = cnt + 1
+                    pass
                 except Exception:
                     pass
+                if line[0] == 'total:':
+                    try:
+                        time = float(line[1])
+                    except Exception:
+                        pass
 
             cnt = cnt - 1
-        qeq = qeq / cnt
-        iters = iters / cnt
-        pre_comp = pre_comp / cnt
-        pre_app = pre_app / cnt
-        spmv = spmv / cnt
+        if cnt > 0:
+            qeq = qeq / cnt
+            iters = iters / cnt
+            pre_comp = pre_comp / cnt
+            pre_app = pre_app / cnt
+            spmv = spmv / cnt
 
-        fout.write(self.__result_body_fmt.format(path.basename(self.__geo_file).split('.')[0],
-            param['nsteps'], param['qeq_solver_type'], param['qeq_solver_q_err'],
-            param['pre_comp_type'], param['pre_comp_droptol'], param['pre_comp_sweeps'],
-            param['pre_app_type'], param['pre_app_jacobi_iters'], pre_comp, pre_app, iters, spmv,
-            qeq, param['threads'], time))
+        if cnt == int(param['nsteps']):
+            fout.write(self.__result_body_fmt.format(path.basename(self.__geo_file).split('.')[0],
+                param['nsteps'], param['qeq_solver_type'], param['qeq_solver_q_err'],
+                param['pre_comp_type'], param['pre_comp_droptol'], param['pre_comp_sweeps'],
+                param['pre_app_type'], param['pre_app_jacobi_iters'], pre_comp, pre_app, iters, spmv,
+                qeq, param['threads'], time))
+        else:
+            print('**WARNING: nsteps not correct in file {0}...'.format(log_file))
 
         fout.flush()
@@ -193,7 +201,7 @@ if __name__ == '__main__':
     data_dir = path.join(base_dir, 'data/benchmarks')
 
     header_fmt_str = '{:15}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:10}|{:10}|{:10}|{:10}|{:10}|{:3}|{:10}\n'
-    header_str = ['Data Set', 'Steps', 'Q Tol', 'QType', 'PreCT', 'PreCD', 'PreCS', 'PreAT', 'PreAJ', 'Pre Comp',
+    header_str = ['Data Set', 'Steps', 'QType', 'Q Tol', 'PreCT', 'PreCD', 'PreCS', 'PreAT', 'PreAJ', 'Pre Comp',
             'Pre App', 'Iters', 'SpMV', 'QEq', 'Thd', 'Time (s)']
     body_fmt_str = '{:15} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:3} {:10.3f}\n'