diff --git a/sPuReMD/src/GMRES.c b/sPuReMD/src/GMRES.c index 5eb2b12d9d310f59775bc36b4443023335ccc2b9..35269e9045dd2dfb65fc64239e29ca8b68f2b7cc 100644 --- a/sPuReMD/src/GMRES.c +++ b/sPuReMD/src/GMRES.c @@ -210,7 +210,7 @@ static void diag_pre_app( const real * const Hdia_inv, const real * const y, unsigned int i; #pragma omp parallel for schedule(guided) \ - default(none) private(i) shared(stderr) + default(none) private(i) for ( i = 0; i < N; ++i ) { x[i] = y[i] * Hdia_inv[i]; @@ -583,16 +583,11 @@ static void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, * Matrices have non-zero diagonals * Each row of a matrix has at least one non-zero (i.e., no rows with all zeros) */ void apply_preconditioner( const static_storage * const workspace, const control_params * const control, - real * const y, real * const x, const int fresh_pre ) + const real * const y, real * const x, const int fresh_pre ) { int i, si; static real *Dinv_L = NULL, *Dinv_U = NULL; - if ( control->pre_comp_type != DIAG_PC && workspace->L == NULL ) - { - return; - } - switch ( control->pre_app_type ) { case NONE_PA: @@ -653,10 +648,13 @@ void apply_preconditioner( const static_storage * const workspace, const control } /* construct D^{-1}_L */ - for ( i = 0; i < workspace->L->n; ++i ) + if ( fresh_pre ) { - si = workspace->L->start[i + 1] - 1; - Dinv_L[i] = 1. / workspace->L->val[si]; + for ( i = 0; i < workspace->L->n; ++i ) + { + si = workspace->L->start[i + 1] - 1; + Dinv_L[i] = 1. / workspace->L->val[si]; + } } jacobi_iter( workspace->L, Dinv_L, y, x, LOWER, control->pre_app_jacobi_iters ); @@ -671,10 +669,13 @@ void apply_preconditioner( const static_storage * const workspace, const control } /* construct D^{-1}_U */ - for ( i = 0; i < workspace->U->n; ++i ) + if ( fresh_pre ) { - si = workspace->U->start[i]; - Dinv_U[i] = 1. / workspace->U->val[si]; + for ( i = 0; i < workspace->U->n; ++i ) + { + si = workspace->U->start[i]; + Dinv_U[i] = 1. / workspace->U->val[si]; + } } jacobi_iter( workspace->U, Dinv_U, y, x, UPPER, control->pre_app_jacobi_iters ); @@ -709,8 +710,8 @@ int GMRES( const static_storage * const workspace, const control_params * const if ( control->pre_comp_type == DIAG_PC ) { - /* apply precondioner to RHS */ - apply_preconditioner( workspace, control, workspace->b, workspace->b_prc, fresh_pre ); + /* apply preconditioner to RHS */ + apply_preconditioner( workspace, control, b, workspace->b_prc, fresh_pre ); } /* GMRES outer-loop */ @@ -741,7 +742,8 @@ int GMRES( const static_storage * const workspace, const control_params * const if ( control->pre_comp_type != DIAG_PC ) { time_start = Get_Time( ); - apply_preconditioner( workspace, control, workspace->v[0], workspace->v[0], fresh_pre ); + apply_preconditioner( workspace, control, workspace->v[0], workspace->v[0], + itr == 0 ? fresh_pre : 0 ); *time += Get_Timing_Info( time_start ); } @@ -758,52 +760,93 @@ int GMRES( const static_storage * const workspace, const control_params * const *spmv_time += Get_Timing_Info( time_start ); time_start = Get_Time( ); - apply_preconditioner( workspace, control, workspace->v[j + 1], workspace->v[j + 1], fresh_pre ); + apply_preconditioner( workspace, control, workspace->v[j + 1], workspace->v[j + 1], 0 ); *time += Get_Timing_Info( time_start ); - /* apply modified Gram-Schmidt to orthogonalize the new residual */ - for ( i = 0; i < j - 1; i++ ) + if ( control->pre_comp_type == DIAG_PC ) { - workspace->h[i][j] = 0; + /* apply modified Gram-Schmidt to orthogonalize the new residual */ + for( i = 0; i <= j; i++ ) + { + workspace->h[i][j] = Dot( workspace->v[i], workspace->v[j+1], N ); + Vector_Add( workspace->v[j+1], -workspace->h[i][j], workspace->v[i], N ); + } + + workspace->h[j+1][j] = Norm( workspace->v[j+1], N ); + Vector_Scale( workspace->v[j+1], 1. / workspace->h[j+1][j], workspace->v[j+1], N ); + // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j ); + + /* Givens rotations on the upper-Hessenberg matrix to make it U */ + for( i = 0; i <= j; i++ ) + { + if( i == j ) + { + cc = SQRT( SQR(workspace->h[j][j])+SQR(workspace->h[j+1][j]) ); + workspace->hc[j] = workspace->h[j][j] / cc; + workspace->hs[j] = workspace->h[j+1][j] / cc; + } + + tmp1 = workspace->hc[i] * workspace->h[i][j] + + workspace->hs[i] * workspace->h[i+1][j]; + tmp2 = -workspace->hs[i] * workspace->h[i][j] + + workspace->hc[i] * workspace->h[i+1][j]; + + workspace->h[i][j] = tmp1; + workspace->h[i+1][j] = tmp2; + } + + /* apply Givens rotations to the rhs as well */ + tmp1 = workspace->hc[j] * workspace->g[j]; + tmp2 = -workspace->hs[j] * workspace->g[j]; + workspace->g[j] = tmp1; + workspace->g[j+1] = tmp2; } - - //for( i = 0; i <= j; i++ ) { - for ( i = MAX(j - 1, 0); i <= j; i++ ) + else { - workspace->h[i][j] = Dot( workspace->v[i], workspace->v[j + 1], N ); - Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N ); - } - - workspace->h[j + 1][j] = Norm( workspace->v[j + 1], N ); - Vector_Scale( workspace->v[j + 1], - 1. / workspace->h[j + 1][j], workspace->v[j + 1], N ); - // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j ); - - /* Givens rotations on the upper-Hessenberg matrix to make it U */ - for ( i = MAX(j - 1, 0); i <= j; i++ ) - { - if ( i == j ) + /* apply modified Gram-Schmidt to orthogonalize the new residual */ + for ( i = 0; i < j - 1; i++ ) { - cc = SQRT( SQR(workspace->h[j][j]) + SQR(workspace->h[j + 1][j]) ); - workspace->hc[j] = workspace->h[j][j] / cc; - workspace->hs[j] = workspace->h[j + 1][j] / cc; + workspace->h[i][j] = 0; } - - tmp1 = workspace->hc[i] * workspace->h[i][j] + - workspace->hs[i] * workspace->h[i + 1][j]; - tmp2 = -workspace->hs[i] * workspace->h[i][j] + - workspace->hc[i] * workspace->h[i + 1][j]; - - workspace->h[i][j] = tmp1; - workspace->h[i + 1][j] = tmp2; + + //for( i = 0; i <= j; i++ ) { + for ( i = MAX(j - 1, 0); i <= j; i++ ) + { + workspace->h[i][j] = Dot( workspace->v[i], workspace->v[j + 1], N ); + Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N ); + } + + workspace->h[j + 1][j] = Norm( workspace->v[j + 1], N ); + Vector_Scale( workspace->v[j + 1], + 1. / workspace->h[j + 1][j], workspace->v[j + 1], N ); + // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j ); + + /* Givens rotations on the upper-Hessenberg matrix to make it U */ + for ( i = MAX(j - 1, 0); i <= j; i++ ) + { + if ( i == j ) + { + cc = SQRT( SQR(workspace->h[j][j]) + SQR(workspace->h[j + 1][j]) ); + workspace->hc[j] = workspace->h[j][j] / cc; + workspace->hs[j] = workspace->h[j + 1][j] / cc; + } + + tmp1 = workspace->hc[i] * workspace->h[i][j] + + workspace->hs[i] * workspace->h[i + 1][j]; + tmp2 = -workspace->hs[i] * workspace->h[i][j] + + workspace->hc[i] * workspace->h[i + 1][j]; + + workspace->h[i][j] = tmp1; + workspace->h[i + 1][j] = tmp2; + } + + /* apply Givens rotations to the rhs as well */ + tmp1 = workspace->hc[j] * workspace->g[j]; + tmp2 = -workspace->hs[j] * workspace->g[j]; + workspace->g[j] = tmp1; + workspace->g[j + 1] = tmp2; } - /* apply Givens rotations to the rhs as well */ - tmp1 = workspace->hc[j] * workspace->g[j]; - tmp2 = -workspace->hs[j] * workspace->g[j]; - workspace->g[j] = tmp1; - workspace->g[j + 1] = tmp2; - //fprintf( stderr, "h: " ); //for( i = 0; i <= j+1; ++i ) //fprintf( stderr, "%.6f ", workspace->h[i][j] ); diff --git a/sPuReMD/src/QEq.c b/sPuReMD/src/QEq.c index 68d32b7ad5ca3977e7c0d1e212d2ee44b4a751ae..e8b129d918273381df17a86c1517231d42be8cf4 100644 --- a/sPuReMD/src/QEq.c +++ b/sPuReMD/src/QEq.c @@ -193,8 +193,7 @@ static void Calculate_Droptol( const sparse_matrix * const A, real * const dropt #pragma omp barrier /* calculate sqaure of the norm of each row */ - #pragma omp parallel for schedule(guided) \ - default(none) private(i, j, k, val, tid) shared(droptol_local) + #pragma omp for schedule(guided) for ( i = 0; i < A->n; ++i ) { for ( k = A->start[i]; k < A->start[i + 1] - 1; ++k ) @@ -222,7 +221,7 @@ static void Calculate_Droptol( const sparse_matrix * const A, real * const dropt #pragma omp barrier #ifdef _OPENMP - #pragma omp parallel for schedule(guided) default(none) private(i, k) shared(droptol_local) + #pragma omp for schedule(guided) for ( i = 0; i < A->n; ++i ) { droptol[i] = 0.0; @@ -237,7 +236,7 @@ static void Calculate_Droptol( const sparse_matrix * const A, real * const dropt /* calculate local droptol for each row */ //fprintf( stderr, "droptol: " ); - #pragma omp parallel for schedule(guided) default(none) private(i) + #pragma omp for schedule(guided) for ( i = 0; i < A->n; ++i ) { //fprintf( stderr, "%f-->", droptol[i] ); diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c index 92a625e6c7882dd2e032524edbedfaa3ce201d05..6b768a74a40ddd23f36de24d27a6182bc5609c9b 100644 --- a/sPuReMD/src/init_md.c +++ b/sPuReMD/src/init_md.c @@ -290,6 +290,7 @@ void Init_Workspace( reax_system *system, control_params *control, workspace->H_sp = NULL; workspace->L = NULL; workspace->U = NULL; + workspace->Hdia_inv = NULL; workspace->droptol = (real *) calloc( system->N, sizeof( real ) ); workspace->w = (real *) calloc( system->N, sizeof( real ) ); workspace->b = (real *) calloc( system->N * 2, sizeof( real ) ); diff --git a/tools/run_sim.py b/tools/run_sim.py index bcea5dbadefcf0484b189109b438c6cef4df679b..ac8aa0cdbd7ec25a89b1a6646be6f45a07ef3bc8 100644 --- a/tools/run_sim.py +++ b/tools/run_sim.py @@ -30,12 +30,16 @@ class TestCase(): r'(?P<key>ensemble_type\s+)\S+(?P<comment>.*)', r'\g<key>%s\g<comment>' % x, l), \ 'nsteps': lambda l, x: sub( r'(?P<key>nsteps\s+)\S+(?P<comment>.*)', r'\g<key>%s\g<comment>' % x, l), \ + 'tabulate_long_range': lambda l, x: sub( + r'(?P<key>tabulate_long_range\s+)\S+(?P<comment>.*)', r'\g<key>%s\g<comment>' % x, l), \ 'qeq_solver_type': lambda l, x: sub( r'(?P<key>qeq_solver_type\s+)\S+(?P<comment>.*)', r'\g<key>%s\g<comment>' % x, l), \ 'qeq_solver_q_err': lambda l, x: sub( r'(?P<key>qeq_solver_q_err\s+)\S+(?P<comment>.*)', r'\g<key>%s\g<comment>' % x, l), \ 'pre_comp_type': lambda l, x: sub( r'(?P<key>pre_comp_type\s+)\S+(?P<comment>.*)', r'\g<key>%s\g<comment>' % x, l), \ + 'pre_comp_droptol': lambda l, x: sub( + r'(?P<key>pre_comp_droptol\s+)\S+(?P<comment>.*)', r'\g<key>%s\g<comment>' % x, l), \ 'pre_comp_refactor': lambda l, x: sub( r'(?P<key>pre_comp_refactor\s+)\S+(?P<comment>.*)', r'\g<key>%s\g<comment>' % x, l), \ 'pre_comp_sweeps': lambda l, x: sub( @@ -63,7 +67,7 @@ class TestCase(): fp_temp.write(lines) fp_temp.close() - def run(self, bin_file='sPuReMD/bin/spuremd'): + def run(self, bin_file='sPuReMD/bin/spuremd', process_results=False): base_dir = getcwd() bin_path = path.join(base_dir, bin_file) env = dict(environ) @@ -74,6 +78,7 @@ class TestCase(): fout = open(self.__result_file, 'a') if write_header: fout.write(self.__result_header_fmt.format(*self.__result_header)) + fout.flush() temp_dir = mkdtemp() temp_file = path.join(temp_dir, path.basename(self.__control_file)) @@ -81,20 +86,31 @@ class TestCase(): for p in product(*[self.__params[k] for k in self.__param_names]): param_dict = dict((k, v) for (k, v) in zip(self.__param_names, p)) param_dict['name'] = path.basename(self.__geo_file).split('.')[0] \ - + '_step' + param_dict['nsteps'] + '_tol' + param_dict['qeq_solver_q_err'] \ - + '_precomp' + param_dict['pre_comp_type'] + '_thread' + param_dict['threads'] + + '_s' + param_dict['nsteps'] \ + + '_q' + param_dict['qeq_solver_type'] \ + + '_qtol' + param_dict['qeq_solver_q_err'] \ + + '_pc' + param_dict['pre_comp_type'] \ + + '_pctol' + param_dict['pre_comp_droptol'] \ + + '_pcs' + param_dict['pre_comp_sweeps'] \ + + '_pa' + param_dict['pre_app_type'] \ + + '_paji' + param_dict['pre_app_jacobi_iters'] \ + + '_t' + param_dict['threads'] - self._setup(param_dict, temp_file) - env['OMP_NUM_THREADS'] = param_dict['threads'] - start = time() - proc_handle = Popen([bin_path, self.__geo_file, self.__ffield_file, temp_file], - stdout=PIPE, stderr=PIPE, env=env) - #TODO: handle outputs? - stdout, stderr = proc_handle.communicate() - stop = time() + if not process_results: + self._setup(param_dict, temp_file) + + env['OMP_NUM_THREADS'] = param_dict['threads'] + start = time() + proc_handle = Popen([bin_path, self.__geo_file, self.__ffield_file, temp_file], + stdout=PIPE, stderr=PIPE, env=env) + stdout, stderr = proc_handle.communicate() + stop = time() + if proc_handle.returncode < 0: + print("WARNING: process terminated with code {0}".format(proc_handle.returncode)) - self._process_result(fout, stop - start, param_dict) + else: + self._process_result(fout, stop - start, param_dict) fout.close() remove(temp_file) @@ -127,8 +143,12 @@ class TestCase(): spmv = spmv / cnt fout.write(self.__result_body_fmt.format(path.basename(self.__geo_file).split('.')[0], - param['nsteps'], param['qeq_solver_q_err'], param['pre_comp_type'], pre_comp, pre_app, iters, spmv, + param['nsteps'], param['qeq_solver_type'], param['qeq_solver_q_err'], + param['pre_comp_type'], param['pre_comp_droptol'], param['pre_comp_sweeps'], + param['pre_app_type'], param['pre_app_jacobi_iters'], pre_comp, pre_app, iters, spmv, qeq, param['threads'], time)) + fout.flush() + if __name__ == '__main__': @@ -141,6 +161,12 @@ if __name__ == '__main__': ] parser = argparse.ArgumentParser(description='Run molecular dynamics simulations on specified data sets.') + parser.add_argument('-f', '--out_file', metavar='out_file', default=None, nargs=1, + help='Output file to write results.') + parser.add_argument('-r', '--process_results', default=False, action='store_true', + help='Process simulation results only (do not perform simulations).') + parser.add_argument('-p', '--params', metavar='params', action='append', default=None, nargs=2, + help='Paramater name and value pairs for the simulation, which multiple values comma delimited.') parser.add_argument('data', nargs='+', choices=DATA, help='Data sets for which to run simulations.') @@ -151,30 +177,37 @@ if __name__ == '__main__': control_dir = path.join(base_dir, 'environ') data_dir = path.join(base_dir, 'data/benchmarks') - header_fmt_str = '{:20}|{:5}|{:5}|{:5}|{:10}|{:10}|{:10}|{:10}|{:10}|{:10}|{:10}\n' - header_str = ['Data Set', 'Steps', 'Q Tol', 'Pre T', 'Pre Comp', - 'Pre App', 'Iters', 'SpMV', 'QEq', 'Threads', 'Time (s)'] - body_fmt_str = '{:20} {:5} {:5} {:5} {:10.6f} {:10.6f} {:10.6f} {:10.6f} {:10.6f} {:10} {:10.6f}\n' + header_fmt_str = '{:15}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:10}|{:10}|{:10}|{:10}|{:10}|{:3}|{:10}\n' + header_str = ['Data Set', 'Steps', 'Q Tol', 'QType', 'PreCT', 'PreCD', 'PreCS', 'PreAT', 'PreAJ', 'Pre Comp', + 'Pre App', 'Iters', 'SpMV', 'QEq', 'Thd', 'Time (s)'] + body_fmt_str = '{:15} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:3} {:10.3f}\n' params = { 'ensemble_type': ['0'], 'nsteps': ['20'], -# 'nsteps': ['20', '100', '500', '1000'], + 'tabulate_long_range': ['0'], 'qeq_solver_type': ['0'], 'qeq_solver_q_err': ['1e-6'], -# 'qeq_solver_q_err': ['1e-6', '1e-10'], -# 'qeq_solver_q_err': ['1e-6', '1e-8', '1e-10', '1e-14'], 'pre_comp_type': ['2'], -# 'pre_comp_type': ['0', '1', '2'], 'pre_comp_refactor': ['100'], + 'pre_comp_droptol': ['0.0'], 'pre_comp_sweeps': ['3'], 'pre_app_type': ['2'], 'pre_app_jacobi_iters': ['50'], - 'threads': ['2'], -# 'threads': ['1', '2', '4', '12', '24'], + 'threads': ['1'], 'geo_format': [], } + if args.out_file: + result_file = args.out_file[0] + else: + result_file = 'result.txt' + + # overwrite default params, if supplied via command line args + if args.params: + for param in args.params: + params[param[0]] = param[1].split(',') + test_cases = [] if 'water_6540' in args.data: test_cases.append( @@ -183,7 +216,7 @@ if __name__ == '__main__': path.join(control_dir, 'param.gpu.water'), params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str, - geo_format=['1'])) + geo_format=['1'], result_file=result_file)) if 'water_78480' in args.data: test_cases.append( TestCase(path.join(data_dir, 'water/water_78480.pdb'), @@ -191,7 +224,7 @@ if __name__ == '__main__': path.join(control_dir, 'param.gpu.water'), params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str, - geo_format=['0'])) + geo_format=['0'], result_file=result_file)) if 'water_327000' in args.data: test_cases.append( TestCase(path.join(data_dir, 'water/water_327000.geo'), @@ -199,7 +232,7 @@ if __name__ == '__main__': path.join(control_dir, 'param.gpu.water'), params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str, - geo_format=['0'])) + geo_format=['0'], result_file=result_file)) if 'bilayer_56800' in args.data: test_cases.append( TestCase(path.join(data_dir, 'bilayer/bilayer_56800.pdb'), @@ -207,7 +240,7 @@ if __name__ == '__main__': path.join(control_dir, 'param.gpu.water'), params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str, - geo_format=['1'])) + geo_format=['1'], result_file=result_file)) if 'dna_19733' in args.data: test_cases.append( TestCase(path.join(data_dir, 'dna/dna_19733.pdb'), @@ -215,7 +248,7 @@ if __name__ == '__main__': path.join(control_dir, 'param.gpu.water'), params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str, - geo_format=['1'])) + geo_format=['1'], result_file=result_file)) if 'silica_6000' in args.data: test_cases.append( TestCase(path.join(data_dir, 'silica/silica_6000.pdb'), @@ -223,7 +256,7 @@ if __name__ == '__main__': path.join(control_dir, 'param.gpu.water'), params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str, - geo_format=['1'])) + geo_format=['1'], result_file=result_file)) if 'silica_72000' in args.data: test_cases.append( TestCase(path.join(data_dir, 'silica/silica_72000.geo'), @@ -231,7 +264,7 @@ if __name__ == '__main__': path.join(control_dir, 'param.gpu.water'), params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str, - geo_format=['0'])) + geo_format=['0'], result_file=result_file)) if 'silica_300000' in args.data: test_cases.append( TestCase(path.join(data_dir, 'silica/silica_300000.geo'), @@ -239,7 +272,7 @@ if __name__ == '__main__': path.join(control_dir, 'param.gpu.water'), params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str, - geo_format=['0'])) + geo_format=['0'], result_file=result_file)) if 'petn_48256' in args.data: test_cases.append( TestCase(path.join(data_dir, 'petn/petn_48256.pdb'), @@ -247,7 +280,7 @@ if __name__ == '__main__': path.join(control_dir, 'param.gpu.water'), params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str, - geo_format=['1'])) + geo_format=['1'], result_file=result_file)) for test in test_cases: - test.run(bin_file='sPuReMD/bin/spuremd') + test.run(bin_file='sPuReMD/bin/spuremd', process_results=args.process_results)