From 31ec0aee6e1709526ff7d7b861d9b0d1bdae02d3 Mon Sep 17 00:00:00 2001
From: "Kurt A. O'Hearn" <ohearnku@msu.edu>
Date: Fri, 25 Jan 2019 12:40:14 -0800
Subject: [PATCH] PuReMD-old: increase precision in log file to 4 decimal
 places for select fields. Merge MPI log parsing logic.

---
 PuReMD/src/io_tools.c |   2 +-
 tools/run_sim.py      | 321 ++++++++++++++++++++++++++++++++----------
 2 files changed, 248 insertions(+), 75 deletions(-)

diff --git a/PuReMD/src/io_tools.c b/PuReMD/src/io_tools.c
index 2b58ae63..b86f82a7 100644
--- a/PuReMD/src/io_tools.c
+++ b/PuReMD/src/io_tools.c
@@ -1118,7 +1118,7 @@ void Output_Results( reax_system *system, control_params *control,
                 denom = 1.0;
             }
 
-            fprintf( out_control->log, "%6d %10.2f %10.2f %10.2f %10.2f %10.2f %10.2f %10.4f %10.4f %10.4f %10.4f %10.4f %10.2f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f\n",
+            fprintf( out_control->log, "%6d %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.2f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f\n",
                     data->step,
                     t_elapsed * denom,
                     data->timing.comm * denom,
diff --git a/tools/run_sim.py b/tools/run_sim.py
index 10a7ab84..72c477c9 100644
--- a/tools/run_sim.py
+++ b/tools/run_sim.py
@@ -331,11 +331,17 @@ restart_freq            0                       ! 0: do not output any restart f
 
             start = time()
             proc_handle = Popen(cmd_args, stdout=PIPE, stderr=PIPE, env=env, universal_newlines=True)
-            stdout, stderr = proc_handle.communicate()
+            try:
+                stdout, stderr = proc_handle.communicate()
+            except KeyboardInterrupt:
+                # Slurm jobs require sending 2 SIGINT signals within 1 second to exit,
+                # so instead use a bigger hammer (SIGKILL)
+                proc_handle.kill()
+                raise
             stop = time()
 
             if proc_handle.returncode < 0:
-                print("WARNING: process terminated with code {0}".format(proc_handle.returncode))
+                print("[WARNING] process terminated with code {0}".format(proc_handle.returncode))
 
             print('stdout:\n{0}'.format(stdout), end='')
             print('stderr:\n{0}'.format(stderr), end='')
@@ -346,66 +352,184 @@ restart_freq            0                       ! 0: do not output any restart f
         if path.exists(temp_dir):
             rmdir(temp_dir)
 
-    def _process_result(self, fout, param, min_step, max_step):
-        time = 0.
-        cm = 0.
-        iters = 0.
-        pre_comp = 0.
-        pre_app = 0.
-        spmv = 0.
-        cnt = 0
-        cnt_valid = 0
-        line_cnt = 0
-        log_file = param['name'] + '.log'
-
-        if not path.exists(log_file):
-            print('***WARNING: {0} does not exist!'.format(log_file))
-            return
-        with open(log_file, 'r') as fp:
-            for line in fp:
-                line = line.split()
-                try:
-                    if (not min_step and not max_step) or \
-                    (min_step and not max_step and cnt_valid >= min_step) or \
-                    (not min_step and max_step and cnt_valid <= max_step) or \
-                    (cnt_valid >= min_step and cnt_valid <= max_step):
-                        cm = cm + float(line[6])
-                        iters = iters + float(line[8])
-                        pre_comp = pre_comp + float(line[9])
-                        pre_app = pre_app + float(line[10])
-                        spmv = spmv + float(line[11])
-                        cnt = cnt + 1
-                    cnt_valid = cnt_valid + 1
-                except Exception:
-                    pass
-                if line[0] == 'total:':
+    def _process_result(self, fout, param, min_step, max_step, run_type):
+        if run_type == 'serial' or run_type == 'openmp':
+            time = 0.
+            cm = 0.
+            iters = 0.
+            pre_comp = 0.
+            pre_app = 0.
+            spmv = 0.
+            cnt = 0
+            cnt_valid = 0
+            line_cnt = 0
+            log_file = param['name'] + '.log'
+
+            if not path.exists(log_file):
+                print('[WARNING] {0} does not exist!'.format(log_file))
+                return
+            with open(log_file, 'r') as fp:
+                for line in fp:
+                    line = line.split()
                     try:
-                        time = float(line[1])
+                        if (not min_step and not max_step) or \
+                        (min_step and not max_step and cnt_valid >= min_step) or \
+                        (not min_step and max_step and cnt_valid <= max_step) or \
+                        (cnt_valid >= min_step and cnt_valid <= max_step):
+                            cm = cm + float(line[6])
+                            iters = iters + float(line[8])
+                            pre_comp = pre_comp + float(line[9])
+                            pre_app = pre_app + float(line[10])
+                            spmv = spmv + float(line[11])
+                            cnt = cnt + 1
+                        cnt_valid = cnt_valid + 1
                     except Exception:
                         pass
-                line_cnt = line_cnt + 1
-            if cnt > 0:
-                cm = cm / cnt
-                iters = iters / cnt
-                pre_comp = pre_comp / cnt
-                pre_app = pre_app / cnt
-                spmv = spmv / cnt
-
-        # subtract for header, footer (total time), and extra step
-        # (e.g., 100 steps means steps 0 through 100, inclusive)
-        if (line_cnt - 3) == int(param['nsteps']):
-            fout.write(self.__result_body_fmt.format(path.basename(self.__geo_file).split('.')[0], 
-                param['nsteps'], param['charge_method'], param['cm_solver_type'],
-                param['cm_solver_q_err'], param['cm_domain_sparsity'],
-                param['cm_solver_pre_comp_type'], param['cm_solver_pre_comp_droptol'],
-                param['cm_solver_pre_comp_sweeps'], param['cm_solver_pre_comp_sai_thres'],
-                param['cm_solver_pre_app_type'], param['cm_solver_pre_app_jacobi_iters'],
-                pre_comp, pre_app, iters, spmv,
-                cm, param['threads'], time))
-        else:
-            print('**WARNING: nsteps not correct in file {0} (nsteps = {1:d}, counted steps = {2:d}).'.format(
-                log_file, int(param['nsteps']), max(line_cnt - 3, 0)))
-        fout.flush()
+                    if line[0] == 'total:':
+                        try:
+                            time = float(line[1])
+                        except Exception:
+                            pass
+                    line_cnt = line_cnt + 1
+                if cnt > 0:
+                    cm = cm / cnt
+                    iters = iters / cnt
+                    pre_comp = pre_comp / cnt
+                    pre_app = pre_app / cnt
+                    spmv = spmv / cnt
+
+            # subtract for header, footer (total time), and extra step
+            # (e.g., 100 steps means steps 0 through 100, inclusive)
+            if (line_cnt - 3) == int(param['nsteps']):
+                fout.write(self.__result_body_fmt.format(path.basename(self.__geo_file).split('.')[0], 
+                    param['nsteps'], param['charge_method'], param['cm_solver_type'],
+                    param['cm_solver_q_err'], param['cm_domain_sparsity'],
+                    param['cm_solver_pre_comp_type'], param['cm_solver_pre_comp_droptol'],
+                    param['cm_solver_pre_comp_sweeps'], param['cm_solver_pre_comp_sai_thres'],
+                    param['cm_solver_pre_app_type'], param['cm_solver_pre_app_jacobi_iters'],
+                    pre_comp, pre_app, iters, spmv,
+                    cm, param['threads'], time))
+            else:
+                print('[WARNING] nsteps not correct in file {0} (nsteps = {1:d}, counted steps = {2:d}).'.format(
+                    log_file, int(param['nsteps']), max(line_cnt - 3, 0)))
+            fout.flush()
+        elif run_type == 'mpi':
+            from operator import mul
+            from functools import reduce
+            
+            total = 0.0
+            comm = 0.0
+            neighbors = 0.0
+            init = 0.0
+            init_dist = 0.0
+            init_cm = 0.0
+            init_bond = 0.0
+            bonded = 0.0
+            nonbonded = 0.0
+            time = 0.0
+            cm = 0.0
+            cm_sort = 0.0
+            s_iters = 0.0
+            pre_comp = 0.0
+            pre_app = 0.0
+            s_comm = 0.0
+            s_allr = 0.0
+            s_spmv = 0.0
+            s_vec_ops = 0.0
+            cnt = 0
+            cnt_valid = 0
+            line_cnt = 0
+            log_file = param['name'] + '.log'
+
+            if not path.exists(log_file):
+                print('[WARNING] {0} does not exist!'.format(log_file))
+                return
+            with open(log_file, 'r') as fp:
+                for line in fp:
+                    line = line.split()
+                    try:
+                        if (not min_step and not max_step) or \
+                        (min_step and not max_step and cnt_valid >= min_step) or \
+                        (not min_step and max_step and cnt_valid <= max_step) or \
+                        (cnt_valid >= min_step and cnt_valid <= max_step):
+                            total = total + float(line[1])
+                            comm = comm + float(line[2])
+                            neighbors = neighbors + float(line[3])
+                            init = init + float(line[4])
+                            init_dist = init_dist + float(line[5])
+                            init_cm = init_cm + float(line[6])
+                            init_bond = init_bond + float(line[7])
+                            bonded = bonded + float(line[8])
+                            nonbonded = nonbonded + float(line[9])
+                            cm = cm + float(line[10])
+                            cm_sort = cm_sort + float(line[11])
+                            s_iters = s_iters + float(line[12])
+                            pre_comp = pre_comp + float(line[13])
+                            pre_app = pre_app + float(line[14])
+                            s_comm = s_comm + float(line[15])
+                            s_allr = s_allr + float(line[16])
+                            s_spmv = s_spmv + float(line[17])
+                            s_vec_ops = s_vec_ops + float(line[18])
+                            cnt = cnt + 1
+                        cnt_valid = cnt_valid + 1
+                    except Exception:
+                        pass
+                    if line[0] == 'total:':
+                        try:
+                            time = float(line[1])
+                        except Exception:
+                            pass
+                    line_cnt = line_cnt + 1
+                if cnt > 0:
+                    comm = comm / cnt
+                    neighbors = neighbors / cnt
+                    init = init / cnt
+                    init_dist = init_dist / cnt
+                    init_cm = init_cm / cnt
+                    init_bond = init_bond / cnt
+                    bonded = bonded / cnt
+                    nonbonded = nonbonded / cnt
+                    cm = cm / cnt
+                    cm_sort = cm_sort / cnt
+                    s_iters = s_iters / cnt
+                    pre_comp = pre_comp / cnt
+                    pre_app = pre_app / cnt
+                    s_comm = s_comm / cnt
+                    s_allr = s_allr / cnt
+                    s_spmv = s_spmv / cnt
+                    s_vec_ops = s_vec_ops / cnt
+
+            # subtract for header, footer (total time), and extra step
+            # (e.g., 100 steps means steps 0 through 100, inclusive)
+            if (line_cnt - 1) >= int(param['nsteps']):
+                fout.write(self.__result_body_fmt.format(path.basename(self.__geo_file).split('.')[0],
+                    str(reduce(mul, map(int, param['proc_by_dim'].split(':')), 1)),
+                    param['nsteps'], param['cm_solver_pre_comp_type'],
+                    param['cm_solver_q_err'],
+                    param['reneighbor'],
+                    param['cm_solver_pre_comp_sai_thres'],
+                    total, comm, neighbors, init, init_dist, init_cm, init_bond,
+                    bonded, nonbonded, cm, cm_sort,
+                    s_iters, pre_comp, pre_app, s_comm, s_allr, s_spmv, s_vec_ops))
+            else:
+                fout.write(self.__result_body_fmt.format(path.basename(self.__geo_file).split('.')[0],
+                    str(reduce(mul, map(int, param['proc_by_dim'].split(':')), 1)),
+                    param['nsteps'], param['cm_solver_pre_comp_type'],
+                    param['cm_solver_q_err'],
+                    param['reneighbor'],
+                    param['cm_solver_pre_comp_sai_thres'],
+                    float('nan'), float('nan'), float('nan'), float('nan'),
+                    float('nan'), float('nan'), float('nan'), float('nan'),
+                    float('nan'), float('nan'), float('nan'), float('nan'),
+                    float('nan'), float('nan'), float('nan'), float('nan'),
+                    float('nan'), float('nan')))
+
+                print('[WARNING] nsteps not correct in file {0} (nsteps = {1:d}, counted steps = {2:d}).'.format(
+                    log_file, int(param['nsteps']), max(line_cnt - 3, 0)))
+            fout.flush()
+        elif run_type == 'mpi+gpu':
+            #TODO
+            pass
 
     def parse_results(self, run_type):
         from itertools import product
@@ -425,7 +549,7 @@ restart_freq            0                       ! 0: do not output any restart f
                 param_dict = dict((k, v) for (k, v) in zip(self.__param_names, p))
                 param_dict['name'] = self._create_output_file_base(run_type, param_dict)
 
-                self._process_result(fout, param_dict, self.__min_step, self.__max_step)
+                self._process_result(fout, param_dict, self.__min_step, self.__max_step, run_type)
 
     def _build_slurm_script(self, binary, run_type, mpi_cmd, param_values):
         from os import path
@@ -507,10 +631,16 @@ python3 {0}/tools/run_sim.py run_md \\
                 cmd_args = ['qsub']
 
             proc_handle = Popen(cmd_args, stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True)
-            stdout, stderr = proc_handle.communicate(job_script)
+            try:
+                stdout, stderr = proc_handle.communicate(job_script)
+            except KeyboardInterrupt:
+                # Slurm jobs require sending 2 SIGINT signals within 1 second to exit,
+                # so instead use a bigger hammer (SIGKILL)
+                proc_handle.kill()
+                raise
 
             if proc_handle.returncode < 0:
-                print("WARNING: process terminated with code {0}".format(proc_handle.returncode))
+                print("[WARNING] process terminated with code {0}".format(proc_handle.returncode))
                 print('stdout:\n{0}'.format(stdout), end='')
                 print('stderr:\n{0}'.format(stderr), end='')
             else:
@@ -571,6 +701,8 @@ if __name__ == '__main__':
                 help='Force field parameter file used for the MD simulation.')
         run_md_custom_parser.set_defaults(func=run_md_custom)
 
+        parse_results_parser.add_argument('-b', '--binary', metavar='binary', default=None, nargs=1,
+                help='Binary file to run.')
         parse_results_parser.add_argument('-f', '--out_file', metavar='out_file', default=None, nargs=1,
                 help='Output file to write results.')
         parse_results_parser.add_argument('-p', '--params', metavar='params', action='append', default=None, nargs=2,
@@ -581,8 +713,10 @@ if __name__ == '__main__':
                 help='Maxiumum simulation step for aggregating results.')
         parse_results_parser.add_argument('run_type', nargs=1,
                 choices=RUN_TYPES, help='Run type for the MD simulation(s).')
-        parse_results_parser.add_argument('data_sets', nargs='+',
-                choices=DATA_SETS, help='Data set(s) for which to parse MD simulation results.')
+        parse_results_parser.add_argument('geo_file', nargs=1,
+                help='Geometry file used for the MD simulation.')
+        parse_results_parser.add_argument('ffield_file', nargs=1,
+                help='Force field parameter file used for the MD simulation.')
         parse_results_parser.set_defaults(func=parse_results)
 
         submit_jobs_parser.add_argument('-b', '--binary', metavar='binary', default=None, nargs=1,
@@ -837,13 +971,52 @@ if __name__ == '__main__':
         test_case.run_md(binary, args.run_type[0], args.mpi_cmd[0].split(':'))
 
     def parse_results(args):
-        header_fmt_str = '{:15}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:10}|{:10}|{:10}|{:10}|{:10}|{:3}|{:10}\n'
-        header_str = ['Data Set', 'Steps', 'CM', 'Solvr', 'Q Tol', 'QDS', 'PreCT', 'PreCD', 'PreCS', 'PCSAI', 'PreAT', 'PreAJ', 'Pre Comp',
-                'Pre App', 'Iters', 'SpMV', 'CM', 'Thd', 'Time (s)']
-        body_fmt_str = '{:15} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:3} {:10.3f}\n'
+        if args.run_type[0] == 'serial' or args.run_type[0] == 'openmp':
+            header_fmt_str = '{:15}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:5}|{:10}|{:10}|{:10}|{:10}|{:10}|{:3}|{:10}\n'
+            header_str = ['Data Set', 'Steps', 'CM', 'Solvr', 'Q Tol', 'QDS', 'PreCT', 'PreCD', 'PreCS', 'PCSAI', 'PreAT', 'PreAJ', 'Pre Comp',
+                    'Pre App', 'Iters', 'SpMV', 'CM', 'Thd', 'Time (s)']
+            body_fmt_str = '{:15} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:5} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:3} {:10.3f}\n'
+        elif args.run_type[0] == 'mpi':
+            header_fmt_str = '{:15} {:5} {:5} {:5} {:5} {:5} {:5} {:10} {:10} {:10} {:10} {:10} {:10} {:10} {:10} {:10} {:10} {:10} {:10} {:10} {:10} {:10} {:10} {:10} {:10}\n'
+            header_str = ['Data_Set', 'Proc', 'Steps', 'PreCt', 'Q_Tol', 'Ren', 'PCSAI',
+                    'total', 'comm', 'neighbors', 'init', 'init_dist', 'init_cm', 'init_bond',
+                    'bonded', 'nonbonded', 'cm', 'cm_sort',
+                    's_iters', 'pre_comm', 'pre_app', 's_comm', 's_allr', 's_spmv', 's_vec_ops']
+            body_fmt_str = '{:15} {:5} {:5} {:5} {:5} {:5} {:5} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f} {:10.3f}\n'
+        elif args.run_type[0] == 'mpi+gpu':
+            #TODO
+            pass
 
-        base_dir = getcwd()
-        data_dir, control_params_dict = setup_defaults(base_dir)
+        if args.binary:
+            binary = args.binary[0]
+            # remove executable and back up two directory levels
+            base_dir = path.dirname(path.dirname(path.dirname(path.abspath(binary))))
+        else:
+            base_dir = getcwd()
+
+        _, control_params_dict = setup_defaults(base_dir)
+
+        # overwrite default control file parameter values if supplied via command line args
+        if args.params:
+            for param in args.params:
+                if param[0] in control_params_dict:
+                    control_params_dict[param[0]] = param[1].split(',')
+                else:
+                    print("ERROR: Invalid parameter {0}. Terminating...".format(param[0]))
+                    exit(-1)
+
+        geo_base, geo_ext = path.splitext(args.geo_file[0])
+        if 'geo_format' in control_params_dict:
+            geo_format = control_params_dict['geo_format']
+        # infer geometry file format by file extension
+        else:
+            if geo_ext.lower() == '.pdb':
+                geo_format = ['1']
+            elif geo_ext.lower() == '.geo':
+                geo_format = ['0']
+            else:
+                print("ERROR: unrecognized geometry format {0}. Terminating...".format(ext))
+                exit(-1)
 
         if args.out_file:
             result_file = args.out_file[0]
@@ -860,12 +1033,12 @@ if __name__ == '__main__':
         else:
             max_step = None
 
-        test_cases = setup_test_cases(args.data_sets, data_dir, control_params_dict,
-                header_fmt_str=header_fmt_str, header_str=header_str, body_fmt_str=body_fmt_str,
+        test_case = TestCase(geo_base, args.geo_file[0], args.ffield_file[0],
+                params=control_params_dict, geo_format=geo_format,
+                result_header_fmt=header_fmt_str, result_header=header_str, result_body_fmt=body_fmt_str,
                 result_file=result_file, min_step=min_step, max_step=max_step)
 
-        for test in test_cases:
-            test.parse_results(args.run_type[0])
+        test_case.parse_results(args.run_type[0])
 
     def submit_jobs(args):
         if args.binary:
-- 
GitLab