benchmark.py: improve overall code quality

author: Florian Fischer <florian.fl.fischer@fau.de> 2020-05-10 09:51:18 +0200
committer: Florian Fischer <florian.fl.fischer@fau.de> 2020-06-02 11:18:47 +0200
commit: 40f2ae0fbfca831aae3b5996f08ca243d5b4d38e (patch)
tree: 54a39026bd9ee618e66e57db87ffbb8e171c7558
parent: 83c03e43465ac0a88a7775ed088a2e674e433cf8 (diff)
download: allocbench-40f2ae0fbfca831aae3b5996f08ca243d5b4d38e.tar.gz
allocbench-40f2ae0fbfca831aae3b5996f08ca243d5b4d38e.zip
1 files changed, 139 insertions, 107 deletions
diff --git a/allocbench/benchmark.py b/allocbench/benchmark.py
index b08118a..2a1e3f2 100644
--- a/allocbench/benchmark.py
+++ b/allocbench/benchmark.py
@@ -22,6 +22,7 @@ import errno
 import copy
 import csv
 import itertools
+import json
 import multiprocessing
 import os
 import subprocess
@@ -34,7 +35,7 @@ import allocbench.facter as facter
 import allocbench.globalvars as globalvars
 from allocbench.util import print_status, print_error, print_warn
 from allocbench.util import print_info0, print_info, print_debug
-from allocbench.util import find_cmd, prefix_cmd_with_abspath
+from allocbench.util import find_cmd, prefix_cmd_with_abspath, run_cmd
 
 
 class Benchmark:
@@ -99,21 +100,64 @@ class Benchmark:
         """raise an exception if perf is not allowed on this system"""
         if Benchmark.perf_allowed is None:
             print_info("Check if you are allowed to use perf ...")
-            res = subprocess.run(["perf", "stat", "ls"],
-                                 stdout=subprocess.PIPE,
-                                 stderr=subprocess.PIPE,
-                                 universal_newlines=True)
-
-            if res.returncode != 0:
-                print_error(f"Test perf run failed with exit status: {res.returncode}")
-                print_debug(res.stderr)
-                Benchmark.perf_allowed = False
-            else:
+            try:
+                run_cmd(["perf", "stat", "ls"], capture=True)
                 Benchmark.perf_allowed = True
+            except subprocess.CalledProcessError as err:
+                print_error(f"Test perf run failed with exit status: {err.returncode}")
+                print_debug(err.stderr)
+                Benchmark.perf_allowed = False
 
         if not Benchmark.perf_allowed:
             raise Exception("You don't have the needed permissions to use perf")
 
+    @staticmethod
+    def save_values_from_proc_status(result, keys, status_file="status", status_content=None, key_prefix=""):
+        """Store the requested keys of a /proc/<pid>/status file (path, file object or content) in result"""
+        assert(status_file or status_content)
+
+        if status_content is None:
+            if hasattr(status_file, "read"):
+                status_content = status_file.read()
+            else:
+                with open(status_file, "r") as opened_status_file:
+                    status_content = opened_status_file.read()
+
+        for line in status_content.splitlines():
+            # Split at the first ':' only, a value (e.g. Name) may contain ':'
+            key, _, value = line.partition(':')
+            value = value.replace("kB", "").strip()
+
+            if key in keys:
+                result[f"{key_prefix}{key}"] = value
+
+    @staticmethod
+    def save_server_status_and_values(result, server, keys):
+        """Read, save and extract values from a server process' /proc/<pid>/status file
+
+        The whole status is stored in result with the key {server name}_status and
+        every extracted key in keys as {server name}_{key} (name defaults to "Server").
+        """
+        with open(f"/proc/{server['popen'].pid}/status", "r") as status_file:
+            server_name = server.get('name', 'Server')
+            server_status = status_file.read()
+            result[f"{server_name}_status"] = server_status
+
+            Benchmark.save_values_from_proc_status(result, keys, status_content=server_status, key_prefix=f"{server_name}_")
+
+    @staticmethod
+    def parse_and_save_perf_output(result, output, alloc_name, perm):
+        """Parse csv output from perf -x, storing each row's first column in result keyed by its third column"""
+        csvreader = csv.reader(output.splitlines(), delimiter=',')
+        for row in csvreader:
+            try:
+                # Split off the user/kernel space info to be better portable
+                datapoint = row[2].split(":")[0]
+                value = row[0]
+                result[datapoint] = value
+            except IndexError as err:
+                print_warn(f"Exception {err} occured on {row} for {alloc_name} and {perm}")
+
     def __str__(self):
         return self.name
 
@@ -151,6 +195,8 @@ class Benchmark:
 
         if not hasattr(self, "requirements"):
             self.requirements = []
+        else:
+            self.check_requirements()
 
         print_debug("Creating benchmark", self.name)
         print_debug("Cmd:", self.cmd)
@@ -160,9 +206,9 @@ class Benchmark:
         print_debug("Results dictionary:", self.results)
         print_debug("Results directory:", self.result_dir)
 
+
     def save(self, path=None):
         """Save benchmark results to a json file"""
-        import json
         if not path:
             path = self.name + ".json"
         elif os.path.isdir(path):
@@ -193,8 +239,8 @@ class Benchmark:
             if "stats" in self.results:
                 save_data["stats"][allocator] = stats
 
-        with open(path, "w") as f:
-            json.dump(save_data, f)
+        with open(path, "w") as save_file:
+            json.dump(save_data, save_file)
 
     def load(self, path=None):
         """Load benchmark results from file"""
@@ -207,15 +253,14 @@ class Benchmark:
                 filename = os.path.splitext(path)
 
         if os.path.exists(filename + ".json"):
-            import json
             filename += ".json"
-            with open(filename, "r") as f:
-                self.results = json.load(f)
+            with open(filename, "r") as load_file:
+                self.results = json.load(load_file)
         elif os.path.exists(filename + ".save"):
-            import pickle
+            import pickle # pylint: disable=import-outside-toplevel
             filename += ".save"
-            with open(filename, "rb") as f:
-                self.results = pickle.load(f)
+            with open(filename, "rb") as load_file:
+                self.results = pickle.load(load_file)
         else:
             raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filename)
 
@@ -223,31 +268,31 @@ class Benchmark:
 
         # Build new named tuples
         for allocator in self.results["allocators"]:
-            d = {}
+            data = {}
             for perm, measures in self.results[allocator]:
-                d[self.Perm(**perm)] = measures
-            self.results[allocator] = d
+                data[self.Perm(**perm)] = measures
+            self.results[allocator] = data
 
-            d = {}
+            stats = {}
             if "stats" in self.results:
                 for perm, value in self.results["stats"][allocator]:
-                    d[self.Perm(**perm)] = value
-                self.results["stats"][allocator] = d
+                    stats[self.Perm(**perm)] = value
+                self.results["stats"][allocator] = stats
 
         # add eventual missing statistics
         if "stats" not in self.results:
             self.calc_desc_statistics()
 
-    def prepare(self):
-        """default prepare implementation raising an error if a requirement is not found"""
+    def check_requirements(self):
+        """Append BUILDDIR/benchmarks/<name> to PATH and raise an Exception if a requirement is not found"""
         os.environ["PATH"] += f"{os.pathsep}{globalvars.BUILDDIR}/benchmarks/{self.name}"
 
-        for r in self.requirements:
-            exe = find_cmd(r)
+        for requirement in self.requirements:
+            exe = find_cmd(requirement)
             if exe is not None:
-                self.results["facts"]["libcs"][r] = facter.libc_ver(executable=exe)
+                self.results["facts"]["libcs"][requirement] = facter.libc_ver(executable=exe)
             else:
-                raise Exception("Requirement: {} not found".format(r))
+                raise Exception("Requirement: {} not found".format(requirement))
 
     def iterate_args(self, args=None, fixed=None):
         """Iterator over each possible combination of args
@@ -291,8 +336,15 @@ class Benchmark:
             if is_fixed:
                 yield perm
 
-    def prepare_argv(self, cmd, env={}, alloc={}, substitutions={}, prepend=True):
+    def prepare_argv(self, cmd, env=None, alloc=None, substitutions=None, prepend=True):
         """Prepare an complete argv list for benchmarking"""
+        if env is None:
+            env = {}
+        if alloc is None:
+            alloc = {}
+        if substitutions is None:
+            substitutions = {}
+
         argv = []
         if prepend:
             if "cmd_prefix" in alloc:
@@ -318,21 +370,29 @@ class Benchmark:
             argv.extend(["-p", ld_preload])
 
             if "LD_LIBRARY_PATH" in env or alloc.get("LD_LIBRARY_PATH", ""):
-                argv.extend(["-l", f"{alloc.get('LD_LIBRARY_PATH', '')} {env.get('LD_LIBRARY_PATH', '')}"])
+                old_ld_lib_path = env.get('LD_LIBRARY_PATH', '')
+                ld_lib_path = alloc.get('LD_LIBRARY_PATH', '')
+                argv.extend(["-l", f"{ld_lib_path} {old_ld_lib_path}"])
 
-        cmd_argv = cmd.format(**substitutions)
-        cmd_argv = prefix_cmd_with_abspath(cmd_argv).split()
+        cmd_expanded = cmd.format(**substitutions)
+        cmd_argv = prefix_cmd_with_abspath(cmd_expanded).split()
 
         argv.extend(cmd_argv)
 
         return argv
 
-    def start_servers(self, env={}, alloc_name="None", alloc={"cmd_prefix": ""}):
+    def start_servers(self, env=None, alloc_name="None", alloc=None):
         """Start Servers
 
         Servers are not allowed to deamonize because then they can't
         be terminated using their Popen object."""
 
+        if env is None:
+            env = {}
+
+        if alloc is None:
+            alloc = {"cmd_prefix": ""}
+
         substitutions = {"alloc": alloc_name,
                          "perm": alloc_name,
                          "builddir": globalvars.BUILDDIR}
@@ -372,18 +432,12 @@ class Benchmark:
             print_info(f"Preparing {server_name}")
             for prep_cmd in server["prepare_cmds"]:
                 prep_cmd = prep_cmd.format(**substitutions)
-                print_debug(prep_cmd)
-
-                proc = subprocess.run(prep_cmd.split(), universal_newlines=True,
-                                      stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
-                print_debug("Stdout:", proc.stdout)
-                print_debug("Stderr:", proc.stderr)
 
+                proc = run_cmd(prep_cmd.split(), output_verbosity=3)
 
     def shutdown_server(self, server):
         """Terminate a started server running its shutdown_cmds in advance"""
-        if server["popen"].poll() == None:
+        if server["popen"].poll() is None:
             server_name = server.get("name", "Server")
             print_info(f"Shutting down {server_name}")
 
@@ -394,21 +448,19 @@ class Benchmark:
             if "shutdown_cmds" in server:
                 for shutdown_cmd in server["shutdown_cmds"]:
                     shutdown_cmd = shutdown_cmd.format(**substitutions)
-                    print_debug(shutdown_cmd)
-
-                    proc = subprocess.run(shutdown_cmd.split(), universal_newlines=True,
-                                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
-                    print_debug("Stdout:", proc.stdout)
-                    print_debug("Stderr:", proc.stderr)
+                    run_cmd(shutdown_cmd.split(), output_verbosity=3)
 
                 # wait for server termination
                 sleep(5)
 
             outs, errs = Benchmark.terminate_subprocess(server["popen"])
         else:
-            outs = server["popen"].stdout.read()
-            errs = server["popen"].stderr.read()
+            outs, errs = "", ""
+            if not server["popen"].stdout.closed:
+                outs = server["popen"].stdout.read()
+            if not server["popen"].stderr.closed:
+                errs = server["popen"].stderr.read()
 
         server["stdout"] = outs
         server["stderr"] = errs
@@ -434,7 +486,7 @@ class Benchmark:
 
         self.results["facts"]["runs"] = runs
 
-        n = len(list(self.iterate_args())) * len(self.allocators)
+        total_executions = len(list(self.iterate_args())) * len(self.allocators)
         for run in range(1, runs + 1):
             print_status(run, ". run", sep='')
 
@@ -446,9 +498,9 @@ class Benchmark:
                 skip = False
                 try:
                     self.start_servers(alloc_name=alloc_name, alloc=alloc, env=os.environ)
-                except Exception as e:
+                except Exception as err:
                     print_debug(traceback.format_exc())
-                    print_error(e)
+                    print_error(err)
                     print_error("Skipping", alloc_name)
                     skip = True
 
@@ -469,7 +521,7 @@ class Benchmark:
                         self.results[alloc_name][perm].append({})
                         continue
 
-                    print_info0(i, "of", n, "\r", end='')
+                    print_info0(i, "of", total_executions, "\r", end='')
 
                     # Available substitutions in cmd
                     substitutions = {"run": run, "alloc": alloc_name}
@@ -494,10 +546,10 @@ class Benchmark:
                         os.chdir(run_dir)
                         print_debug("\nChange cwd to:", run_dir)
 
-                    print_debug("\nCmd:", argv)
-                    res = subprocess.run(argv, stderr=subprocess.PIPE,
-                                         stdout=subprocess.PIPE,
-                                         universal_newlines=True)
+                    try:
+                        res = run_cmd(argv, capture=True)
+                    except subprocess.CalledProcessError as err:
+                        res = err
 
                     result = {}
 
@@ -514,41 +566,20 @@ class Benchmark:
 
                     # parse and store results
                     else:
-                        if not self.servers:
+                        if self.servers:
+                            for server in self.servers:
+                                Benchmark.save_server_status_and_values(result, server, ["VmHWM"])
+                        else:
                             if os.path.isfile("status"):
                                 # Read VmHWM from status file. If our benchmark
                                 # didn't fork the first occurance of VmHWM is from
                                 # our benchmark
-                                with open("status", "r") as f:
-                                    for l in f.readlines():
-                                        if l.startswith("VmHWM:"):
-                                            result["VmHWM"] = l.split()[1]
-                                            break
-
+                                Benchmark.save_values_from_proc_status(result, ["VmHWM"])
                                 os.remove("status")
 
                             # parse perf output if available
                             if self.measure_cmd == Benchmark.measure_cmd or self.measure_cmd_csv:
-                                csvreader = csv.reader(res.stderr.splitlines(),
-                                                       delimiter=',')
-                                for row in csvreader:
-                                    # Split of the user/kernel space info to be better portable
-                                    try:
-                                        result[row[2].split(":")[0]] = row[0]
-                                    except Exception as e:
-                                        print_warn("Exception", e, "occured on", row, "for",
-                                                   alloc_name, "and", perm)
-                        else:
-                            result["server_status"] = []
-                            for server in self.servers:
-                                with open(f"/proc/{server['popen'].pid}/status", "r") as f:
-                                    server_status = f.read()
-                                    result["server_status"].append(server_status)
-
-                                    for l in server_status.splitlines():
-                                        if l.startswith("VmHWM:"):
-                                            result[f"{server.get('name', 'Server')}_vmhwm"] = l.split()[1]
-                                            break
+                                Benchmark.parse_and_save_perf_output(result, res.stderr, alloc_name, perm)
 
 
                         if hasattr(self, "process_output"):
@@ -560,6 +591,7 @@ class Benchmark:
                         if valid_result is None:
                             valid_result = result
 
+                    print_debug(f"Resulting in: {result}")
                     self.results[alloc_name][perm].append(result)
 
                     if os.getcwd() != cwd:
@@ -586,8 +618,8 @@ class Benchmark:
         if valid_result != {}:
             for allocator in self.allocators:
                 for perm in self.iterate_args():
-                    for i, m in enumerate(self.results[allocator][perm]):
-                        if m == {}:
+                    for i, measure in enumerate(self.results[allocator][perm]):
+                        if measure == {}:
                             self.results[allocator][perm][i] = {k: np.NaN for k in valid_result}
 
         self.calc_desc_statistics()
@@ -609,25 +641,25 @@ class Benchmark:
                                          "lower_quartile", "upper_quartile",
                                          "lower_whisker", "upper_whisker",
                                          "outliers"]}
-                for dp in self.results[alloc][perm][0]:
+                for key in self.results[alloc][perm][0]:
                     try:
-                        data = [float(m[dp]) for m in self.results[alloc][perm]]
-                    except (TypeError, ValueError) as e:
+                        data = [float(m[key]) for m in self.results[alloc][perm]]
+                    except (TypeError, ValueError):
                         continue
-                    stats["min"][dp] = np.min(data)
-                    stats["max"][dp] = np.max(data)
-                    stats["mean"][dp] = np.mean(data)
-                    stats["median"][dp] = np.median(data)
-                    stats["std"][dp] = np.std(data, ddof=1)
-                    stats["std_perc"][dp] = stats["std"][dp] / stats["mean"][dp]
-                    stats["lower_quartile"][dp], stats["upper_quartile"][dp] = np.percentile(data, [25, 75])
-                    trimmed_range = stats["upper_quartile"][dp] - stats["lower_quartile"][dp]
-                    stats["lower_whisker"][dp] = stats["lower_quartile"][dp] - trimmed_range
-                    stats["upper_whisker"][dp] = stats["upper_quartile"][dp] + trimmed_range
+                    stats["min"][key] = np.min(data)
+                    stats["max"][key] = np.max(data)
+                    stats["mean"][key] = np.mean(data)
+                    stats["median"][key] = np.median(data)
+                    stats["std"][key] = np.std(data, ddof=1)
+                    stats["std_perc"][key] = stats["std"][key] / stats["mean"][key]
+                    stats["lower_quartile"][key], stats["upper_quartile"][key] = np.percentile(data, [25, 75])
+                    trimmed_range = stats["upper_quartile"][key] - stats["lower_quartile"][key]
+                    stats["lower_whisker"][key] = stats["lower_quartile"][key] - trimmed_range
+                    stats["upper_whisker"][key] = stats["upper_quartile"][key] + trimmed_range
                     outliers = []
-                    for d in data:
-                        if d > stats["upper_whisker"][dp] or d < stats["lower_whisker"][dp]:
-                            outliers.append(d)
-                    stats["outliers"][dp] = outliers
+                    for value in data:
+                        if value > stats["upper_whisker"][key] or value < stats["lower_whisker"][key]:
+                            outliers.append(value)
+                    stats["outliers"][key] = outliers
 
                 self.results["stats"][alloc][perm] = stats
author	Florian Fischer <florian.fl.fischer@fau.de>	2020-05-10 09:51:18 +0200
committer	Florian Fischer <florian.fl.fischer@fau.de>	2020-06-02 11:18:47 +0200
commit	40f2ae0fbfca831aae3b5996f08ca243d5b4d38e (patch)
tree	54a39026bd9ee618e66e57db87ffbb8e171c7558
parent	83c03e43465ac0a88a7775ed088a2e674e433cf8 (diff)
download	allocbench-40f2ae0fbfca831aae3b5996f08ca243d5b4d38e.tar.gz allocbench-40f2ae0fbfca831aae3b5996f08ca243d5b4d38e.zip