make the whole project more python idiomatic

* rename src directory to allocbench * make global variable names UPPERCASE * format a lot of code using yapf * use lowercase ld_preload and ld_library_path as Allocator members * name expected Errors 'err' and don't raise a new Exception * disable some pylint messages
author: Florian Fischer <florian.fl.fischer@fau.de> 2020-05-06 16:56:32 +0200
committer: Florian Fischer <florian.fl.fischer@fau.de> 2020-06-02 11:18:47 +0200
commit: 8174a918ea3b7cb216bf7ea98cfdc10661b5c37d (patch)
tree: 0747ec3ccb9f8d7eeccfac35977fc17855ca3bbb /src/benchmarks/dj_trace.py
parent: 8f52e8fc02dd235582f5961941bcd564e9a681cd (diff)
download: allocbench-8174a918ea3b7cb216bf7ea98cfdc10661b5c37d.tar.gz
allocbench-8174a918ea3b7cb216bf7ea98cfdc10661b5c37d.zip
1 files changed, 0 insertions, 445 deletions
diff --git a/src/benchmarks/dj_trace.py b/src/benchmarks/dj_trace.py
deleted file mode 100644
index 22398e6..0000000
--- a/src/benchmarks/dj_trace.py
+++ /dev/null
@@ -1,445 +0,0 @@
-# Copyright 2018-2019 Florian Fischer <florian.fl.fischer@fau.de>
-#
-# This file is part of allocbench.
-#
-# allocbench is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# allocbench is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with allocbench.  If not, see <http://www.gnu.org/licenses/>.
-"""Benchmark definition using the traces collected by DJ Delorie"""
-
-import os
-import subprocess
-import sys
-import re
-from urllib.request import urlretrieve
-import matplotlib.pyplot as plt
-import numpy as np
-
-from src.artifact import ArchiveArtifact
-from src.benchmark import Benchmark
-from src.globalvars import summary_file_ext
-import src.plots as abplt
-from src.util import print_status
-
-COMMA_SEP_NUMBER_RE = "(?:\\d*(?:,\\d*)?)*"
-RSS_RE = f"(?P<rss>{COMMA_SEP_NUMBER_RE})"
-TIME_RE = f"(?P<time>{COMMA_SEP_NUMBER_RE})"
-
-CYCLES_RE = re.compile(f"^{TIME_RE} cycles$")
-CPU_TIME_RE = re.compile(f"^{TIME_RE} usec across.*threads$")
-
-MAX_RSS_RE = re.compile(f"^{RSS_RE} Kb Max RSS")
-IDEAL_RSS_RE = re.compile(f"^{RSS_RE} Kb Max Ideal RSS")
-
-MALLOC_RE = re.compile(f"^Avg malloc time:\\s*{TIME_RE} in.*calls$")
-CALLOC_RE = re.compile(f"^Avg calloc time:\\s*{TIME_RE} in.*calls$")
-REALLOC_RE = re.compile(f"^Avg realloc time:\\s*{TIME_RE} in.*calls$")
-FREE_RE = re.compile(f"^Avg free time:\\s*{TIME_RE} in.*calls$")
-
-
-class BenchmarkDJTrace(Benchmark):
-    """DJ Trace Benchmark
-
-    This benchmark uses the workload simulator written by DJ Delorie to
-    simulate workloads provided by him under https://delorie.com/malloc. Those
-    workloads are generated from traces of real aplications and are also used
-    by delorie to measure improvements in the glibc allocator.
-    """
-    def __init__(self):
-        name = "dj_trace"
-
-        self.cmd = "trace_run{binary_suffix} {workload_dir}/dj_workloads/{workload}.wl"
-        self.measure_cmd = ""
-
-        self.args = {
-            "workload": [
-                "389-ds-2", "dj", "dj2", "mt_test_one_alloc", "oocalc",
-                "qemu-virtio", "qemu-win7", "proprietary-1", "proprietary-2"
-            ]
-        }
-
-        self.results = {
-            "389-ds-2": {
-                "malloc": 170500018,
-                "calloc": 161787184,
-                "realloc": 404134,
-                "free": 314856324,
-                "threads": 41
-            },
-            "dj": {
-                "malloc": 2000000,
-                "calloc": 200,
-                "realloc": 0,
-                "free": 2003140,
-                "threads": 201
-            },
-            "dj2": {
-                "malloc": 29263321,
-                "calloc": 3798404,
-                "realloc": 122956,
-                "free": 32709054,
-                "threads": 36
-            },
-            "mt_test_one_alloc": {
-                "malloc": 524290,
-                "calloc": 1,
-                "realloc": 0,
-                "free": 594788,
-                "threads": 2
-            },
-            "oocalc": {
-                "malloc": 6731734,
-                "calloc": 38421,
-                "realloc": 14108,
-                "free": 6826686,
-                "threads": 88
-            },
-            "qemu-virtio": {
-                "malloc": 1772163,
-                "calloc": 146634,
-                "realloc": 59813,
-                "free": 1954732,
-                "threads": 3
-            },
-            "qemu-win7": {
-                "malloc": 980904,
-                "calloc": 225420,
-                "realloc": 89880,
-                "free": 1347825,
-                "threads": 6
-            },
-            "proprietary-1": {
-                "malloc": 316032131,
-                "calloc": 5642,
-                "realloc": 84,
-                "free": 319919727,
-                "threads": 20
-            },
-            "proprietary-2": {
-                "malloc": 9753948,
-                "calloc": 4693,
-                "realloc": 117,
-                "free": 10099261,
-                "threads": 19
-            }
-        }
-
-        self.requirements = ["trace_run"]
-        super().__init__(name)
-
-    def prepare(self):
-        super().prepare()
-
-        workloads = ArchiveArtifact(
-            "dj_workloads",
-            "https://www4.cs.fau.de/~flow/allocbench/dj_workloads.tar.xz",
-            "tar", "c9bc499eeba8023bca28a755fffbaf9200a335ad")
-
-        self.workload_dir = workloads.provide()
-
-    @staticmethod
-    def process_output(result, stdout, stderr, allocator, perm):
-        def to_int(string):
-            return int(string.replace(',', ""))
-
-        regexs = {7: MALLOC_RE, 8: CALLOC_RE, 9: REALLOC_RE, 10: FREE_RE}
-        functions = {7: "malloc", 8: "calloc", 9: "realloc", 10: "free"}
-        for i, line in enumerate(stdout.splitlines()):
-            if i == 0:
-                result["cycles"] = to_int(CYCLES_RE.match(line).group("time"))
-            elif i == 2:
-                result["cputime"] = to_int(
-                    CPU_TIME_RE.match(line).group("time"))
-            elif i == 3:
-                result["Max_RSS"] = to_int(MAX_RSS_RE.match(line).group("rss"))
-            elif i == 4:
-                result["Ideal_RSS"] = to_int(
-                    IDEAL_RSS_RE.match(line).group("rss"))
-            elif i in [7, 8, 9, 10]:
-                res = regexs[i].match(line)
-                fname = functions[i]
-                result["avg_" + fname] = to_int(res.group("time"))
-
-    def summary(self):
-        args = self.results["args"]
-        allocators = self.results["allocators"]
-
-        abplt.plot(self,
-                   "{cputime}/1000",
-                   plot_type='bar',
-                   fig_options={
-                       'ylabel': "time in ms",
-                       'title': "total runtime",
-                   },
-                   file_postfix="runtime")
-
-        # Function Times
-        func_times_means = {allocator: {} for allocator in allocators}
-        xa = np.arange(0, 6, 1.5)
-        for perm in self.iterate_args(args=args):
-            for i, allocator in enumerate(allocators):
-                x_vals = [x + i / len(allocators) for x in xa]
-
-                func_times_means[allocator][perm] = [0, 0, 0, 0]
-
-                func_times_means[allocator][perm][0] = np.mean(
-                    [x["avg_malloc"] for x in self.results[allocator][perm]])
-                func_times_means[allocator][perm][1] = np.mean(
-                    [x["avg_calloc"] for x in self.results[allocator][perm]])
-                func_times_means[allocator][perm][2] = np.mean(
-                    [x["avg_realloc"] for x in self.results[allocator][perm]])
-                func_times_means[allocator][perm][3] = np.mean(
-                    [x["avg_free"] for x in self.results[allocator][perm]])
-
-                plt.bar(x_vals,
-                        func_times_means[allocator][perm],
-                        width=0.25,
-                        align="center",
-                        label=allocator,
-                        color=allocators[allocator]["color"])
-
-            plt.legend(loc="best")
-            plt.xticks(xa + 1 / len(allocators) * 2, [
-                "malloc\n" + str(self.results[perm.workload]["malloc"]) +
-                "\ncalls", "calloc\n" +
-                str(self.results[perm.workload]["calloc"]) + "\ncalls",
-                "realloc\n" + str(self.results[perm.workload]["realloc"]) +
-                "\ncalls",
-                "free\n" + str(self.results[perm.workload]["free"]) + "\ncalls"
-            ])
-            plt.ylabel("cycles")
-            plt.title(f"Avg. runtime of API functions {perm.workload}")
-            plt.savefig(".".join(
-                [self.name, perm.workload, "apitimes", summary_file_ext]))
-            plt.clf()
-
-        # Memusage
-        # hack ideal rss in data set
-        allocators["Ideal_RSS"] = {"color": "xkcd:gold"}
-        self.results["stats"]["Ideal_RSS"] = {}
-        for perm in self.iterate_args(args=args):
-            ideal_rss = self.results[list(
-                allocators.keys())[0]][perm][0]["Ideal_RSS"] / 1000
-            self.results["stats"]["Ideal_RSS"][perm] = {
-                "mean": {
-                    "Max_RSS": ideal_rss
-                },
-                "std": {
-                    "Max_RSS": 0
-                }
-            }
-
-        abplt.plot(self,
-                   "{Max_RSS}/1000",
-                   plot_type='bar',
-                   fig_options={
-                       'ylabel': "Max RSS in MB",
-                       'title': "Max RSS (VmHWM)",
-                   },
-                   file_postfix="newrss")
-
-        # self.barplot_fixed_arg("{Max_RSS}/1000",
-        # ylabel='"Max RSS in MB"',
-        # title='"Highwatermark of Vm (VmHWM)"',
-        # file_postfix="newrss")
-
-        del allocators["Ideal_RSS"]
-        del self.results["stats"]["Ideal_RSS"]
-
-        rss_means = {allocator: {} for allocator in allocators}
-        for perm in self.iterate_args(args=args):
-            for i, allocator in enumerate(allocators):
-                d = [x["Max_RSS"] for x in self.results[allocator][perm]]
-                # data is in kB
-                rss_means[allocator][perm] = np.mean(d) / 1000
-
-                plt.bar([i],
-                        rss_means[allocator][perm],
-                        label=allocator,
-                        color=allocators[allocator]["color"])
-
-            # add ideal rss
-            y_val = self.results[list(
-                allocators.keys())[0]][perm][0]["Ideal_RSS"] / 1000
-            plt.bar([len(allocators)], y_val, label="Ideal RSS")
-
-            plt.legend(loc="best")
-            plt.ylabel("Max RSS in MB")
-            plt.title(f"Maximal RSS (VmHWM) {perm.workload}")
-            plt.savefig(".".join(
-                [self.name, perm.workload, "rss", summary_file_ext]))
-            plt.clf()
-
-        abplt.export_stats_to_csv(self, "Max_RSS")
-        abplt.export_stats_to_csv(self, "cputime")
-
-        abplt.export_stats_to_dataref(self, "Max_RSS")
-        abplt.export_stats_to_dataref(self, "cputime")
-
-        # Big table
-        abplt.write_tex_table(self, [{
-            "label": "Runtime [ms]",
-            "expression": "{cputime}/1000",
-            "sort": "<"
-        }, {
-            "label": "Max RSS [MB]",
-            "expression": "{Max_RSS}/1000",
-            "sort": "<"
-        }],
-                              file_postfix="table")
-
-        # Tables
-        for perm in self.iterate_args(args=args):
-            # collect data
-            d = {allocator: {} for allocator in allocators}
-            for i, allocator in enumerate(allocators):
-                d[allocator]["time"] = [
-                    x["cputime"] for x in self.results[allocator][perm]
-                ]
-                d[allocator]["rss"] = [
-                    x["Max_RSS"] for x in self.results[allocator][perm]
-                ]
-
-            times = {
-                allocator: np.mean(d[allocator]["time"])
-                for allocator in allocators
-            }
-            tmin = min(times.values())
-            tmax = max(times.values())
-
-            rss = {
-                allocator: np.mean(d[allocator]["rss"])
-                for allocator in allocators
-            }
-            rssmin = min(rss.values())
-            rssmax = max(rss.values())
-
-            fname = ".".join([self.name, perm.workload, "table.tex"])
-            with open(fname, "w") as f:
-                print("\\documentclass{standalone}", file=f)
-                print("\\usepackage{xcolor}", file=f)
-                print("\\begin{document}", file=f)
-                print("\\begin{tabular}{| l | l | l |}", file=f)
-                print(
-                    "& Zeit (ms) / $\\sigma$ (\\%) & VmHWM (KB) / $\\sigma$ (\\%) \\\\",
-                    file=f)
-                print("\\hline", file=f)
-
-                for allocator in allocators:
-                    print(allocator.replace("_", "\\_"), end=" & ", file=f)
-
-                    s = "\\textcolor{{{}}}{{{:.2f}}} / {:.4f}"
-
-                    t = d[allocator]["time"]
-                    m = times[allocator]
-                    if m == tmin:
-                        color = "green"
-                    elif m == tmax:
-                        color = "red"
-                    else:
-                        color = "black"
-                    print(s.format(color, m, np.std(t) / m), end=" & ", file=f)
-
-                    t = d[allocator]["rss"]
-                    m = rss[allocator]
-                    if m == rssmin:
-                        color = "green"
-                    elif m == rssmax:
-                        color = "red"
-                    else:
-                        color = "black"
-                    print(s.format(color, m,
-                                   np.std(t) / m if m else 0),
-                          "\\\\",
-                          file=f)
-
-                print("\\end{tabular}", file=f)
-                print("\\end{document}", file=f)
-
-        # Create summary similar to DJ's at
-        # https://sourceware.org/ml/libc-alpha/2017-01/msg00452.html
-        with open(self.name + "_plain.txt", "w") as f:
-            # Absolutes
-            fmt = "{:<20} {:>15} {:>7} {:>7} {:>7} {:>7} {:>7}"
-            for i, allocator in enumerate(allocators):
-                print("{0} {1} {0}".format("-" * 10, allocator), file=f)
-                print(fmt.format("Workload", "Total", "malloc", "calloc",
-                                 "realloc", "free", "RSS"),
-                      file=f)
-
-                for perm in self.iterate_args(args=args):
-                    cycles = abplt._get_y_data(self, "{cycles}", allocator, perm)[0]
-                    times = [t for t in func_times_means[allocator][perm]]
-                    rss = rss_means[allocator][perm]
-                    print(fmt.format(perm.workload, cycles, times[0], times[1],
-                                     times[2], times[3], rss),
-                          file=f)
-
-                print(file=f)
-
-            # Changes. First allocator in allocators is the reference
-            fmt_changes = "{:<20} {:>14.0f}% {:>6.0f}% {:>6.0f}% {:>6.0f}% {:>6.0f}% {:>6.0f}%"
-            for allocator in list(allocators)[1:]:
-                print("{0} Changes {1} {0}".format("-" * 10, allocator),
-                      file=f)
-                print(fmt.format("Workload", "Total", "malloc", "calloc",
-                                 "realloc", "free", "RSS"),
-                      file=f)
-
-                ref_alloc = list(allocators)[0]
-                cycles_change_means = []
-                times_change_means = []
-                rss_change_means = []
-                for perm in self.iterate_args(args=args):
-
-                    normal_cycles = cycles_means[ref_alloc][perm]
-                    if normal_cycles:
-                        cycles = np.round(cycles_means[allocator][perm] /
-                                          normal_cycles * 100)
-                    else:
-                        cycles = 0
-                    cycles_change_means.append(cycles)
-
-                    normal_times = func_times_means[ref_alloc][perm]
-                    times = [0, 0, 0, 0]
-                    for i in range(0, len(times)):
-                        t = func_times_means[allocator][perm][i]
-                        nt = normal_times[i]
-                        if nt != 0:
-                            times[i] = np.round(t / nt * 100)
-                    times_change_means.append(times)
-
-                    normal_rss = rss_means[ref_alloc][perm]
-                    if normal_rss:
-                        rss = np.round(rss_means[allocator][perm] /
-                                       normal_rss * 100)
-                    else:
-                        rss = 0
-                    rss_change_means.append(rss)
-
-                    print(fmt_changes.format(perm.workload, cycles, times[0],
-                                             times[1], times[2], times[3],
-                                             rss),
-                          file=f)
-                print(file=f)
-                tmeans = [0, 0, 0, 0]
-                for i in range(0, len(times)):
-                    tmeans[i] = np.mean(
-                        [times[i] for times in times_change_means])
-                print(fmt_changes.format("Mean:", np.mean(cycles_change_means),
-                                         tmeans[0], tmeans[1], tmeans[2],
-                                         tmeans[3], np.mean(rss_change_means)),
-                      '\n',
-                      file=f)
-
-
-dj_trace = BenchmarkDJTrace()
author	Florian Fischer <florian.fl.fischer@fau.de>	2020-05-06 16:56:32 +0200
committer	Florian Fischer <florian.fl.fischer@fau.de>	2020-06-02 11:18:47 +0200
commit	8174a918ea3b7cb216bf7ea98cfdc10661b5c37d (patch)
tree	0747ec3ccb9f8d7eeccfac35977fc17855ca3bbb /src/benchmarks/dj_trace.py
parent	8f52e8fc02dd235582f5961941bcd564e9a681cd (diff)
download	allocbench-8174a918ea3b7cb216bf7ea98cfdc10661b5c37d.tar.gz allocbench-8174a918ea3b7cb216bf7ea98cfdc10661b5c37d.zip