From 884f09cbde3882de8e7db3bd90d856f499a0e706 Mon Sep 17 00:00:00 2001 From: Florian Fischer Date: Thu, 29 Aug 2019 14:06:50 +0200 Subject: move urlretrieve report hook to util.py and use archive in dj_trace --- TODO | 7 ++--- src/benchmarks/dj_trace.py | 69 ++++++++++++++-------------------------------- src/benchmarks/lld.py | 13 ++------- src/util.py | 11 ++++++++ 4 files changed, 36 insertions(+), 64 deletions(-) diff --git a/TODO b/TODO index 22537c1..b0da8f9 100644 --- a/TODO +++ b/TODO @@ -1,6 +1,5 @@ - check dependencies - log facilities -- dj_traces as archive from our servers, check checksum! - implement requirements for allocators @@ -8,7 +7,7 @@ - merge measurements if facts match -- Use https://www4.cs.fau.de/~flow/allocbench/dj_workloads.tar.xz -- verify (commit id, checksum) remote sources +- Implement external sources store + - verify (commit id, checksum) remote sources + - git recursive clone -- git recursive clone diff --git a/src/benchmarks/dj_trace.py b/src/benchmarks/dj_trace.py index ee2f161..e74df64 100644 --- a/src/benchmarks/dj_trace.py +++ b/src/benchmarks/dj_trace.py @@ -18,6 +18,7 @@ """Benchmark definition using the traces collected by DJ Delorie""" import os +import subprocess import sys import re from urllib.request import urlretrieve @@ -25,7 +26,7 @@ import matplotlib.pyplot as plt import numpy as np from src.benchmark import Benchmark -from src.util import print_status +from src.util import print_status, download_reporthook COMMA_SEP_NUMBER_RE = "(?:\\d*(?:,\\d*)?)*" @@ -104,61 +105,31 @@ class BenchmarkDJTrace(Benchmark): def prepare(self): super().prepare() - def reporthook(blocknum, blocksize, totalsize): - readsofar = blocknum * blocksize - if totalsize > 0: - percent = readsofar * 1e2 / totalsize - status = "\r%5.1f%% %*d / %d" % ( - percent, len(str(totalsize)), readsofar, totalsize) - sys.stderr.write(status) - else: # total size is unknown - sys.stderr.write(f"\rdownloaded {readsofar}") - - if not os.path.isdir("dj_workloads"): - os.mkdir("dj_workloads") - - download_all = None - wl_sizes = {"dj": "14M", "oocalc": "65M", "mt_test_one_alloc": "5.7M", - "proprietary-1": "2.8G", "qemu-virtio": "34M", - "proprietary-2": "92M", "qemu-win7": "23M", - "389-ds-2": "3.4G", "dj2": "294M"} - - for workload in self.args["workload"]: - file_name = workload + ".wl" - file_path = os.path.join("dj_workloads", file_name) - if not os.path.isfile(file_path): - if download_all is None: - choice = input(("Download all missing workloads" - " (upto 6.7GB) [Y/n/x] ")) - if choice == "x": - break - else: - download_all = choice in ['', 'Y', 'y'] - - if not download_all: - choice = input(f"want to download {workload} ({wl_sizes[workload]}) [Y/n] ") - if choice not in ['', 'Y', 'y']: - continue + workload_dir = "dj_workloads" + workload_archive = f"{workload_dir}.tar.xz" - else: - print_status(f"downloading {workload} ({wl_sizes[workload]}) ...") + if not os.path.isdir(workload_dir): + if not os.path.isfile(workload_archive): + choice = input("Download missing workloads (367M / ~6GB unpacked) [Y/n] ") + if not choice in ['', 'Y', 'y']: + return False - url = "http://www.delorie.com/malloc/" + file_name - urlretrieve(url, file_path, reporthook) + url = f"https://www4.cs.fau.de/~flow/allocbench/{workload_archive}" + urlretrieve(url, workload_archive, download_reporthook) sys.stderr.write("\n") - available_workloads = [] - for workload in self.args["workload"]: - file_name = workload + ".wl" - file_path = os.path.join("dj_workloads", file_name) - if os.path.isfile(file_path): - available_workloads.append(workload) + # Extract workloads + proc = subprocess.run(["tar", "xf", workload_archive], stdout=subprocess.PIPE, + stderr=subprocess.PIPE, universal_newlines=True) + + # delete archive + if proc.returncode == 0: + os.remove(workload_archive) + + self.args["workload"] = os.listdir(workload_) - if available_workloads: - self.args["workload"] = available_workloads return True - return False @staticmethod def process_output(result, stdout, stderr, allocator, perm): diff --git a/src/benchmarks/lld.py b/src/benchmarks/lld.py index c5c802b..bcedced 100644 --- a/src/benchmarks/lld.py +++ b/src/benchmarks/lld.py @@ -25,6 +25,7 @@ import sys import matplotlib.pyplot as plt from src.benchmark import Benchmark +from src.util import download_reporthook class BenchmarkLld(Benchmark): @@ -50,16 +51,6 @@ class BenchmarkLld(Benchmark): def prepare(self): super().prepare() - def reporthook(blocknum, blocksize, totalsize): - readsofar = blocknum * blocksize - if totalsize > 0: - percent = readsofar * 1e2 / totalsize - status = "\r%5.1f%% %*d / %d" % ( - percent, len(str(totalsize)), readsofar, totalsize) - sys.stderr.write(status) - else: # total size is unknown - sys.stderr.write(f"\rdownloaded {readsofar}") - test_dir = "lld-speed-test" test_archive = f"{test_dir}.tar.xz" if not os.path.isdir(test_dir): @@ -69,7 +60,7 @@ class BenchmarkLld(Benchmark): return False url = f"https://s3-us-west-2.amazonaws.com/linker-tests/{test_archive}" - urlretrieve(url, test_archive, reporthook) + urlretrieve(url, test_archive, download_reporthook) sys.stderr.write("\n") # Extract tests diff --git a/src/util.py b/src/util.py index aebdb00..9941d6f 100644 --- a/src/util.py +++ b/src/util.py @@ -23,6 +23,17 @@ import sys import src.globalvars +def download_reporthook(blocknum, blocksize, totalsize): + """Status report hook for urlretrieve""" + readsofar = blocknum * blocksize + if totalsize > 0: + percent = readsofar * 100 / totalsize + status = "\r%5.1f%% %*d / %d" % ( + percent, len(str(totalsize)), readsofar, totalsize) + sys.stderr.write(status) + else: # total size is unknown + sys.stderr.write(f"\rdownloaded {readsofar}") + def is_exe(fpath): """Check if the given path is an exexutable file""" -- cgit v1.2.3