From 884f09cbde3882de8e7db3bd90d856f499a0e706 Mon Sep 17 00:00:00 2001
From: Florian Fischer <florian.fl.fischer@fau.de>
Date: Thu, 29 Aug 2019 14:06:50 +0200
Subject: move urlretrieve report hook to util.py and use archive in dj_trace

---
 TODO                       |  7 ++---
 src/benchmarks/dj_trace.py | 69 ++++++++++++++--------------------------------
 src/benchmarks/lld.py      | 13 ++-------
 src/util.py                | 11 ++++++++
 4 files changed, 36 insertions(+), 64 deletions(-)

diff --git a/TODO b/TODO
index 22537c1..b0da8f9 100644
--- a/TODO
+++ b/TODO
@@ -1,6 +1,5 @@
 - check dependencies
 - log facilities
-- dj_traces as archive from our servers, check checksum!
 
 - implement requirements for allocators
 
@@ -8,7 +7,7 @@
 
 - merge measurements if facts match
 
-- Use https://www4.cs.fau.de/~flow/allocbench/dj_workloads.tar.xz
-- verify (commit id, checksum) remote sources
+- Implement external sources store
+	- verify (commit id, checksum) remote sources
+	- git recursive clone
 
-- git recursive clone
diff --git a/src/benchmarks/dj_trace.py b/src/benchmarks/dj_trace.py
index ee2f161..e74df64 100644
--- a/src/benchmarks/dj_trace.py
+++ b/src/benchmarks/dj_trace.py
@@ -18,6 +18,7 @@
 """Benchmark definition using the traces collected by DJ Delorie"""
 
 import os
+import subprocess
 import sys
 import re
 from urllib.request import urlretrieve
@@ -25,7 +26,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 
 from src.benchmark import Benchmark
-from src.util import print_status
+from src.util import print_status, download_reporthook
 
 
 COMMA_SEP_NUMBER_RE = "(?:\\d*(?:,\\d*)?)*"
@@ -104,61 +105,31 @@ class BenchmarkDJTrace(Benchmark):
     def prepare(self):
         super().prepare()
 
-        def reporthook(blocknum, blocksize, totalsize):
-            readsofar = blocknum * blocksize
-            if totalsize > 0:
-                percent = readsofar * 1e2 / totalsize
-                status = "\r%5.1f%% %*d / %d" % (
-                    percent, len(str(totalsize)), readsofar, totalsize)
-                sys.stderr.write(status)
-            else:  # total size is unknown
-                sys.stderr.write(f"\rdownloaded {readsofar}")
-
-        if not os.path.isdir("dj_workloads"):
-            os.mkdir("dj_workloads")
-
-        download_all = None
-        wl_sizes = {"dj": "14M", "oocalc": "65M", "mt_test_one_alloc": "5.7M",
-                    "proprietary-1": "2.8G", "qemu-virtio": "34M",
-                    "proprietary-2": "92M", "qemu-win7": "23M",
-                    "389-ds-2": "3.4G", "dj2": "294M"}
-
-        for workload in self.args["workload"]:
-            file_name = workload + ".wl"
-            file_path = os.path.join("dj_workloads", file_name)
-            if not os.path.isfile(file_path):
-                if download_all is None:
-                    choice = input(("Download all missing workloads"
-                                    " (upto 6.7GB) [Y/n/x] "))
-                    if choice == "x":
-                        break
-                    else:
-                        download_all = choice in ['', 'Y', 'y']
-
-                if not download_all:
-                    choice = input(f"want to download {workload} ({wl_sizes[workload]}) [Y/n] ")
-                    if choice not in ['', 'Y', 'y']:
-                        continue
+        workload_dir = "dj_workloads"
+        workload_archive = f"{workload_dir}.tar.xz"
 
-                else:
-                    print_status(f"downloading {workload} ({wl_sizes[workload]}) ...")
+        if not os.path.isdir(workload_dir):
+            if not os.path.isfile(workload_archive):
+                choice = input("Download missing workloads (367M / ~6GB unpacked) [Y/n] ")
+                if choice not in ['', 'Y', 'y']:
+                    return False
 
-                url = "http://www.delorie.com/malloc/" + file_name
-                urlretrieve(url, file_path, reporthook)
+                url = f"https://www4.cs.fau.de/~flow/allocbench/{workload_archive}"
+                urlretrieve(url, workload_archive, download_reporthook)
                 sys.stderr.write("\n")
 
-        available_workloads = []
-        for workload in self.args["workload"]:
-            file_name = workload + ".wl"
-            file_path = os.path.join("dj_workloads", file_name)
-            if os.path.isfile(file_path):
-                available_workloads.append(workload)
+            # Extract workloads
+            proc = subprocess.run(["tar", "xf", workload_archive], stdout=subprocess.PIPE,
+                                  stderr=subprocess.PIPE, universal_newlines=True)
+
+            # delete archive
+            if proc.returncode == 0:
+                os.remove(workload_archive)
+
+            self.args["workload"] = os.listdir(workload_dir)
 
-        if available_workloads:
-            self.args["workload"] = available_workloads
             return True
 
-        return False
 
     @staticmethod
     def process_output(result, stdout, stderr, allocator, perm):
diff --git a/src/benchmarks/lld.py b/src/benchmarks/lld.py
index c5c802b..bcedced 100644
--- a/src/benchmarks/lld.py
+++ b/src/benchmarks/lld.py
@@ -25,6 +25,7 @@ import sys
 import matplotlib.pyplot as plt
 
 from src.benchmark import Benchmark
+from src.util import download_reporthook
 
 
 class BenchmarkLld(Benchmark):
@@ -50,16 +51,6 @@ class BenchmarkLld(Benchmark):
     def prepare(self):
         super().prepare()
 
-        def reporthook(blocknum, blocksize, totalsize):
-            readsofar = blocknum * blocksize
-            if totalsize > 0:
-                percent = readsofar * 1e2 / totalsize
-                status = "\r%5.1f%% %*d / %d" % (
-                    percent, len(str(totalsize)), readsofar, totalsize)
-                sys.stderr.write(status)
-            else:  # total size is unknown
-                sys.stderr.write(f"\rdownloaded {readsofar}")
-
         test_dir = "lld-speed-test"
         test_archive = f"{test_dir}.tar.xz"
         if not os.path.isdir(test_dir):
@@ -69,7 +60,7 @@ class BenchmarkLld(Benchmark):
                     return False
 
                 url = f"https://s3-us-west-2.amazonaws.com/linker-tests/{test_archive}"
-                urlretrieve(url, test_archive, reporthook)
+                urlretrieve(url, test_archive, download_reporthook)
                 sys.stderr.write("\n")
 
             # Extract tests
diff --git a/src/util.py b/src/util.py
index aebdb00..9941d6f 100644
--- a/src/util.py
+++ b/src/util.py
@@ -23,6 +23,17 @@ import sys
 
 import src.globalvars
 
+def download_reporthook(blocknum, blocksize, totalsize):
+    """Status report hook for urlretrieve"""
+    readsofar = blocknum * blocksize
+    if totalsize > 0:
+        percent = readsofar * 100 / totalsize
+        status = "\r%5.1f%% %*d / %d" % (
+                  percent, len(str(totalsize)), readsofar, totalsize)
+        sys.stderr.write(status)
+    else:  # total size is unknown
+        sys.stderr.write(f"\rdownloaded {readsofar}")
+
 
 def is_exe(fpath):
     """Check if the given path is an exexutable file"""
-- 
cgit v1.2.3