From 42836c4e95159728b4a459404071cf046ada4a5f Mon Sep 17 00:00:00 2001
From: Florian Fischer
Date: Sat, 18 Jul 2020 15:15:19 +0200
Subject: [plots.py] consider sample variances when calculating t-tests

---
Review notes (free-form text in this region is ignored by git-am):
* calc_ttests_for_alloc_pair() now stores, per argument permutation, a tuple
  (verdict string, scipy t-test result, equal_variance flag) instead of the
  bare scipy t-test result. Callers that consumed the old value presumably
  need to unpack the tuple -- verify against the call sites.
* The verdict is "unequal means" when the p-value is below `sig`
  (default 0.005) and "equal means" otherwise.
* equal_var=False is passed to scipy.stats.ttest_ind when one sample variance
  is more than twice the other.

 allocbench/plots.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/allocbench/plots.py b/allocbench/plots.py
index 0c73661..87aa069 100644
--- a/allocbench/plots.py
+++ b/allocbench/plots.py
@@ -30,6 +30,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import scipy.stats
 
+from allocbench.benchmark import Benchmark
 import allocbench.facter as facter
 from allocbench.util import get_logger
 
@@ -610,14 +611,29 @@ def create_ascii_leaderboards(bench, datapoints: List[Tuple[str, str]]):
     return res[:-1]
 
 
-def calc_ttests_for_alloc_pair(bench, alloc1, alloc2, datapoint: str) -> Dict:
+def calc_ttests_for_alloc_pair(bench: Benchmark,
+                               alloc1: str,
+                               alloc2: str,
+                               datapoint: str,
+                               sig=0.005) -> Dict:
     """Calculate independent t-test between two allocators for each argument permutation"""
 
     ttest_results = {}
     for perm in bench.iterate_args():
         data1 = [float(m[datapoint]) for m in bench.results[alloc1][perm]]
         data2 = [float(m[datapoint]) for m in bench.results[alloc2][perm]]
 
-        ttest_results[perm] = scipy.stats.ttest_ind(data1, data2)
+        var1 = scipy.stats.describe(data1).variance
+        var2 = scipy.stats.describe(data2).variance
+        # equal variance condition taken from wikipedia
+        # https://en.wikipedia.org/wiki/Student%27s_t-test
+        equal_variance = not (var1 > 2 * var2 or var2 > 2 * var1)
+
+        ttest_result = scipy.stats.ttest_ind(data1,
+                                             data2,
+                                             equal_var=equal_variance)
+        result = f'{"un" if ttest_result.pvalue < sig else ""}equal means'
+
+        ttest_results[perm] = (result, ttest_result, equal_variance)
 
     return ttest_results
-- 
cgit v1.2.3