benchmarks/cache-thrash.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134

///-*-C++-*-//////////////////////////////////////////////////////////////////
//
// Hoard: A Fast, Scalable, and Memory-Efficient Allocator
//        for Shared-Memory Multiprocessors
// Contact author: Emery Berger, http://www.cs.umass.edu/~emery
//
// Copyright (c) 1998-2003, The University of Texas at Austin.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Library General Public License as
// published by the Free Software Foundation, http://www.fsf.org.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Library General Public License for more details.
//
//////////////////////////////////////////////////////////////////////////////

/**
 * @file  cache-thrash.cpp
 * @brief cache-thrash is a benchmark that exercises a heap's cache-locality.
 *
 * Usage: cache-thrash nthreads iterations objSize repetitions
 *
 * Try the following (on a P-processor machine):
 *
 *  cache-thrash 1 1000 1 1000000
 *  cache-thrash P 1000 1 1000000
 *
 *  cache-thrash-hoard 1 1000 1 1000000
 *  cache-thrash-hoard P 1000 1 1000000
 *
 *  The ideal is a P-fold speedup.
*/


#include <iostream>
#include <stdlib.h>

using namespace std;

#include "cpuinfo.h"
#include "fred.h"
#include "timer.h"

// This class just holds arguments to each thread.
class workerArg {
public:
	workerArg() {}
	workerArg (int objSize, int repetitions, int iterations)
	: _objSize (objSize),
	 _iterations (iterations),
	 _repetitions (repetitions)
	{}

	int _objSize;
	int _iterations;
	int _repetitions;
};


#if defined(_WIN32)
extern "C" void worker (void * arg)
#else
extern "C" void * worker (void * arg)
#endif
{
	// Repeatedly do the following:
	//   malloc a given-sized object,
	//   repeatedly write on it,
	//   then free it.
	workerArg * w = (workerArg *) arg;
	workerArg w1 = *w;
	for (int i = 0; i < w1._iterations; i++) {
	// Allocate the object.
		char * obj = new char[w1._objSize];
		//    printf ("obj = %p\n", obj);
		// Write into it a bunch of times.
		for (int j = 0; j < w1._repetitions; j++) {
			for (int k = 0; k < w1._objSize; k++) {
				obj[k] = (char) k;
				volatile char ch = obj[k];
				ch++;
			}
		}
		// Free the object.
		delete [] obj;
	}
#if !defined(_WIN32)
	return NULL;
#endif
}


int main (int argc, char * argv[])
{
	int nthreads;
	int iterations;
	int objSize;
	int repetitions;
	
	if (argc > 4) {
		nthreads = atoi(argv[1]);
		iterations = atoi(argv[2]);
		objSize = atoi(argv[3]);
		repetitions = atoi(argv[4]);
	} else {
	cerr << "Usage: " << argv[0] << " nthreads iterations objSize repetitions" << endl;
	exit(1);
	}

	HL::Fred * threads = new HL::Fred[nthreads];
	HL::Fred::setConcurrency (HL::CPUInfo::getNumProcessors());
    
	int i;
  
	HL::Timer t;
	t.start();
  
	workerArg * w = new workerArg[nthreads];
    
	for (i = 0; i < nthreads; i++) {
		w[i] = workerArg (objSize, repetitions / nthreads, iterations);
		threads[i].create (&worker, (void *) &w[i]);
	}
	for (i = 0; i < nthreads; i++) {
		threads[i].join();
	}
	t.stop();

	delete [] threads;
	delete [] w;

	cout << "Time elapsed = " << (double) t << " seconds." << endl;
}