diff options
Diffstat (limited to 'benchmarks')
| -rw-r--r-- | benchmarks/bench_loop.c | 87 | ||||
| -rw-r--r-- | benchmarks/cache-scratch.cc | 147 | ||||
| -rw-r--r-- | benchmarks/cache-thrash.cc | 134 | ||||
| -rw-r--r-- | benchmarks/cpuinfo.h | 202 | ||||
| -rw-r--r-- | benchmarks/fred.h | 97 | ||||
| -rw-r--r-- | benchmarks/larson.cc | 744 | ||||
| -rw-r--r-- | benchmarks/timer.h | 372 |
7 files changed, 0 insertions, 1783 deletions
diff --git a/benchmarks/bench_loop.c b/benchmarks/bench_loop.c deleted file mode 100644 index bc15808..0000000 --- a/benchmarks/bench_loop.c +++ /dev/null @@ -1,87 +0,0 @@ -#include <assert.h> -#include <malloc.h> -#include <pthread.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - - -static size_t _rand() { - static __thread size_t seed = 123456789; - size_t a = 1103515245; - size_t c = 12345; - size_t m = 1 << 31; - seed = (a * seed + c) % m; - return seed; -} - -typedef struct ThreadArgs { - double benchmark; - int allocations; - int max_size; -} ThreadArgs; - -static void* malloc_then_write(size_t size) { - void* ptr = malloc(size); - // Write to ptr - /* *((char*)ptr) = '!'; */ - return ptr; -} - -static void read_then_free(void* ptr) { - // Read before free - /* char s __attribute__((unused)) = *((char*)ptr); */ - free(ptr); -} -static void* test_thread_func(void* arg) { - ThreadArgs* args = (ThreadArgs*)arg; - - for(int i = 0; i < args->allocations; i++) { - void* ptr = malloc_then_write((_rand() % args->max_size) + 1); - read_then_free(ptr); - } - return NULL; -} - -int main(int argc, char* argv[]) { - pthread_t* threads; - int num_threads; - struct ThreadArgs thread_args; - - if (argc < 4) { - fprintf(stderr, "Usage: %s <num threads> <num allocations> <max size>\n", argv[0]); - return 1; - } - - num_threads = atoi(argv[1]); - thread_args.allocations = atoi(argv[2]); - thread_args.max_size = atoi(argv[3]); - - threads = (pthread_t*)malloc(num_threads * sizeof(pthread_t)); - - for (int i = 0; i < num_threads; i++) { - if (0 != pthread_create(&threads[i], NULL, test_thread_func, &thread_args)) { - perror("pthread_create"); - return 1; - } - } - - for(int i = 0; i < num_threads; i++) { - if (0 != pthread_join(threads[i], NULL)) { - perror("pthread_join"); - return 1; - } - } - - if (argc == 5) - { - FILE* f = stdout; - if (strcmp(argv[4],"stdout") != 0) - f = fopen(argv[4], "w"); - malloc_info(0, f); - if (strcmp(argv[4],"stdout") != 0) - fclose(f); - } - - return 0; -} diff --git a/benchmarks/cache-scratch.cc b/benchmarks/cache-scratch.cc deleted file mode 100644 index 2cb9b28..0000000 --- a/benchmarks/cache-scratch.cc +++ /dev/null @@ -1,147 +0,0 @@ -///-*-C++-*-////////////////////////////////////////////////////////////////// -// -// Hoard: A Fast, Scalable, and Memory-Efficient Allocator -// for Shared-Memory Multiprocessors -// Contact author: Emery Berger, http://www.cs.umass.edu/~emery -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Library General Public License as -// published by the Free Software Foundation, http://www.fsf.org. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Library General Public License for more details. -// -////////////////////////////////////////////////////////////////////////////// - -/** - * @file cache-scratch.cpp - * - * cache-scratch is a benchmark that exercises a heap's cache locality. - * An allocator that allows multiple threads to re-use the same small - * object (possibly all in one cache-line) will scale poorly, while - * an allocator like Hoard will exhibit near-linear scaling. - * - * Try the following (on a P-processor machine): - * - * cache-scratch 1 1000 1 1000000 - * cache-scratch P 1000 1 1000000 - * - * cache-scratch-hoard 1 1000 1 1000000 - * cache-scratch-hoard P 1000 1 1000000 - * - * The ideal is a P-fold speedup. -*/ - -#include <stdio.h> -#include <stdlib.h> - -#include "fred.h" -#include "cpuinfo.h" -#include "timer.h" - -// This class just holds arguments to each thread. -class workerArg { -public: - - workerArg() {} - - workerArg (char * obj, int objSize, int repetitions, int iterations) - : _object (obj), - _objSize (objSize), - _iterations (iterations), - _repetitions (repetitions) - {} - - char * _object; - int _objSize; - int _iterations; - int _repetitions; -}; - - -#if defined(_WIN32) -extern "C" void worker (void * arg) -#else -extern "C" void * worker (void * arg) -#endif -{ - // free the object we were given. - // Then, repeatedly do the following: - // malloc a given-sized object, - // repeatedly write on it, - // then free it. - workerArg * w = (workerArg *) arg; - delete w->_object; - workerArg w1 = *w; - for (int i = 0; i < w1._iterations; i++) { - // Allocate the object. - char * obj = new char[w1._objSize]; - // Write into it a bunch of times. - for (int j = 0; j < w1._repetitions; j++) { - for (int k = 0; k < w1._objSize; k++) { - obj[k] = (char) k; - volatile char ch = obj[k]; - ch++; - } - } - // Free the object. - delete [] obj; - } - -#if !defined(_WIN32) - return NULL; -#endif -} - - -int main (int argc, char * argv[]) -{ - int nthreads; - int iterations; - int objSize; - int repetitions; - - if (argc > 4) { - nthreads = atoi(argv[1]); - iterations = atoi(argv[2]); - objSize = atoi(argv[3]); - repetitions = atoi(argv[4]); - } else { - fprintf (stderr, "Usage: %s nthreads iterations objSize repetitions\n", argv[0]); - return 1; - } - - HL::Fred * threads = new HL::Fred[nthreads]; - HL::Fred::setConcurrency (HL::CPUInfo::getNumProcessors()); - - workerArg * w = new workerArg[nthreads]; - - int i; - - // Allocate nthreads objects and distribute them among the threads. - char ** objs = new char * [nthreads]; - for (i = 0; i < nthreads; i++) { - objs[i] = new char[objSize]; - } - - HL::Timer t; - t.start(); - - for (i = 0; i < nthreads; i++) { - w[i] = workerArg (objs[i], objSize, repetitions / nthreads, iterations); - threads[i].create (&worker, (void *) &w[i]); - } - for (i = 0; i < nthreads; i++) { - threads[i].join(); - } - t.stop(); - - delete [] threads; - delete [] objs; - delete [] w; - - printf ("Time elapsed = %f seconds.\n", (double) t); - return 0; -} diff --git a/benchmarks/cache-thrash.cc b/benchmarks/cache-thrash.cc deleted file mode 100644 index 79242eb..0000000 --- a/benchmarks/cache-thrash.cc +++ /dev/null @@ -1,134 +0,0 @@ -///-*-C++-*-////////////////////////////////////////////////////////////////// -// -// Hoard: A Fast, Scalable, and Memory-Efficient Allocator -// for Shared-Memory Multiprocessors -// Contact author: Emery Berger, http://www.cs.umass.edu/~emery -// -// Copyright (c) 1998-2003, The University of Texas at Austin. -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Library General Public License as -// published by the Free Software Foundation, http://www.fsf.org. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Library General Public License for more details. -// -////////////////////////////////////////////////////////////////////////////// - -/** - * @file cache-thrash.cpp - * @brief cache-thrash is a benchmark that exercises a heap's cache-locality. - * - * Try the following (on a P-processor machine): - * - * cache-thrash 1 1000 1 1000000 - * cache-thrash P 1000 1 1000000 - * - * cache-thrash-hoard 1 1000 1 1000000 - * cache-thrash-hoard P 1000 1 1000000 - * - * The ideal is a P-fold speedup. -*/ - - -#include <iostream> -#include <stdlib.h> - -using namespace std; - -#include "cpuinfo.h" -#include "fred.h" -#include "timer.h" - -// This class just holds arguments to each thread. -class workerArg { -public: - workerArg() {} - workerArg (int objSize, int repetitions, int iterations) - : _objSize (objSize), - _iterations (iterations), - _repetitions (repetitions) - {} - - int _objSize; - int _iterations; - int _repetitions; -}; - - -#if defined(_WIN32) -extern "C" void worker (void * arg) -#else -extern "C" void * worker (void * arg) -#endif -{ - // Repeatedly do the following: - // malloc a given-sized object, - // repeatedly write on it, - // then free it. - workerArg * w = (workerArg *) arg; - workerArg w1 = *w; - for (int i = 0; i < w1._iterations; i++) { - // Allocate the object. - char * obj = new char[w1._objSize]; - // printf ("obj = %p\n", obj); - // Write into it a bunch of times. - for (int j = 0; j < w1._repetitions; j++) { - for (int k = 0; k < w1._objSize; k++) { - obj[k] = (char) k; - volatile char ch = obj[k]; - ch++; - } - } - // Free the object. - delete [] obj; - } -#if !defined(_WIN32) - return NULL; -#endif -} - - -int main (int argc, char * argv[]) -{ - int nthreads; - int iterations; - int objSize; - int repetitions; - - if (argc > 4) { - nthreads = atoi(argv[1]); - iterations = atoi(argv[2]); - objSize = atoi(argv[3]); - repetitions = atoi(argv[4]); - } else { - cerr << "Usage: " << argv[0] << " nthreads iterations objSize repetitions" << endl; - exit(1); - } - - HL::Fred * threads = new HL::Fred[nthreads]; - HL::Fred::setConcurrency (HL::CPUInfo::getNumProcessors()); - - int i; - - HL::Timer t; - t.start(); - - workerArg * w = new workerArg[nthreads]; - - for (i = 0; i < nthreads; i++) { - w[i] = workerArg (objSize, repetitions / nthreads, iterations); - threads[i].create (&worker, (void *) &w[i]); - } - for (i = 0; i < nthreads; i++) { - threads[i].join(); - } - t.stop(); - - delete [] threads; - delete [] w; - - cout << "Time elapsed = " << (double) t << " seconds." << endl; -} diff --git a/benchmarks/cpuinfo.h b/benchmarks/cpuinfo.h deleted file mode 100644 index 1ed1f36..0000000 --- a/benchmarks/cpuinfo.h +++ /dev/null @@ -1,202 +0,0 @@ -// -*- C++ -*-
-
-/*
-
- Heap Layers: An Extensible Memory Allocation Infrastructure
-
- Copyright (C) 2000-2003 by Emery Berger
- http://www.cs.umass.edu/~emery
- emery@cs.umass.edu
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-*/
-
-
-
-#ifndef HL_CPUINFO_H
-#define HL_CPUINFO_H
-
-#if defined(_WIN32)
-#include <windows.h>
-#include <process.h>
-#else
-#include <unistd.h>
-#endif
-
-
-#if !defined(_WIN32)
-#include <pthread.h>
-#endif
-
-#if defined(__SVR4) // Solaris
-#include <sys/lwp.h>
-extern "C" unsigned int lwp_self(void);
-#include <thread.h>
-extern "C" int _thr_self(void);
-#endif
-
-#if defined(__linux)
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <string.h>
-#include <unistd.h>
-#endif
-
-#if defined(__APPLE__)
-#include <sys/types.h>
-#include <sys/sysctl.h>
-#endif
-
-#if defined(__sgi)
-#include <sys/types.h>
-#include <sys/sysmp.h>
-#include <sys/sysinfo.h>
-#endif
-
-#if defined(hpux)
-#include <sys/mpctl.h>
-#endif
-
-#if defined(_WIN32)
-extern __declspec(thread) int localThreadId;
-#endif
-
-#if defined(__SVR4) && defined(MAP_ALIGN)
-extern volatile int anyThreadStackCreated;
-#endif
-
-namespace HL {
-
-/**
- * @class CPUInfo
- * @author Emery Berger <http://www.cs.umass.edu/~emery>
- *
- * @brief Architecture-independent wrapper to get number of CPUs.
- */
-
-class CPUInfo {
-public:
- CPUInfo (void)
- {}
-
- inline static int getNumProcessors (void) {
- static int _numProcessors = computeNumProcessors();
- return _numProcessors;
- }
-
- static inline unsigned long getThreadId (void);
- inline static int computeNumProcessors (void);
-
-};
-
-
-int CPUInfo::computeNumProcessors (void)
-{
- static int np = 0;
- if (!np) {
-#if defined(__linux) || defined(__APPLE__)
- np = (int) sysconf(_SC_NPROCESSORS_ONLN);
-#elif defined(_WIN32)
- SYSTEM_INFO infoReturn[1];
- GetSystemInfo (infoReturn);
- np = (int) (infoReturn->dwNumberOfProcessors);
-#elif defined(__sgi)
- np = (int) sysmp(MP_NAPROCS);
-#elif defined(hpux)
- np = mpctl(MPC_GETNUMSPUS, NULL, NULL); // or pthread_num_processors_np()?
-#elif defined(_SC_NPROCESSORS_ONLN)
- np = (int) (sysconf(_SC_NPROCESSORS_ONLN));
-#else
- np = 2;
- // Unsupported platform.
- // Pretend we have at least two processors. This approach avoids the risk of assuming
- // we're on a uniprocessor, which might lead clever allocators to avoid using atomic
- // operations for all locks.
-#endif
- return np;
- } else {
- return np;
- }
-}
-
- // Note: when stacksize arg is NULL for pthread_attr_setstacksize [Solaris],
-// stack size is 1 MB for 32-bit arch, 2 MB for 64-bit arch.
-// pthread_attr_getstacksize
-// pthread_attr_setstackaddr
-// pthread_attr_getstackaddr
-// PTHREAD_STACK_SIZE is minimum.
-// or should we just assume we have __declspec(thread) or __thread?
-
-#if defined(USE_THREAD_KEYWORD)
- extern __thread int localThreadId;
-#endif
-
- // FIX ME FIXME
- //#include <stdio.h>
-
-unsigned long CPUInfo::getThreadId (void) {
-#if defined(__SVR4)
- size_t THREAD_STACK_SIZE;
- if (sizeof(size_t) <= 4) {
- THREAD_STACK_SIZE = 1048576;
- } else {
- // 64-bits.
- THREAD_STACK_SIZE = 1048576 * 2;
- }
- if (0) { // !anyThreadStackCreated) {
- // We know a priori that all stack variables
- // are on different stacks. Since no one has created
- // a special one, we are in control, and thus all stacks
- // are 1 MB in size and on 1 MB boundaries.
- // (Actually: 1 MB for 32-bits, 2 MB for 64-bits.)
- char buf;
- return (((size_t) &buf) & ~(THREAD_STACK_SIZE-1)) >> 20;
- } else {
- return (int) pthread_self();
- }
-#elif defined(_WIN32)
- // It looks like thread id's are always multiples of 4, so...
- return GetCurrentThreadId() >> 2;
-#elif defined(__APPLE__)
- // Consecutive thread id's in Mac OS are 4096 apart;
- // dividing off the 4096 gives us an appropriate thread id.
- int tid = (int) ((unsigned long) pthread_self()) >> 12;
- return tid;
-#elif defined(__BEOS__)
- return find_thread(0);
-#elif defined(USE_THREAD_KEYWORD)
- return localThreadId;
-#elif defined(__linux) || defined(PTHREAD_KEYS_MAX)
- // Consecutive thread id's in Linux are 1024 apart;
- // dividing off the 1024 gives us an appropriate thread id.
- return (unsigned long) pthread_self() >> 10;
-#elif defined(POSIX)
- return (unsigned long) pthread_self();
-#elif USE_SPROC
- // This hairiness has the same effect as calling getpid(),
- // but it's MUCH faster since it avoids making a system call
- // and just accesses the sproc-local data directly.
- unsigned long pid = (unsigned long) PRDA->sys_prda.prda_sys.t_pid;
- return pid;
-#else
- return 0;
-#endif
-}
-
-}
-
-#endif
diff --git a/benchmarks/fred.h b/benchmarks/fred.h deleted file mode 100644 index b0198a7..0000000 --- a/benchmarks/fred.h +++ /dev/null @@ -1,97 +0,0 @@ -// -*- C++ -*- - -#ifndef HL_FRED_H -#define HL_FRED_H - -/// A thread-wrapper of childlike simplicity :). - -#if defined(_WIN32) - - #include <windows.h> - #include <process.h> - -#elif defined(__SVR4) - - #include <thread.h> - #include <pthread.h> - #include <unistd.h> - -#else - - #include <pthread.h> - #include <unistd.h> - -#endif - -typedef void * (*ThreadFunctionType) (void *); - -namespace HL { - -class Fred { -public: - - Fred() { -#if !defined(_WIN32) - pthread_attr_init (&attr); - pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM); -#endif - } - - ~Fred() { -#if !defined(_WIN32) - pthread_attr_destroy (&attr); -#endif - } - - void create (ThreadFunctionType function, void * arg) { -#if defined(_WIN32) - t = CreateThread (0, 0, (LPTHREAD_START_ROUTINE) *function, (LPVOID) arg, 0, 0); -#else - pthread_create (&t, &attr, function, arg); -#endif - } - - void join (void) { -#if defined(_WIN32) - WaitForSingleObject (t, INFINITE); -#else - pthread_join (t, NULL); -#endif - } - - static void yield (void) { -#if defined(_WIN32) - Sleep (0); -#elif defined(__SVR4) - thr_yield(); -#else - sched_yield(); -#endif - } - - - static void setConcurrency (int n) { -#if defined(_WIN32) -#elif defined(__SVR4) - thr_setconcurrency (n); -#else - pthread_setconcurrency (n); -#endif - } - - -private: -#if defined(_WIN32) - typedef HANDLE FredType; -#else - typedef pthread_t FredType; - pthread_attr_t attr; -#endif - - FredType t; -}; - -} - - -#endif diff --git a/benchmarks/larson.cc b/benchmarks/larson.cc deleted file mode 100644 index be8038f..0000000 --- a/benchmarks/larson.cc +++ /dev/null @@ -1,744 +0,0 @@ -#include <assert.h> -#include <stdio.h> - -#if defined(_WIN32) -#define __WIN32__ -#endif - -#ifdef __WIN32__ -#include <windows.h> -#include <conio.h> -#include <process.h> - -#else -#include <unistd.h> -#include <sys/resource.h> -#include <sys/time.h> - -#ifndef __SVR4 -//extern "C" int pthread_setconcurrency (int) throw(); -#include <pthread.h> -#endif - - -typedef void * LPVOID; -typedef long long LONGLONG; -typedef long DWORD; -typedef long LONG; -typedef unsigned long ULONG; -typedef union _LARGE_INTEGER { - struct { - DWORD LowPart; - LONG HighPart; - } foo; - LONGLONG QuadPart; // In Visual C++, a typedef to _ _int64} LARGE_INTEGER; -} LARGE_INTEGER; -typedef long long _int64; -#ifndef TRUE -enum { TRUE = 1, FALSE = 0 }; -#endif -#include <assert.h> -#define _ASSERTE(x) assert(x) -#define _inline inline -void Sleep (long x) -{ - // printf ("sleeping for %ld seconds.\n", x/1000); - sleep(x/1000); -} - -void QueryPerformanceCounter (long * x) -{ - struct timezone tz; - struct timeval tv; - gettimeofday (&tv, &tz); - *x = tv.tv_sec * 1000000L + tv.tv_usec; -} - -void QueryPerformanceFrequency(long * x) -{ - *x = 1000000L; -} - - -#include <stdio.h> -#include <stdlib.h> -#include <stddef.h> -#include <string.h> -#include <ctype.h> -#include <time.h> -#include <assert.h> - -#define _REENTRANT 1 -#include <pthread.h> -#ifdef __sun -#include <thread.h> -#endif -typedef void * VoidFunction (void *); -void _beginthread (VoidFunction x, int, void * z) -{ - pthread_t pt; - pthread_attr_t pa; - pthread_attr_init (&pa); - -#if 1//defined(__SVR4) - pthread_attr_setscope (&pa, PTHREAD_SCOPE_SYSTEM); /* bound behavior */ -#endif - - // printf ("creating a thread.\n"); - int v = pthread_create(&pt, &pa, x, z); - // printf ("v = %d\n", v); -} -#endif - - -#if 0 -static char buf[65536]; - -#define malloc(v) &buf -#define free(p) -#endif - -#undef CPP -//#define CPP -//#include "arch-specific.h" - -#if USE_ROCKALL -//#include "FastHeap.hpp" -//FAST_HEAP theFastHeap (1024 * 1024, true, true, true); - -typedef int SBIT32; - -#include "SmpHeap.hpp" -SMP_HEAP theFastHeap (1024 * 1024, true, true, true); - -void * operator new( unsigned int cb ) -{ - void *pRet = theFastHeap.New ((size_t)cb) ; - return pRet; -} - -void operator delete(void *pUserData ) -{ - theFastHeap.Delete (pUserData) ; -} -#endif - -#if 0 -extern "C" void * hdmalloc (size_t sz) ; -extern "C" void hdfree (void * ptr) ; -extern "C" void hdmalloc_stats (void) ; -void * operator new( unsigned int cb ) -{ - void *pRet = hdmalloc((size_t)cb) ; - return pRet; -} - -void operator delete(void *pUserData ) -{ - hdfree(pUserData) ; -} -#endif - - - -/* Test driver for memory allocators */ -/* Author: Paul Larson, palarson@microsoft.com */ -#define MAX_THREADS 100 -#define MAX_BLOCKS 20000000 - -int volatile stopflag=FALSE ; - -struct lran2_st { - long x, y, v[97]; -}; - -int TotalAllocs=0 ; - -typedef struct thr_data { - - int threadno ; - int NumBlocks ; - int seed ; - - int min_size ; - int max_size ; - - char * *array ; - int *blksize ; - int asize ; - - unsigned long cAllocs ; - unsigned long cFrees ; - int cThreads ; - unsigned long cBytesAlloced ; - - volatile int finished ; - struct lran2_st rgen ; - -} thread_data; - -void runthreads(long sleep_cnt, int min_threads, int max_threads, - int chperthread, int num_rounds) ; -void runloops(long sleep_cnt, int num_chunks ) ; -static void warmup(char **blkp, int num_chunks ); -static void * exercise_heap( void *pinput) ; -static void lran2_init(struct lran2_st* d, long seed) ; -static long lran2(struct lran2_st* d) ; -ULONG CountReservedSpace() ; - -char ** blkp = new char *[MAX_BLOCKS] ; -int * blksize = new int[MAX_BLOCKS] ; -long seqlock=0 ; -struct lran2_st rgen ; -int min_size=10, max_size=500 ; -int num_threads ; -ULONG init_space ; - -extern int cLockSleeps ; -extern int cAllocedChunks ; -extern int cAllocedSpace ; -extern int cUsedSpace ; -extern int cFreeChunks ; -extern int cFreeSpace ; - -int cChecked=0 ; - -#if defined(_WIN32) -extern "C" { - extern HANDLE crtheap; -}; -#endif - -int main (int argc, char *argv[]) -{ -#if defined(USE_LFH) && defined(_WIN32) - // Activate 'Low Fragmentation Heap'. - ULONG info = 2; - HeapSetInformation (GetProcessHeap(), - HeapCompatibilityInformation, - &info, - sizeof(info)); -#endif -#if 0 // defined(__SVR4) - { - psinfo_t ps; - int pid = getpid(); - char fname[255]; - sprintf (fname, "/proc/%d/psinfo", pid); - // sprintf (fname, "/proc/self/ps"); - FILE * f = fopen (fname, "rb"); - printf ("opening %s\n", fname); - if (f) { - fread (&ps, sizeof(ps), 1, f); - printf ("resident set size = %dK\n", ps.pr_rssize); - fclose (f); - } - } -#endif - -#if defined(_MT) || defined(_REENTRANT) - int min_threads, max_threads ; - int num_rounds ; - int chperthread ; -#endif - unsigned seed=12345 ; - int num_chunks=10000; - long sleep_cnt; - - if (argc > 7) { - sleep_cnt = atoi(argv[1]); - min_size = atoi(argv[2]); - max_size = atoi(argv[3]); - chperthread = atoi(argv[4]); - num_rounds = atoi(argv[5]); - seed = atoi(argv[6]); - max_threads = atoi(argv[7]); - min_threads = max_threads; - printf ("sleep = %ld, min = %d, max = %d, per thread = %d, num rounds = %d, seed = %d, max_threads = %d, min_threads = %d\n", - sleep_cnt, min_size, max_size, chperthread, num_rounds, seed, max_threads, min_threads); - goto DoneWithInput; - } - -#if defined(_MT) || defined(_REENTRANT) - //#ifdef _MT - printf( "\nMulti-threaded test driver \n") ; -#else - printf( "\nSingle-threaded test driver \n") ; -#endif -#ifdef CPP - printf("C++ version (new and delete)\n") ; -#else - printf("C version (malloc and free)\n") ; -#endif - printf("runtime (sec): ") ; - scanf ("%ld", &sleep_cnt); - - printf("chunk size (min,max): ") ; - scanf("%d %d", &min_size, &max_size ) ; -#if defined(_MT) || defined(_REENTRANT) - //#ifdef _MT - printf("threads (min, max): ") ; - scanf("%d %d", &min_threads, &max_threads) ; - printf("chunks/thread: ") ; scanf("%d", &chperthread ) ; - printf("no of rounds: ") ; scanf("%d", &num_rounds ) ; - num_chunks = max_threads*chperthread ; -#else - printf("no of chunks: ") ; scanf("%d", &num_chunks ) ; -#endif - printf("random seed: ") ; scanf("%d", &seed) ; - - DoneWithInput: - - if( num_chunks > MAX_BLOCKS ){ - printf("Max %d chunks - exiting\n", MAX_BLOCKS ) ; - return(1) ; - } - -#ifndef __WIN32__ -#ifdef __SVR4 - pthread_setconcurrency (max_threads); -#endif -#endif - - lran2_init(&rgen, seed) ; - // init_space = CountReservedSpace() ; - -#if defined(_MT) || defined(_REENTRANT) - //#ifdef _MT - runthreads(sleep_cnt, min_threads, max_threads, chperthread, num_rounds) ; -#else - runloops(sleep_cnt, num_chunks ) ; -#endif - -#ifdef _DEBUG - _cputs("Hit any key to exit...") ; (void)_getch() ; -#endif - - return 0; - -} /* main */ - -void runloops(long sleep_cnt, int num_chunks ) -{ - int cblks ; - int victim ; - int blk_size ; -#ifdef __WIN32__ - _LARGE_INTEGER ticks_per_sec, start_cnt, end_cnt; -#else - long ticks_per_sec ; - long start_cnt, end_cnt ; -#endif - _int64 ticks ; - double duration ; - double reqd_space ; - ULONG used_space ; - int sum_allocs=0 ; - - QueryPerformanceFrequency( &ticks_per_sec ) ; - QueryPerformanceCounter( &start_cnt) ; - - for( cblks=0; cblks<num_chunks; cblks++){ - if (max_size == min_size) { - blk_size = min_size; - } else { - blk_size = min_size+lran2(&rgen)%(max_size - min_size) ; - } -#ifdef CPP - blkp[cblks] = new char[blk_size] ; -#else - blkp[cblks] = (char *) malloc(blk_size) ; -#endif - blksize[cblks] = blk_size ; - assert(blkp[cblks] != NULL) ; - } - - while(TRUE){ - for( cblks=0; cblks<num_chunks; cblks++){ - victim = lran2(&rgen)%num_chunks ; -#ifdef CPP - delete blkp[victim] ; -#else - free(blkp[victim]) ; -#endif - - if (max_size == min_size) { - blk_size = min_size; - } else { - blk_size = min_size+lran2(&rgen)%(max_size - min_size) ; - } -#ifdef CPP - blkp[victim] = new char[blk_size] ; -#else - blkp[victim] = (char *) malloc(blk_size) ; -#endif - blksize[victim] = blk_size ; - assert(blkp[victim] != NULL) ; - } - sum_allocs += num_chunks ; - - QueryPerformanceCounter( &end_cnt) ; -#ifdef __WIN32__ - ticks = end_cnt.QuadPart - start_cnt.QuadPart ; - duration = (double)ticks/ticks_per_sec.QuadPart ; -#else - ticks = end_cnt - start_cnt ; - duration = (double)ticks/ticks_per_sec ; -#endif - - if( duration >= sleep_cnt) break ; - } - reqd_space = (0.5*(min_size+max_size)*num_chunks) ; - // used_space = CountReservedSpace() - init_space; - - printf("%6.3f", duration ) ; - printf("%8.0f", sum_allocs/duration ) ; - printf(" %6.3f %.3f", (double)used_space/(1024*1024), used_space/reqd_space) ; - printf("\n") ; - -} - - -#if defined(_MT) || defined(_REENTRANT) -//#ifdef _MT -void runthreads(long sleep_cnt, int min_threads, int max_threads, int chperthread, int num_rounds) -{ - thread_data *de_area = new thread_data[max_threads] ; - thread_data *pdea; - int nperthread ; - int sum_threads ; - unsigned long sum_allocs ; - unsigned long sum_frees ; - double duration ; -#ifdef __WIN32__ - _LARGE_INTEGER ticks_per_sec, start_cnt, end_cnt; -#else - long ticks_per_sec ; - long start_cnt, end_cnt ; -#endif - _int64 ticks ; - double rate_1=0, rate_n ; - double reqd_space ; - ULONG used_space ; - int prevthreads ; - int i ; - - QueryPerformanceFrequency( &ticks_per_sec ) ; - - pdea = &de_area[0] ; - memset(&de_area[0], 0, sizeof(thread_data)) ; - - prevthreads = 0 ; - for(num_threads=min_threads; num_threads <= max_threads; num_threads++ ) - { - - warmup(&blkp[prevthreads*chperthread], (num_threads-prevthreads)*chperthread ); - - nperthread = chperthread ; - stopflag = FALSE ; - - for(i=0; i< num_threads; i++){ - de_area[i].threadno = i+1 ; - de_area[i].NumBlocks = num_rounds*nperthread; - de_area[i].array = &blkp[i*nperthread] ; - de_area[i].blksize = &blksize[i*nperthread] ; - de_area[i].asize = nperthread ; - de_area[i].min_size = min_size ; - de_area[i].max_size = max_size ; - de_area[i].seed = lran2(&rgen) ; ; - de_area[i].finished = 0 ; - de_area[i].cAllocs = 0 ; - de_area[i].cFrees = 0 ; - de_area[i].cThreads = 0 ; - de_area[i].finished = FALSE ; - lran2_init(&de_area[i].rgen, de_area[i].seed) ; - -#ifdef __WIN32__ - _beginthread((void (__cdecl*)(void *)) exercise_heap, 0, &de_area[i]) ; -#else - _beginthread(exercise_heap, 0, &de_area[i]) ; -#endif - - } - - QueryPerformanceCounter( &start_cnt) ; - - // printf ("Sleeping for %ld seconds.\n", sleep_cnt); - Sleep(sleep_cnt * 1000L) ; - - stopflag = TRUE ; - - for(i=0; i<num_threads; i++){ - while( !de_area[i].finished ){ -#ifdef __WIN32__ - Sleep(1); -#elif defined(__SVR4) - thr_yield(); -#else - sched_yield(); -#endif - } - } - - - QueryPerformanceCounter( &end_cnt) ; - - sum_frees = sum_allocs =0 ; - sum_threads = 0 ; - for(i=0;i< num_threads; i++){ - sum_allocs += de_area[i].cAllocs ; - sum_frees += de_area[i].cFrees ; - sum_threads += de_area[i].cThreads ; - de_area[i].cAllocs = de_area[i].cFrees = 0; - } - - -#ifdef __WIN32__ - ticks = end_cnt.QuadPart - start_cnt.QuadPart ; - duration = (double)ticks/ticks_per_sec.QuadPart ; -#else - ticks = end_cnt - start_cnt ; - duration = (double)ticks/ticks_per_sec ; -#endif - - for( i=0; i<num_threads; i++){ - if( !de_area[i].finished ) - printf("Thread at %d not finished\n", i) ; - } - - - rate_n = sum_allocs/duration ; - if( rate_1 == 0){ - rate_1 = rate_n ; - } - - reqd_space = (0.5*(min_size+max_size)*num_threads*chperthread) ; - // used_space = CountReservedSpace() - init_space; - used_space = 0; - - printf ("Throughput = %8.0f operations per second.\n", sum_allocs / duration); - -#if 0 - printf("%2d ", num_threads ) ; - printf("%6.3f", duration ) ; - printf("%6.3f", rate_n/rate_1 ) ; - printf("%8.0f", sum_allocs/duration ) ; - printf(" %6.3f %.3f", (double)used_space/(1024*1024), used_space/reqd_space) ; - printf("\n") ; -#endif - - Sleep(5000L) ; // wait 5 sec for old threads to die - - prevthreads = num_threads ; - - printf ("Done sleeping...\n"); - - } - delete [] de_area; -} - - -static void * exercise_heap( void *pinput) -{ - thread_data *pdea; - int cblks=0 ; - int victim ; - long blk_size ; - int range ; - - if( stopflag ) return 0; - - pdea = (thread_data *)pinput ; - pdea->finished = FALSE ; - pdea->cThreads++ ; - range = pdea->max_size - pdea->min_size ; - - /* allocate NumBlocks chunks of random size */ - for( cblks=0; cblks<pdea->NumBlocks; cblks++){ - victim = lran2(&pdea->rgen)%pdea->asize ; -#ifdef CPP - delete pdea->array[victim] ; -#else - free(pdea->array[victim]) ; -#endif - pdea->cFrees++ ; - - if (range == 0) { - blk_size = pdea->min_size; - } else { - blk_size = pdea->min_size+lran2(&pdea->rgen)%range ; - } -#ifdef CPP - pdea->array[victim] = new char[blk_size] ; -#else - pdea->array[victim] = (char *) malloc(blk_size) ; -#endif - - pdea->blksize[victim] = blk_size ; - assert(pdea->array[victim] != NULL) ; - - pdea->cAllocs++ ; - - /* Write something! */ - - volatile char * chptr = ((char *) pdea->array[victim]); - *chptr++ = 'a'; - volatile char ch = *((char *) pdea->array[victim]); - *chptr = 'b'; - - - if( stopflag ) break ; - } - - // printf("Thread %u terminating: %d allocs, %d frees\n", - // pdea->threadno, pdea->cAllocs, pdea->cFrees) ; - pdea->finished = TRUE ; - - if( !stopflag ){ -#ifdef __WIN32__ - _beginthread((void (__cdecl*)(void *)) exercise_heap, 0, pdea) ; -#else - _beginthread(exercise_heap, 0, pdea) ; -#endif - } else { - printf ("thread stopping.\n"); - } -#ifndef _WIN32 - pthread_exit (NULL); -#endif - return 0; -} - -static void warmup(char **blkp, int num_chunks ) -{ - int cblks ; - int victim ; - int blk_size ; - LPVOID tmp ; - - - for( cblks=0; cblks<num_chunks; cblks++){ - if (min_size == max_size) { - blk_size = min_size; - } else { - blk_size = min_size+lran2(&rgen)%(max_size-min_size) ; - } -#ifdef CPP - blkp[cblks] = new char[blk_size] ; -#else - blkp[cblks] = (char *) malloc(blk_size) ; -#endif - blksize[cblks] = blk_size ; - assert(blkp[cblks] != NULL) ; - } - - /* generate a random permutation of the chunks */ - for( cblks=num_chunks; cblks > 0 ; cblks--){ - victim = lran2(&rgen)%cblks ; - tmp = blkp[victim] ; - blkp[victim] = blkp[cblks-1] ; - blkp[cblks-1] = (char *) tmp ; - } - - for( cblks=0; cblks<4*num_chunks; cblks++){ - victim = lran2(&rgen)%num_chunks ; -#ifdef CPP - delete blkp[victim] ; -#else - free(blkp[victim]) ; -#endif - - if (max_size == min_size) { - blk_size = min_size; - } else { - blk_size = min_size+lran2(&rgen)%(max_size - min_size) ; - } -#ifdef CPP - blkp[victim] = new char[blk_size] ; -#else - blkp[victim] = (char *) malloc(blk_size) ; -#endif - blksize[victim] = blk_size ; - assert(blkp[victim] != NULL) ; - } -} -#endif // _MT - -#ifdef __WIN32__ -ULONG CountReservedSpace() -{ - MEMORY_BASIC_INFORMATION info; - char *addr=NULL ; - ULONG size=0 ; - - while( true){ - VirtualQuery(addr, &info, sizeof(info)); - switch( info.State){ - case MEM_FREE: - case MEM_RESERVE: - break ; - case MEM_COMMIT: - size += info.RegionSize ; - break ; - } - addr += info.RegionSize ; - if( addr >= (char *)0x80000000UL ) break ; - } - - return size ; - -} -#endif - -// ======================================================= - -/* lran2.h - * by Wolfram Gloger 1996. - * - * A small, portable pseudo-random number generator. - */ - -#ifndef _LRAN2_H -#define _LRAN2_H - -#define LRAN2_MAX 714025l /* constants for portable */ -#define IA 1366l /* random number generator */ -#define IC 150889l /* (see e.g. `Numerical Recipes') */ - -//struct lran2_st { -// long x, y, v[97]; -//}; - -static void -lran2_init(struct lran2_st* d, long seed) -{ - long x; - int j; - - x = (IC - seed) % LRAN2_MAX; - if(x < 0) x = -x; - for(j=0; j<97; j++) { - x = (IA*x + IC) % LRAN2_MAX; - d->v[j] = x; - } - d->x = (IA*x + IC) % LRAN2_MAX; - d->y = d->x; -} - -static -long lran2(struct lran2_st* d) -{ - int j = (d->y % 97); - - d->y = d->v[j]; - d->x = (IA*d->x + IC) % LRAN2_MAX; - d->v[j] = d->x; - return d->y; -} - -#undef IA -#undef IC - -#endif - - diff --git a/benchmarks/timer.h b/benchmarks/timer.h deleted file mode 100644 index d4d42c7..0000000 --- a/benchmarks/timer.h +++ /dev/null @@ -1,372 +0,0 @@ -/* -*- C++ -*- */ - -/* - - Heap Layers: An Extensible Memory Allocation Infrastructure - - Copyright (C) 2000-2003 by Emery Berger - http://www.cs.umass.edu/~emery - emery@cs.umass.edu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ - -#include <cassert> -#include <stdio.h> - - -#ifndef _TIMER_H_ -#define _TIMER_H_ - -/** - * @class Timer - * @brief A portable class for high-resolution timing. - * - * This class simplifies timing measurements across a number of platforms. - * - * @code - * Timer t; - * t.start(); - * // do some work - * t.stop(); - * cout << "That took " << (double) t << " seconds." << endl; - * @endcode - * - */ - -#ifdef __APPLE__ -#include <sys/time.h> -#endif - -#if defined(__linux__) && defined(__GNUG__) && defined(__i386__) - -#include <stdio.h> -#include <limits.h> -#include <time.h> -#include <unistd.h> -#include <fcntl.h> -#include <string.h> - -static void getTime (unsigned long& tlo, unsigned long& thi) { - asm volatile ("rdtsc" - : "=a"(tlo), - "=d" (thi)); -} - - -static double getFrequency (void) { - static double freq = 0.0; - static bool initialized = false; - unsigned long LTime0, LTime1, HTime0, HTime1; - if (!initialized) { - - freq = 2600000.0; - -#if 0 - // Compute MHz directly. - // Wait for approximately one second. - - getTime (LTime0, HTime0); - // printf ("waiting...\n"); - struct timespec rqtp, rmtp; - rqtp.tv_sec = 1; - rqtp.tv_nsec = 0; - nanosleep (&rqtp, &rmtp); - // printf ("done.\n"); - getTime (LTime1, HTime1); - - freq = (double)(LTime1 - LTime0) + (double)(UINT_MAX)*(double)(HTime1 - HTime0); - if (LTime1 < LTime0) { - freq -= (double)UINT_MAX; - } -#endif - initialized = true; - - } else { - // printf ("wha?\n"); - } - return freq; -} - - -namespace HL { - -class Timer { -public: - Timer (void) - : timeElapsed (0.0) - { - _frequency = getFrequency(); - // printf ("wooo!\n"); - // printf ("freq = %lf\n", frequency); - } - void start (void) { - getTime (currentLo, currentHi); - } - void stop (void) { - unsigned long lo, hi; - getTime (lo, hi); - double now = (double) hi * 4294967296.0 + lo; - double prev = (double) currentHi * 4294967296.0 + currentLo; - timeElapsed = (now - prev) / _frequency; - } - - operator double (void) { - return timeElapsed; - } - -private: - double timeElapsed; - unsigned long currentLo, currentHi; - double _frequency; -}; - -}; - -#else - - -#ifdef __SVR4 // Solaris -#include <sys/time.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/procfs.h> -#include <stdio.h> -#endif // __SVR4 - -#include <time.h> - -#if defined(unix) || defined(__linux) -#include <sys/time.h> -#include <unistd.h> -#endif - - -#ifdef __sgi -#include <sys/types.h> -#include <sys/times.h> -#include <limits.h> -#endif - - -#if defined(_WIN32) -#include <windows.h> -#endif - - -#if defined(__BEOS__) -#include <OS.h> -#endif - - -namespace HL { - -class Timer { - -public: - - /// Initializes the timer. - Timer (void) -#if !defined(_WIN32) - : _starttime (0), - _elapsedtime (0) -#endif - { - } - - /// Start the timer. - void start (void) { _starttime = _time(); } - - /// Stop the timer. - void stop (void) { _elapsedtime += _time() - _starttime; } - - /// Reset the timer. - void reset (void) { _starttime = _elapsedtime; } - -#if 0 - // Set the timer. - void set (double secs) { _starttime = 0; _elapsedtime = _sectotime (secs);} -#endif - - /// Return the number of seconds elapsed. - operator double (void) { return _timetosec (_elapsedtime); } - - static double currentTime (void) { TimeType t; t = _time(); return _timetosec (t); } - - -private: - - // The _timer variable will be different depending on the OS. - // We try to use the best timer available. - -#ifdef __sgi -#define TIMER_FOUND - - long _starttime, _elapsedtime; - - long _time (void) { - struct tms t; - long ticks = times (&t); - return ticks; - } - - static double _timetosec (long t) { - return ((double) (t) / CLK_TCK); - } - - static long _sectotime (double sec) { - return (long) sec * CLK_TCK; - } -#endif - -#ifdef __SVR4 // Solaris -#define TIMER_FOUND - typedef hrtime_t TimeType; - TimeType _starttime, _elapsedtime; - - static TimeType _time (void) { - return gethrtime(); - } - - static TimeType _sectotime (double sec) { return (hrtime_t) (sec * 1.0e9); } - - static double _timetosec (TimeType& t) { - return ((double) (t) / 1.0e9); - } -#endif // __SVR4 - -#if defined(MAC) || defined(macintosh) -#define TIMER_FOUND - double _starttime, _elapsedtime; - - double _time (void) { - return get_Mac_microseconds(); - } - - double _timetosec (hrtime_t& t) { - return t; - } -#endif // MAC - -#ifdef _WIN32 -#define TIMER_FOUND - -#ifndef __GNUC__ - class TimeType { - public: - TimeType (void) - { - largeInt.QuadPart = 0; - } - operator double& (void) { return (double&) largeInt.QuadPart; } - operator LARGE_INTEGER& (void) { return largeInt; } - double timeToSec (void) { - return (double) largeInt.QuadPart / getFreq(); - } - private: - double getFreq (void) { - QueryPerformanceFrequency (&freq); - return (double) freq.QuadPart; - } - - LARGE_INTEGER largeInt; - LARGE_INTEGER freq; - }; - - TimeType _starttime, _elapsedtime; - - static TimeType _time (void) { - TimeType t; - int r = QueryPerformanceCounter (&((LARGE_INTEGER&) t)); - assert (r); - return t; - } - - static double _timetosec (TimeType& t) { - return t.timeToSec(); - } -#else - typedef DWORD TimeType; - DWORD _starttime, _elapsedtime; - static DWORD _time (void) { - return GetTickCount(); - } - - static double _timetosec (DWORD& t) { - return (double) t / 100000.0; - } - static unsigned long _sectotime (double sec) { - return (unsigned long)(sec); - } -#endif -#endif // _WIN32 - - -#ifdef __BEOS__ -#define TIMER_FOUND - bigtime_t _starttime, _elapsedtime; - bigtime_t _time(void) { - return system_time(); - } - double _timetosec (bigtime_t& t) { - return (double) t / 1000000.0; - } - - bigtime_t _sectotime (double sec) { - return (bigtime_t)(sec * 1000000.0); - } -#endif // __BEOS__ - -#ifndef TIMER_FOUND - - typedef long TimeType; - TimeType _starttime, _elapsedtime; - - static TimeType _time (void) { - struct timeval t; - gettimeofday (&t, NULL); - return t.tv_sec * 1000000 + t.tv_usec; - } - - static double _timetosec (TimeType t) { - return ((double) (t) / 1000000.0); - } - - static TimeType _sectotime (double sec) { - return (TimeType) (sec * 1000000.0); - } - -#endif // TIMER_FOUND - -#undef TIMER_FOUND - -}; - - -#ifdef __SVR4 // Solaris -class VirtualTimer : public Timer { -public: - hrtime_t _time (void) { - return gethrvtime(); - } -}; -#endif - -} - -#endif - -#endif |
