aboutsummaryrefslogtreecommitdiff
path: root/src/benchmarks/rdtsc/rdtsc.c
diff options
context:
space:
mode:
authorFlorian Fischer <florian.fl.fischer@fau.de>2020-04-08 16:20:29 +0200
committerFlorian Fischer <florian.fl.fischer@fau.de>2020-04-08 16:20:29 +0200
commitd58dc6c95d9044ffafa08b4327f5abbf0f5b54e0 (patch)
tree5e66b5ceebdfcd8bb9e1e492287fd0d69d53f727 /src/benchmarks/rdtsc/rdtsc.c
parentf7c6f7142e38e4bf42f95bb706c37c9ae61a04df (diff)
downloadallocbench-d58dc6c95d9044ffafa08b4327f5abbf0f5b54e0.tar.gz
allocbench-d58dc6c95d9044ffafa08b4327f5abbf0f5b54e0.zip
add micro benchmark measureing malloc using rdtsc
Diffstat (limited to 'src/benchmarks/rdtsc/rdtsc.c')
-rw-r--r--src/benchmarks/rdtsc/rdtsc.c104
1 files changed, 104 insertions, 0 deletions
diff --git a/src/benchmarks/rdtsc/rdtsc.c b/src/benchmarks/rdtsc/rdtsc.c
new file mode 100644
index 0000000..33a8626
--- /dev/null
+++ b/src/benchmarks/rdtsc/rdtsc.c
@@ -0,0 +1,104 @@
+#define _GNU_SOURCE /* See feature_test_macros(7) */
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/sysinfo.h>
+
+int mode = 0;
+int size = 64;
+int iterations = 100000;
+int num_cpus;
+
+static __inline__ int64_t rdtsc_s(void)
+{
+ unsigned a, d;
+ asm volatile("cpuid" ::: "%rax", "%rbx", "%rcx", "%rdx");
+ asm volatile("rdtsc" : "=a" (a), "=d" (d));
+ return ((unsigned long)a) | (((unsigned long)d) << 32);
+}
+
+static __inline__ int64_t rdtsc_e(void)
+{
+ unsigned a, d;
+ asm volatile("rdtscp" : "=a" (a), "=d" (d));
+ asm volatile("cpuid" ::: "%rax", "%rbx", "%rcx", "%rdx");
+ return ((unsigned long)a) | (((unsigned long)d) << 32);
+}
+
+static void* test_thread_func(void* arg) {
+ int64_t clock_before, clock_after;
+ void* p;
+
+ int64_t* clocks = malloc(iterations * sizeof(int64_t));
+ if (!clocks)
+ abort();
+
+ // set cpu affinity to prevent cpu switching
+ int64_t tid = (int64_t) arg;
+ cpu_set_t my_cpu;
+ /* Skip CPU0 - let the OS run on that one */
+ int my_cpu_num = (tid % (num_cpus-1))+1;
+
+ CPU_ZERO (&my_cpu);
+ /* CPU_SET (my_cpu_num, &my_cpu); */
+ CPU_SET (3, &my_cpu);
+ if (sched_setaffinity (0, sizeof(my_cpu), &my_cpu) == -1)
+ perror ("setaffinity failed");
+
+ for(int i = 0; i < iterations; i++) {
+ clock_before = rdtsc_s();
+ p = malloc(size);
+ clock_after = rdtsc_e();
+
+ // measure potentially cached allocations
+ if (mode)
+ free(p);
+
+ clocks[i] = clock_after - clock_before;
+ }
+
+ for(int i = 0; i < iterations; i++) {
+ printf("malloc(%d): %d cycles\n", size, clocks[i]);
+ }
+
+ return NULL;
+}
+
+int main(int argc, char* argv[]) {
+ pthread_t* threads;
+ int num_threads = 1;
+
+ num_cpus = get_nprocs();
+
+ if (argc > 5) {
+ fprintf(stderr, "Usage: %s <iterations> <size> <num threads>\n", argv[0]);
+ return 1;
+ }
+
+ if (argc > 1 && strncmp(argv[1], "cached", strlen("cached"))) mode = 1;
+ if (argc > 2) iterations = atoi(argv[2]);
+ if (argc > 3) size = atoi(argv[3]);
+ if (argc > 4) num_threads = atoi(argv[4]);
+
+ fprintf(stderr, "iterations = %d; size = %d; threads = %d\n", iterations, size, num_threads);
+
+ threads = (pthread_t*) malloc(num_threads * sizeof(pthread_t));
+
+ for (int i = 0; i < num_threads; i++) {
+ if (0 != pthread_create(&threads[i], NULL, test_thread_func, NULL)) {
+ perror("pthread_create");
+ return 1;
+ }
+ }
+
+ for(int i = 0; i < num_threads; i++) {
+ if (0 != pthread_join(threads[i], NULL)) {
+ perror("pthread_join");
+ return 1;
+ }
+ }
+
+ return 0;
+}