summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRobin Nehls <nehls@mi.fu-berlin.de>2011-05-21 11:40:18 +0200
committerRobin Nehls <nehls@mi.fu-berlin.de>2011-05-21 11:40:18 +0200
commit2674da763577c71a05be0145569056228b2eee29 (patch)
treef0eefae3e5da602cd697dd9c35b0439469918ea8
downloadmanycore-2674da763577c71a05be0145569056228b2eee29.tar.gz
manycore-2674da763577c71a05be0145569056228b2eee29.tar.bz2
manycore-2674da763577c71a05be0145569056228b2eee29.zip
initial commit
-rw-r--r--bench2
-rw-r--r--inlineasm.h95
-rw-r--r--test.c8
-rw-r--r--test32.c8
4 files changed, 113 insertions, 0 deletions
diff --git a/bench b/bench
new file mode 100644
index 0000000..26195cb
--- /dev/null
+++ b/bench
@@ -0,0 +1,2 @@
+# Run ./mem64 1000 times, collapse identical output lines into "count value"
+# pairs (sort | uniq -c) and print the sum of count * value, i.e. the total
+# of all values printed across the runs.
+for i in $(seq 1 1000) ; do ./mem64 ; done | sort | uniq -c | awk '{ sum += ($1 * $2) } END { print sum }'
+# Run ./mem32 1000 times and, after every output line, print the running
+# mean of the first field seen so far (one line of output per run).
+for i in $(seq 1 1000) ; do ./mem32 ; done | awk '{ sum += $1 ; counter+=1 ; print sum/counter }'
diff --git a/inlineasm.h b/inlineasm.h
new file mode 100644
index 0000000..af466d3
--- /dev/null
+++ b/inlineasm.h
@@ -0,0 +1,95 @@
+#include <stdint.h>
+
+#if defined(__i386__)
+
+/*
+ * Time two byte-sized memory reads using the low 32 bits of the TSC.
+ *
+ * Reads the timestamp counter, compares the byte at `memory` and the byte
+ * at `memory + offset` against constants (the comparison results are
+ * discarded, only the loads matter), reads the counter again and returns
+ * the difference of the two low halves, i.e. the elapsed ticks modulo 2^32.
+ *
+ * Both addresses are dereferenced, so they must be readable or the call
+ * faults.
+ *
+ * Author's measurement: averages at about 80 ticks with an offset of 1.
+ */
+static uint32_t optmemmeasure(char* memory, uint32_t offset)
+{
+	uint32_t ticks;
+
+	asm volatile(
+		"rdtsc ;"
+		"mov %%eax, %%edi ;"            // keep the low half of the start stamp
+
+		"cmpb $0x23, (%%ebx) ;"
+		"cmpb $0x42, (%%ebx, %%ecx) ;"
+
+		"rdtsc ;"
+		"sub %%edi, %%eax ;"            // elapsed ticks end up in eax
+		: "=a" (ticks)
+		: "b" (memory), "c" (offset)
+		// rdtsc overwrites edx and edi is the scratch register. The scratch
+		// used to be ebp, which may serve as the frame pointer and therefore
+		// must not be overwritten behind the compiler's back.
+		: "edx", "edi", "cc", "memory"
+	);
+
+	return ticks;
+}
+
+/*
+ * Return the current 64-bit timestamp counter value.
+ *
+ * On i386 the "=A" constraint binds the result to the edx:eax register
+ * pair, which is exactly where rdtsc leaves its value.
+ */
+static uint64_t rdtsc(void)
+{
+	uint64_t ticks;
+
+	// volatile: every call must issue a fresh rdtsc; without it the compiler
+	// is free to merge or hoist repeated reads because the asm has no inputs.
+	asm volatile(
+		"rdtsc ;"
+		: "=A" (ticks)
+	);
+
+	return ticks;
+}
+
+#elif defined(__x86_64__)
+
+/*
+ * Time two byte-sized memory reads using the full 64-bit TSC value.
+ *
+ * Reads the timestamp counter, compares the byte at `memory` and the byte
+ * at `memory + offset` against constants (the comparison results are
+ * discarded, only the loads matter), reads the counter again and returns
+ * the number of elapsed ticks.
+ *
+ * This variant uses all of the data provided by rdtsc but needs more
+ * instructions than optmemmeasure().
+ *
+ * Both addresses are dereferenced, so they must be readable or the call
+ * segfaults.
+ *
+ * Author's measurement: averages at about 77.3 ticks with an offset of 1.
+ */
+static uint64_t memmeasure(char* memory, uint64_t offset)
+{
+	uint64_t ticks;
+
+	asm volatile(
+		"rdtsc ;"
+		"shl $32, %%rdx ;"
+		"add %%rax, %%rdx ;"
+		"mov %%rdx, %%rdi ;"            // rdi = 64-bit start stamp
+
+		// the two memory accesses being timed
+		// TODO: evaluate whether wider cmp variants (w and l) make sense
+		"cmpb $0x23, (%%rbx) ;"
+		"cmpb $0x42, (%%rbx,%%rcx) ;"
+
+		"rdtsc ;"
+		"shl $32, %%rdx ;"
+		"add %%rdx, %%rax ;"
+		"sub %%rdi, %%rax ;"            // rax = end stamp - start stamp
+		: "=a" (ticks)
+		: "b" (memory), "c" (offset)
+		// rdtsc overwrites rdx and rdi holds the start stamp; both must be
+		// declared so the compiler does not keep live values in them.
+		: "rdx", "rdi", "cc", "memory"
+	);
+
+	return ticks;
+}
+
+/*
+ * Time two byte-sized memory reads using only the low 32 bits of the TSC.
+ *
+ * Reads the timestamp counter, compares the byte at `memory` and the byte
+ * at `memory + offset` against constants (the comparison results are
+ * discarded, only the loads matter), reads the counter again and returns
+ * the difference of the two low halves, i.e. the elapsed ticks modulo 2^32,
+ * zero-extended to 64 bits.
+ *
+ * Author's notes: this variant only uses the lower part of the values
+ * returned by rdtsc to save instructions. It is not significantly faster
+ * than memmeasure(), but it has fewer instructions and is therefore less
+ * likely to be delayed by the scheduler. Averages at about 72.5 ticks with
+ * an offset of 1.
+ *
+ * Both addresses are dereferenced, so they must be readable or the call
+ * segfaults.
+ */
+static uint64_t optmemmeasure(char* memory, uint64_t offset)
+{
+	uint32_t ticks;
+
+	asm volatile(
+		"rdtsc ;"
+		"mov %%eax, %%edi ;"            // keep the low half of the start stamp
+
+		// the two memory accesses being timed
+		// TODO: evaluate whether wider cmp variants (w and l) make sense
+		"cmpb $0x23, (%%rbx) ;"
+		"cmpb $0x42, (%%rbx,%%rcx) ;"
+
+		"rdtsc ;"
+		"sub %%edi, %%eax ;"            // elapsed ticks end up in eax
+		: "=a" (ticks)
+		: "b" (memory), "c" (offset)
+		// rdtsc overwrites rdx and the mov overwrites rdi; both must be
+		// declared so the compiler does not keep live values in them.
+		: "rdx", "rdi", "cc", "memory"
+	);
+
+	return ticks;
+}
+
+/*
+ * Small helper that returns the current 64-bit timestamp counter value.
+ *
+ * rdtsc delivers the counter split across edx (high half) and eax (low
+ * half); the halves are received as separate outputs and combined in C so
+ * the compiler knows which registers the instruction writes.
+ */
+static uint64_t rdtsc(void)
+{
+	uint32_t lo;
+	uint32_t hi;
+
+	// volatile: every call must issue a fresh rdtsc instead of reusing an
+	// earlier result.
+	asm volatile(
+		"rdtsc ;"
+		: "=a" (lo), "=d" (hi)
+	);
+
+	return ((uint64_t)hi << 32) | lo;
+}
+
+#else
+
+#error "This code only supports x86 and x86_64"
+
+#endif
diff --git a/test.c b/test.c
new file mode 100644
index 0000000..b97200a
--- /dev/null
+++ b/test.c
@@ -0,0 +1,8 @@
+#include <stdio.h>
+#include "inlineasm.h"
+
+/*
+ * Measure one optmemmeasure() run against the program name string
+ * (argv[0], offset 1) and print the tick count on its own line.
+ */
+int main(int argc, char* argv[]) {
+	(void)argc;
+
+	unsigned long long ticks = optmemmeasure(argv[0],1);
+	// %llu matches the unsigned type; %lld would reinterpret it as signed
+	printf("%llu\n",ticks);
+	return 0;
+}
diff --git a/test32.c b/test32.c
new file mode 100644
index 0000000..a78f27f
--- /dev/null
+++ b/test32.c
@@ -0,0 +1,8 @@
+#include <stdio.h>
+#include "inlineasm.h"
+
+/*
+ * Measure one optmemmeasure() run against the program name string
+ * (argv[0], offset 2) and print the tick count on its own line.
+ */
+int main(int argc, char* argv[]) {
+	(void)argc;
+
+	uint32_t ticks = optmemmeasure(argv[0],2);
+	// print as unsigned: %d would show large tick counts as negative numbers
+	printf("%lu\n",(unsigned long)ticks);
+	return 0;
+}