path: root/inlineasm.h
author    Robin Nehls <nehls@mi.fu-berlin.de>    2011-05-21 11:40:18 +0200
committer Robin Nehls <nehls@mi.fu-berlin.de>    2011-05-21 11:40:18 +0200
commit    2674da763577c71a05be0145569056228b2eee29 (patch)
tree      f0eefae3e5da602cd697dd9c35b0439469918ea8 /inlineasm.h
initial commit
Diffstat (limited to 'inlineasm.h')
-rw-r--r--  inlineasm.h  113
1 file changed, 113 insertions, 0 deletions
diff --git a/inlineasm.h b/inlineasm.h
new file mode 100644
index 0000000..af466d3
--- /dev/null
+++ b/inlineasm.h
@@ -0,0 +1,113 @@
+#include <stdint.h>
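+
+// inline assembly helpers for timing memory accesses with the time stamp
+// counter (rdtsc)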
+
+#if defined(__i386__)
+
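+// measure how long it takes to touch memory[0] and memory[offset]: read the
+// time stamp counter, perform the two accesses, read the counter again and
+// take the difference in ticks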
+// averages at about 80 ticks with an offset of 1
+static uint32_t optmemmeasure(char* memory, uint32_t offset)
+{
+	uint32_t ticks;
+	asm volatile(
+		"rdtsc ;"
+		"mov %%eax, %%esi ;"
+
+		"cmpb $0x23, (%%ebx) ;"
+		"cmpb $0x42, (%%ebx, %%ecx) ;"
+
+		"rdtsc ;"
+		"sub %%esi, %%eax ;"
+		: "=a" (ticks) : "b" (memory), "c" (offset) : "edx", "esi", "memory"
+	);
+	return ticks;
+}
+
+static uint64_t rdtsc(void)
+{
+	uint64_t tsc;
+
+	asm volatile(
+		"rdtsc ;"
+		: "=A" (tsc)
+	);
+
+	return tsc;
+}
+
+#elif defined(__x86_64__)
+
+// this implementation uses all of the data provided by rdtsc but needs
+// more instructions
+// averages at about 77.3 ticks with an offset of 1
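+// rdtsc returns the low 32 bits of the time stamp counter in eax and the
+// high 32 bits in edx; the shl/add sequences below merge them into a single
+// 64-bit value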
+static uint64_t memmeasure(char* memory, uint64_t offset)
+{
+	uint64_t ticks;
+	asm volatile(
+		"rdtsc ;"
+		"shl $32, %%rdx ;"
+		"add %%rax, %%rdx ;"
+		"mov %%rdx, %%rdi ;"
+
+		// here be magic dragons and memory accesses (read: segfaults) ahead
+		// TODO: evaluate whether other cmp widths (like w and l) make sense
+		"cmpb $0x23, (%%rbx) ;"
+		"cmpb $0x42, (%%rbx,%%rcx) ;"
+
+		"rdtsc ;"
+		"shl $32, %%rdx ;"
+		"add %%rdx, %%rax ;"
+		"sub %%rdi, %%rax ;" // result in rax, which is the return value
+		: "=a" (ticks) : "b" (memory), "c" (offset) : "rdx", "rdi", "memory"
+	);
+	return ticks;
+}
+
+// this implementation only uses the lower half of the values returned by
+// rdtsc to save instructions. it is not significantly faster than the
+// accurate one, but it has fewer instructions and is therefore less likely
+// to be delayed by the scheduler
+// averages at about 72.5 ticks with an offset of 1
+static uint64_t optmemmeasure(char* memory, uint64_t offset)
+{
+	uint64_t ticks;
+	asm volatile(
+		"rdtsc ;"
+		"mov %%eax, %%edi ;"
+
+		// here be magic dragons and memory accesses (read: segfaults) ahead
+		// TODO: evaluate whether other cmp widths (like w and l) make sense
+		"cmpb $0x23, (%%rbx) ;"
+		"cmpb $0x42, (%%rbx,%%rcx) ;"
+
+		"rdtsc ;"
+		"sub %%edi, %%eax ;"
+		: "=a" (ticks) : "b" (memory), "c" (offset) : "rdx", "rdi", "memory"
+	);
+	return ticks;
+}
+
+// small helper to read the rdtsc counter
+static uint64_t rdtsc(void)
+{
+	uint64_t tsc;
+	asm volatile(
+		"rdtsc ;"
+		"shl $32, %%rdx ;"
+		"add %%rdx, %%rax ;"
+		: "=a" (tsc) : : "rdx"
+	);
+	return tsc;
+}
+
+#else
+
+#error "This code only supports x86 and x86_64"
+
+#endif
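
A minimal usage sketch (not part of this commit): a C file that includes
inlineasm.h and calls optmemmeasure() on a heap buffer. The buffer size,
warm-up loop and output format below are illustrative assumptions, not taken
from the repository.

	#include <stdio.h>
	#include <stdlib.h>

	#include "inlineasm.h"

	int main(void)
	{
		// hypothetical probe buffer; the size is chosen only for illustration
		size_t size = 4 * 1024 * 1024;
		char *buffer = malloc(size);
		if (buffer == NULL)
			return 1;

		// touch every page once so the timed reads do not page-fault
		for (size_t i = 0; i < size; i += 4096)
			buffer[i] = 0;

		// time the access pair for a few power-of-two offsets
		for (size_t offset = 1; offset < size; offset *= 2)
			printf("offset %8zu: %llu ticks\n", offset,
			       (unsigned long long)optmemmeasure(buffer, offset));

		free(buffer);
		return 0;
	}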