From 2674da763577c71a05be0145569056228b2eee29 Mon Sep 17 00:00:00 2001
From: Robin Nehls
Date: Sat, 21 May 2011 11:40:18 +0200
Subject: initial commit

---
 bench       |   2 ++
 inlineasm.h | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 test.c      |   8 ++++
 test32.c    |   9 +++++
 4 files changed, 151 insertions(+)
 create mode 100644 bench
 create mode 100644 inlineasm.h
 create mode 100644 test.c
 create mode 100644 test32.c

diff --git a/bench b/bench
new file mode 100644
index 0000000..26195cb
--- /dev/null
+++ b/bench
@@ -0,0 +1,2 @@
+for i in $(seq 1 1000) ; do ./mem64 ; done | sort | uniq -c | awk '{ sum += ($1 * $2) } END { print sum }'
+for i in $(seq 1 1000) ; do ./mem32 ; done | awk '{ sum += $1 ; counter+=1 ; print sum/counter }'
diff --git a/inlineasm.h b/inlineasm.h
new file mode 100644
index 0000000..af466d3
--- /dev/null
+++ b/inlineasm.h
@@ -0,0 +1,132 @@
+#ifndef INLINEASM_H
+#define INLINEASM_H
+
+#include <stdint.h>
+
+#if defined(__i386__)
+
+// measures the rdtsc ticks spent on two byte compares, one at memory[0]
+// and one at memory[offset]; both addresses have to be readable.
+// only the lower 32 bit of the counter are used.
+//averages at about 80 ticks with an offset of 1
+static uint32_t optmemmeasure(char* memory, uint32_t offset)
+{
+    uint32_t start;
+    uint32_t ticks;
+
+    __asm__ __volatile__(
+        "rdtsc ;"
+        "mov %%eax, %[start] ;"
+
+        "cmpb $0x23, (%[mem]) ;"
+        "cmpb $0x42, (%[mem], %[off]) ;"
+
+        "rdtsc ;"
+        "sub %[start], %%eax ;"
+        // both outputs are written before the inputs are read (early
+        // clobber); the scratch register replaces the frame pointer ebp
+        : "=&a" (ticks), [start] "=&r" (start)
+        : [mem] "r" (memory), [off] "r" (offset)
+        : "edx", "cc", "memory"
+    );
+
+    return ticks;
+}
+
+// small implementation to get the rdtsc counter
+static uint64_t rdtsc(void)
+{
+    uint64_t ticks;
+
+    // "=A" is the edx:eax pair on i386; volatile keeps repeated reads apart
+    __asm__ __volatile__(
+        "rdtsc ;"
+        : "=A" (ticks)
+    );
+
+    return ticks;
+}
+
+#elif defined(__x86_64__)
+
+// this implementation uses all of the data provided by rdtsc but uses
+// more instructions
+// averages at about 77.3 ticks with an offset of 1
+static uint64_t memmeasure(char* memory, uint64_t offset)
+{
+    uint64_t start;
+    uint64_t ticks;
+
+    __asm__ __volatile__(
+        "rdtsc ;"
+        "shl $32, %%rdx ;"
+        "add %%rax, %%rdx ;"
+        "mov %%rdx, %[start] ;"
+
+        // here be magic dragons and memory access (read segfaults) ahead
+        // TODO: evaluate if more cmp types (like w and l) do make sense
+        "cmpb $0x23, (%[mem]) ;"
+        "cmpb $0x42, (%[mem],%[off]) ;"
+
+        "rdtsc ;"
+        "shl $32, %%rdx ;"
+        "add %%rdx, %%rax ;"
+        "sub %[start], %%rax ;" //result in rax which is bound to ticks
+        : "=&a" (ticks), [start] "=&r" (start)
+        : [mem] "r" (memory), [off] "r" (offset)
+        : "rdx", "cc", "memory"
+    );
+
+    return ticks;
+}
+
+// this implementation only uses the lower part of the values returned by
+// rdtsc to save instructions. it is not significantly faster than the
+// accurate one but it has fewer instructions and by that is less likely
+// to be delayed by the scheduler
+// averages at about 72.5 ticks with an offset of 1
+static uint64_t optmemmeasure(char* memory, uint64_t offset)
+{
+    uint32_t start;
+    uint32_t ticks;
+
+    __asm__ __volatile__(
+        "rdtsc ;"
+        "mov %%eax, %[start] ;"
+
+        // here be magic dragons and memory access (read segfaults) ahead
+        // TODO: evaluate if more cmp types (like w and l) do make sense
+        "cmpb $0x23, (%[mem]) ;"
+        "cmpb $0x42, (%[mem],%[off]) ;"
+
+        "rdtsc ;"
+        "sub %[start], %%eax ;"
+        : "=&a" (ticks), [start] "=&r" (start)
+        : [mem] "r" (memory), [off] "r" (offset)
+        : "rdx", "cc", "memory"
+    );
+
+    return ticks;
+}
+
+// small implementation to get the rdtsc counter
+static uint64_t rdtsc(void)
+{
+    uint32_t low;
+    uint32_t high;
+
+    __asm__ __volatile__(
+        "rdtsc ;"
+        : "=a" (low), "=d" (high)
+    );
+
+    return ((uint64_t)high << 32) | low;
+}
+
+#else
+
+#error "This code only supports x86 and x86_64"
+
+#endif
+
+#endif /* INLINEASM_H */
diff --git a/test.c b/test.c
new file mode 100644
index 0000000..b97200a
--- /dev/null
+++ b/test.c
@@ -0,0 +1,8 @@
+#include <stdio.h>
+#include "inlineasm.h"
+
+int main(int argc, char* argv[]) {
+    unsigned long long time = optmemmeasure(argv[0],1);
+    printf("%llu\n",time);
+    return 0;
+}
diff --git a/test32.c b/test32.c
new file mode 100644
index 0000000..a78f27f
--- /dev/null
+++ b/test32.c
@@ -0,0 +1,9 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include "inlineasm.h"
+
+int main(int argc, char* argv[]) {
+    uint32_t time = optmemmeasure(argv[0],2);
+    printf("%" PRIu32 "\n",time);
+    return 0;
+}
-- 
cgit v1.2.3-1-g7c22