#include <stdint.h>

/*
 * Time-stamp-counter based timing helpers for x86 and x86_64.
 *
 * memmeasure() / optmemmeasure() time a short read-modify-write sequence
 * touching memory[0] and memory[offset], returning the elapsed TSC ticks.
 * Side effect of both: afterwards memory[0] and memory[offset] each hold
 * (old memory[0] XOR old memory[offset]).
 *
 * The XOR load is register-wide (4 bytes on i386, 8 bytes on x86_64), so
 * that many bytes starting at memory + offset must be readable.
 *
 * All asm statements are volatile and declare their outputs and clobbers,
 * so the compiler neither drops/merges the measurements nor keeps live
 * values in registers the assembly overwrites.
 */

#if defined(__i386__)

/*
 * Time the memory access sequence using only the low 32 bits of the TSC.
 *
 * memory: base address; memory[0] is read and written.
 * offset: byte offset of the second location that is read and written.
 * Returns the difference of the low 32 TSC bits (wraps modulo 2^32).
 */
static uint32_t optmemmeasure(char* memory, uint32_t offset)
{
    uint32_t elapsed;

    __asm__ __volatile__(
        "rdtsc ;"
        /* keep the start value in edi; ebp is the frame pointer and must
           not be overwritten */
        "mov %%eax, %%edi ;"
        /* timed memory accesses (may fault on bad addresses) */
        "movb (%%ebx), %%al ;"
        "xor (%%ebx,%%ecx), %%eax ;"
        "movb %%al, (%%ebx) ;"
        "movb %%al, (%%ebx,%%ecx) ;"
        "rdtsc ;"
        "sub %%edi, %%eax ;"
        : "=a" (elapsed)
        : "b" (memory), "c" (offset)
        : "edx", "edi", "cc", "memory"
    );
    return elapsed;
}

/* There is no accurate (full 64-bit) implementation on 32-bit x86, so this
   simply forwards to optmemmeasure(). */
static uint32_t memmeasure(char* memory, uint32_t offset)
{
    return optmemmeasure(memory, offset);
}

/* Read the full 64-bit time stamp counter ("A" binds the edx:eax pair). */
static uint64_t rdtsc(void)
{
    uint64_t tsc;

    /* volatile: every call must perform a fresh read */
    __asm__ __volatile__(
        "rdtsc ;"
        : "=A" (tsc)
    );
    return tsc;
}

#elif defined(__x86_64__)

/*
 * Time the memory access sequence using the full 64-bit TSC value.
 * This uses all of the data provided by rdtsc but needs more instructions
 * than optmemmeasure().
 *
 * memory: base address; memory[0] is read and written.
 * offset: byte offset of the second location that is read and written.
 * Returns the elapsed TSC ticks.
 */
static uint64_t memmeasure(char* memory, uint64_t offset)
{
    uint64_t elapsed;

    __asm__ __volatile__(
        "rdtsc ;"
        "shl $32, %%rdx ;"
        "add %%rax, %%rdx ;"
        "mov %%rdx, %%rdi ;"        /* rdi = 64-bit start value */
        /* timed memory accesses (may fault on bad addresses) */
        "movb (%%rbx), %%al ;"
        "xor (%%rbx,%%rcx), %%rax ;"
        "movb %%al, (%%rbx) ;"
        "movb %%al, (%%rbx,%%rcx) ;"
        "rdtsc ;"
        "shl $32, %%rdx ;"
        "add %%rdx, %%rax ;"
        "sub %%rdi, %%rax ;"        /* rax = end - start */
        : "=a" (elapsed)
        : "b" (memory), "c" (offset)
        : "rdx", "rdi", "cc", "memory"
    );
    return elapsed;
}

/*
 * Same measurement as memmeasure(), but only the lower 32 bits of the
 * values returned by rdtsc are used to save instructions. It is not
 * significantly faster than the accurate variant, but the shorter
 * sequence is less likely to be delayed by the scheduler.
 *
 * Returns the 32-bit tick difference (wraps modulo 2^32), zero-extended.
 */
static uint64_t optmemmeasure(char* memory, uint64_t offset)
{
    uint32_t elapsed;

    __asm__ __volatile__(
        "rdtsc ;"
        "mov %%eax, %%edi ;"        /* edi = low 32 bits of start value */
        /* timed memory accesses (may fault on bad addresses) */
        "movb (%%rbx), %%al ;"
        "xor (%%rbx,%%rcx), %%rax ;"
        "movb %%al, (%%rbx) ;"
        "movb %%al, (%%rbx,%%rcx) ;"
        "rdtsc ;"
        "sub %%edi, %%eax ;"
        : "=a" (elapsed)
        : "b" (memory), "c" (offset)
        : "rdx", "rdi", "cc", "memory"
    );
    return elapsed;
}

/* Small helper that returns the current 64-bit time stamp counter. */
static uint64_t rdtsc(void)
{
    uint32_t lo;
    uint32_t hi;

    /* rdtsc delivers the counter split across edx (high) and eax (low);
       volatile so every call performs a fresh read */
    __asm__ __volatile__(
        "rdtsc ;"
        : "=a" (lo), "=d" (hi)
    );
    return ((uint64_t)hi << 32) | lo;
}

#else
#error "This code only supports x86 and x86_64"
#endif