summaryrefslogtreecommitdiffstats
path: root/inlineasm.h
blob: d1cb1384baa9a389941572db56669b2c50e1270c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#include <stdint.h>

#if defined(__i386__)

/*
 * Time one read-modify-write round over two memory locations using the
 * low 32 bits of the time stamp counter.
 *
 * Between two rdtsc reads the byte at `memory` is loaded, XORed with the
 * word at `memory + offset`, and the resulting low byte is stored back to
 * both `memory` and `memory + offset`.
 *
 * memory: base address; must be readable and writable.
 * offset: distance in bytes to the second location. A full machine word
 *         (4 bytes) is read at memory + offset, and one byte is written.
 *
 * Returns the difference of the low 32 TSC bits (end - start).
 */
static uint32_t optmemmeasure(char* memory, uint32_t offset)
{
    uint32_t start;   /* scratch register holding the first TSC sample */
    uint32_t elapsed; /* eax after the final sub */

    /*
     * volatile: the timing must not be removed, merged or moved.
     * eax and the scratch register are early-clobber outputs because they
     * are written before the inputs are consumed; edx is overwritten by
     * rdtsc; "memory" covers the two stores; "cc" covers xor/sub.
     * The offset is widened to pointer size so base and index registers
     * always have the same width.
     */
    __asm__ __volatile__(
        "rdtsc ;"
        "mov %%eax, %[start] ;"

        /* the measured memory accesses */
        "movb (%[mem]), %%al ;"
        "xor (%[mem],%[off]), %%eax ;"
        "movb %%al, (%[mem]) ;"
        "movb %%al, (%[mem],%[off]) ;"

        "rdtsc ;"
        "sub %[start], %%eax ;"
        : "=&a" (elapsed), [start] "=&r" (start)
        : [mem] "r" (memory), [off] "r" ((uintptr_t)offset)
        : "edx", "memory", "cc"
       );

    (void)start;
    return elapsed;
}

// there is no accurate (full 64 bit counter) implementation for 32 bit
// x86, so the measurement is delegated to the optimized variant
static uint32_t memmeasure(char* memory, uint32_t offset)
{
    const uint32_t ticks = optmemmeasure(memory, offset);

    return ticks;
}


/*
 * Read the 64 bit time stamp counter.
 *
 * rdtsc delivers the counter split over edx (high half) and eax (low
 * half); both halves are captured separately and combined in C. The asm
 * is volatile so that every call really executes the instruction instead
 * of being merged with, or hoisted past, another call.
 *
 * Returns the full 64 bit counter value.
 */
static uint64_t rdtsc(void)
{
    uint32_t low;
    uint32_t high;

    __asm__ __volatile__(
        "rdtsc ;"
        : "=a" (low), "=d" (high)
       );

    return ((uint64_t)high << 32) | low;
}

#elif defined(__x86_64__)

// this implementation uses all of the data provided by rdtsc but uses
// more instructions
/*
 * Time one read-modify-write round over two memory locations using the
 * full 64 bit time stamp counter.
 *
 * Between two rdtsc reads the byte at `memory` is loaded, XORed with the
 * quadword at `memory + offset`, and the resulting low byte is stored
 * back to both `memory` and `memory + offset`.
 *
 * memory: base address; must be readable and writable.
 * offset: distance in bytes to the second location. 8 bytes are read at
 *         memory + offset, and one byte is written.
 *
 * Returns the number of TSC ticks between the two counter reads.
 */
static uint64_t memmeasure(char* memory, uint64_t offset)
{
    uint64_t start;   /* scratch register holding the first TSC sample */
    uint64_t elapsed; /* rax after the final sub */

    /*
     * volatile: the timing must not be removed, merged or moved.
     * rax and the scratch register are early-clobber outputs because they
     * are written before the inputs are consumed; rdx is overwritten by
     * rdtsc; "memory" covers the two stores; "cc" covers shl/add/xor/sub.
     */
    __asm__ __volatile__(
        "rdtsc ;"
        "shl $32, %%rdx ;"
        "add %%rax, %%rdx ;"
        "mov %%rdx, %[start] ;"

        /* the measured memory accesses (a bad pointer faults here) */
        "movb (%[mem]), %%al ;"
        "xor (%[mem],%[off]), %%rax ;"
        "movb %%al, (%[mem]) ;"
        "movb %%al, (%[mem],%[off]) ;"

        "rdtsc ;"
        "shl $32, %%rdx ;"
        "add %%rdx, %%rax ;"
        "sub %[start], %%rax ;"
        : "=&a" (elapsed), [start] "=&r" (start)
        : [mem] "r" (memory), [off] "r" (offset)
        : "rdx", "memory", "cc"
       );

    (void)start;
    return elapsed;
}

// this implementation only uses the lower part of the values returned by
// rdtsc to save instructions. it is not significantly faster than the
// accurate one but it has fewer instructions and by that is less likely
// to be delayed by the scheduler
/*
 * Between two rdtsc reads the byte at `memory` is loaded, XORed with the
 * quadword at `memory + offset`, and the resulting low byte is stored
 * back to both `memory` and `memory + offset`.
 *
 * memory: base address; must be readable and writable.
 * offset: distance in bytes to the second location. 8 bytes are read at
 *         memory + offset, and one byte is written.
 *
 * Returns the difference of the low 32 TSC bits (end - start),
 * zero-extended to 64 bits.
 */
static uint64_t  optmemmeasure(char* memory, uint64_t offset)
{
    uint32_t start;   /* scratch register holding the low half of the first sample */
    uint64_t elapsed; /* rax; the 32 bit sub clears its upper half */

    /*
     * volatile: the timing must not be removed, merged or moved.
     * rax and the scratch register are early-clobber outputs because they
     * are written before the inputs are consumed; rdx is overwritten by
     * rdtsc; "memory" covers the two stores; "cc" covers xor/sub.
     */
    __asm__ __volatile__(
        "rdtsc ;"
        "mov %%eax, %[start] ;"

        /* the measured memory accesses (a bad pointer faults here) */
        "movb (%[mem]), %%al ;"
        "xor (%[mem],%[off]), %%rax ;"
        "movb %%al, (%[mem]) ;"
        "movb %%al, (%[mem],%[off]) ;"

        "rdtsc ;"
        "sub %[start], %%eax ;"
        : "=&a" (elapsed), [start] "=&r" (start)
        : [mem] "r" (memory), [off] "r" (offset)
        : "rdx", "memory", "cc"
       );

    (void)start;
    return elapsed;
}

// small implementation to get the rdtsc counter
/*
 * rdtsc delivers the counter split over edx (high half) and eax (low
 * half). Both halves are declared as asm outputs and combined in C, so
 * the compiler knows which registers change and where the result lives.
 * The asm is volatile so that every call really executes the instruction
 * instead of being merged with, or hoisted past, another call.
 *
 * Returns the full 64 bit counter value.
 */
static uint64_t rdtsc(void)
{
    uint32_t low;
    uint32_t high;

    __asm__ __volatile__(
        "rdtsc ;"
        : "=a" (low), "=d" (high)
       );

    return ((uint64_t)high << 32) | low;
}

#else

#error "This code only supports x86 and x86_64"

#endif