summaryrefslogtreecommitdiffstats
path: root/vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s')
-rw-r--r--vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s497
1 files changed, 497 insertions, 0 deletions
diff --git a/vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s b/vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s
new file mode 100644
index 000000000..f8d4ee928
--- /dev/null
+++ b/vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s
@@ -0,0 +1,497 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This code was translated into a form compatible with 6a from the public
+// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
+
+// +build amd64,!gccgo,!appengine
+
+// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
+TEXT ·poly1305(SB),0,$224-32 // 224-byte frame, 32 bytes of arguments; the arithmetic below is done on the x87 stack
+ MOVQ out+0(FP),DI // DI = out
+ MOVQ m+8(FP),SI // SI = m
+ MOVQ mlen+16(FP),DX // DX = mlen (bytes still to process)
+ MOVQ key+24(FP),CX // CX = key
+
+ MOVQ SP,R11 // keep the incoming SP; it is spilled below and restored just before RET
+ MOVQ $31,R9
+ NOTQ R9 // R9 = ^31
+ ANDQ R9,SP // round SP down to a multiple of 32 ...
+ ADDQ $32,SP // ... then step up by 32: SP is 32-byte aligned and 1..32 bytes above the incoming SP
+
+ MOVQ R11,32(SP) // spill the incoming SP and R12-R15, BX, BP; all are reloaded at the end of the routine
+ MOVQ R12,40(SP)
+ MOVQ R13,48(SP)
+ MOVQ R14,56(SP)
+ MOVQ R15,64(SP)
+ MOVQ BX,72(SP)
+ MOVQ BP,80(SP)
+ FLDCW ·ROUNDING(SB) // load the x87 control word from ·ROUNDING (defined outside this file); NOTE(review): nothing in this routine restores the previous control word
+ MOVL 0(CX),R8 // first 16 key bytes as four 32-bit words
+ MOVL 4(CX),R9
+ MOVL 8(CX),AX
+ MOVL 12(CX),R10
+ MOVQ DI,88(SP) // stash out and key; DI and CX are reused as scratch later
+ MOVQ CX,96(SP)
+ MOVL $0X43300000,108(SP) // high halves of the 8-byte slots at 104/112/120/128(SP): read as IEEE-754 doubles these are the top words of 2^52, 2^84, 2^116 and 2^148
+ MOVL $0X45300000,116(SP)
+ MOVL $0X47300000,124(SP)
+ MOVL $0X49300000,132(SP)
+ ANDL $0X0FFFFFFF,R8 // clear the top 4 bits of every word and the low 2 bits of words 1-3 (matches the Poly1305 clamp of r)
+ ANDL $0X0FFFFFFC,R9
+ ANDL $0X0FFFFFFC,AX
+ ANDL $0X0FFFFFFC,R10
+ MOVL R8,104(SP) // low halves: each slot now reads as a double whose low mantissa bits are the masked key word
+ MOVL R9,112(SP)
+ MOVL AX,120(SP)
+ MOVL R10,128(SP)
+ FMOVD 104(SP), F0 // push each slot and subtract ·DOFFSETn (presumably the matching 2^k bias, leaving the bare key word as a double -- confirm against the constant definitions)
+ FSUBD ·DOFFSET0(SB), F0
+ FMOVD 112(SP), F0
+ FSUBD ·DOFFSET1(SB), F0
+ FMOVD 120(SP), F0
+ FSUBD ·DOFFSET2(SB), F0
+ FMOVD 128(SP), F0
+ FSUBD ·DOFFSET3(SB), F0
+ FXCHD F0, F3 // bring the word-0 value to the top of the stack
+ FMOVDP F0, 136(SP) // 136(SP) = word-0 value (stored and popped)
+ FXCHD F0, F1 // bring the word-1 value to the top
+ FMOVD F0, 144(SP) // 144(SP) = word-1 value
+ FMULD ·SCALE(SB), F0
+ FMOVDP F0, 152(SP) // 152(SP) = word-1 value * ·SCALE
+ FMOVD F0, 160(SP) // 160(SP) = word-2 value
+ FMULD ·SCALE(SB), F0
+ FMOVDP F0, 168(SP) // 168(SP) = word-2 value * ·SCALE
+ FMOVD F0, 176(SP) // 176(SP) = word-3 value
+ FMULD ·SCALE(SB), F0
+ FMOVDP F0, 184(SP) // 184(SP) = word-3 value * ·SCALE; the four values pushed above have now all been popped
+ FLDZ // push four zeros: the initial accumulator
+ FLDZ
+ FLDZ
+ FLDZ
+ CMPQ DX,$16
+ JB ADDATMOST15BYTES // fewer than 16 message bytes in total: go straight to the partial-block path
+ INITIALATLEAST16BYTES: // first full 16-byte block: added to the zero accumulator, no multiply yet
+ MOVL 12(SI),DI // read 16 message bytes as four 32-bit words
+ MOVL 8(SI),CX
+ MOVL 4(SI),R8
+ MOVL 0(SI),R9
+ MOVL DI,128(SP) // write them into the low halves of the 104..128(SP) slots; the high halves were set to exponent constants at the start of the routine
+ MOVL CX,120(SP)
+ MOVL R8,112(SP)
+ MOVL R9,104(SP)
+ ADDQ $16,SI // advance the message pointer
+ SUBQ $16,DX // 16 fewer bytes remaining
+ FXCHD F0, F3
+ FADDD 128(SP), F0 // add the word-3 slot to one accumulator value ...
+ FSUBD ·DOFFSET3MINUSTWO128(SB), F0 // ... and remove its bias; the name suggests the word-3 offset less 2^128, which would also add the 2^128 pad bit -- confirm against the constant definition
+ FXCHD F0, F1
+ FADDD 112(SP), F0 // word 1
+ FSUBD ·DOFFSET1(SB), F0
+ FXCHD F0, F2
+ FADDD 120(SP), F0 // word 2
+ FSUBD ·DOFFSET2(SB), F0
+ FXCHD F0, F3
+ FADDD 104(SP), F0 // word 0
+ FSUBD ·DOFFSET0(SB), F0
+ CMPQ DX,$16
+ JB MULTIPLYADDATMOST15BYTES // fewer than 16 bytes left: skip the full-block loop
+ MULTIPLYADDATLEAST16BYTES: // loop body: one 16-byte block per iteration (carry, multiply by the saved key values, add the new block)
+ MOVL 12(SI),DI // read the next 16 message bytes as four 32-bit words
+ MOVL 8(SI),CX
+ MOVL 4(SI),R8
+ MOVL 0(SI),R9
+ MOVL DI,128(SP) // park them in the low halves of the 104..128(SP) slots; they are added at the bottom of the loop
+ MOVL CX,120(SP)
+ MOVL R8,112(SP)
+ MOVL R9,104(SP)
+ ADDQ $16,SI // advance the message pointer
+ SUBQ $16,DX // 16 fewer bytes remaining
+ FMOVD ·ALPHA130(SB), F0 // carry pass: add then subtract an ·ALPHAn constant to split a value (presumably rounding it to a multiple of 2^n -- confirm against the constant definitions)
+ FADDD F2,F0
+ FSUBD ·ALPHA130(SB), F0
+ FSUBD F0,F2 // remove the split-off part from the value it came from
+ FMULD ·SCALE(SB), F0 // scale the split-off part by ·SCALE (presumably wraps the carry past 2^130 back to the bottom -- confirm)
+ FMOVD ·ALPHA32(SB), F0
+ FADDD F2,F0
+ FSUBD ·ALPHA32(SB), F0
+ FSUBD F0,F2
+ FXCHD F0, F2
+ FADDDP F0,F1
+ FMOVD ·ALPHA64(SB), F0
+ FADDD F4,F0
+ FSUBD ·ALPHA64(SB), F0
+ FSUBD F0,F4
+ FMOVD ·ALPHA96(SB), F0
+ FADDD F6,F0
+ FSUBD ·ALPHA96(SB), F0
+ FSUBD F0,F6
+ FXCHD F0, F6
+ FADDDP F0,F1
+ FXCHD F0, F3
+ FADDDP F0,F5
+ FXCHD F0, F3
+ FADDDP F0,F1
+ FMOVD 176(SP), F0 // multiply pass: products of the accumulator values with the key values saved at 136..184(SP)
+ FMULD F3,F0
+ FMOVD 160(SP), F0
+ FMULD F4,F0
+ FMOVD 144(SP), F0
+ FMULD F5,F0
+ FMOVD 136(SP), F0
+ FMULDP F0,F6
+ FMOVD 160(SP), F0
+ FMULD F4,F0
+ FADDDP F0,F3
+ FMOVD 144(SP), F0
+ FMULD F4,F0
+ FADDDP F0,F2
+ FMOVD 136(SP), F0
+ FMULD F4,F0
+ FADDDP F0,F1
+ FMOVD 184(SP), F0
+ FMULDP F0,F4
+ FXCHD F0, F3
+ FADDDP F0,F5
+ FMOVD 144(SP), F0
+ FMULD F4,F0
+ FADDDP F0,F2
+ FMOVD 136(SP), F0
+ FMULD F4,F0
+ FADDDP F0,F1
+ FMOVD 184(SP), F0
+ FMULD F4,F0
+ FADDDP F0,F3
+ FMOVD 168(SP), F0
+ FMULDP F0,F4
+ FXCHD F0, F3
+ FADDDP F0,F4
+ FMOVD 136(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F1
+ FXCHD F0, F3
+ FMOVD 184(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F3
+ FXCHD F0, F1
+ FMOVD 168(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F1
+ FMOVD 152(SP), F0
+ FMULDP F0,F5
+ FXCHD F0, F4
+ FADDDP F0,F1
+ CMPQ DX,$16 // flags are consumed by the JAE at the end of the loop; only x87 instructions sit in between
+ FXCHD F0, F2
+ FMOVD 128(SP), F0 // add pass: fold in the message words parked at the top of this iteration, word 3 first
+ FSUBD ·DOFFSET3MINUSTWO128(SB), F0 // same bias constant as used for word 3 of the first block
+ FADDDP F0,F1
+ FXCHD F0, F1
+ FMOVD 120(SP), F0 // word 2
+ FSUBD ·DOFFSET2(SB), F0
+ FADDDP F0,F1
+ FXCHD F0, F3
+ FMOVD 112(SP), F0 // word 1
+ FSUBD ·DOFFSET1(SB), F0
+ FADDDP F0,F1
+ FXCHD F0, F2
+ FMOVD 104(SP), F0 // word 0
+ FSUBD ·DOFFSET0(SB), F0
+ FADDDP F0,F1
+ JAE MULTIPLYADDATLEAST16BYTES // at least 16 bytes still remain: run another iteration
+ MULTIPLYADDATMOST15BYTES: // fewer than 16 bytes remain: one more carry + multiply pass, with no message bytes read here
+ FMOVD ·ALPHA130(SB), F0 // carry pass: add then subtract an ·ALPHAn constant to split a value (presumably rounding it to a multiple of 2^n -- confirm against the constant definitions)
+ FADDD F2,F0
+ FSUBD ·ALPHA130(SB), F0
+ FSUBD F0,F2 // remove the split-off part from the value it came from
+ FMULD ·SCALE(SB), F0 // scale the split-off part by ·SCALE
+ FMOVD ·ALPHA32(SB), F0
+ FADDD F2,F0
+ FSUBD ·ALPHA32(SB), F0
+ FSUBD F0,F2
+ FMOVD ·ALPHA64(SB), F0
+ FADDD F5,F0
+ FSUBD ·ALPHA64(SB), F0
+ FSUBD F0,F5
+ FMOVD ·ALPHA96(SB), F0
+ FADDD F7,F0
+ FSUBD ·ALPHA96(SB), F0
+ FSUBD F0,F7
+ FXCHD F0, F7
+ FADDDP F0,F1
+ FXCHD F0, F5
+ FADDDP F0,F1
+ FXCHD F0, F3
+ FADDDP F0,F5
+ FADDDP F0,F1
+ FMOVD 176(SP), F0 // multiply pass: products of the accumulator values with the key values saved at 136..184(SP)
+ FMULD F1,F0
+ FMOVD 160(SP), F0
+ FMULD F2,F0
+ FMOVD 144(SP), F0
+ FMULD F3,F0
+ FMOVD 136(SP), F0
+ FMULDP F0,F4
+ FMOVD 160(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F3
+ FMOVD 144(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F2
+ FMOVD 136(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F1
+ FMOVD 184(SP), F0
+ FMULDP F0,F5
+ FXCHD F0, F4
+ FADDDP F0,F3
+ FMOVD 144(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F2
+ FMOVD 136(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F1
+ FMOVD 184(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F4
+ FMOVD 168(SP), F0
+ FMULDP F0,F5
+ FXCHD F0, F4
+ FADDDP F0,F2
+ FMOVD 136(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F1
+ FMOVD 184(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F4
+ FMOVD 168(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F3
+ FMOVD 152(SP), F0
+ FMULDP F0,F5
+ FXCHD F0, F4
+ FADDDP F0,F1 // execution falls through into ADDATMOST15BYTES
+ ADDATMOST15BYTES: // handle a trailing partial block of 0..15 bytes
+ CMPQ DX,$0
+ JE NOMOREBYTES // nothing left over: go finalize
+ MOVL $0,0(SP) // zero a 16-byte scratch block at 0(SP)
+ MOVL $0, 4 (SP)
+ MOVL $0, 8 (SP)
+ MOVL $0, 12 (SP)
+ LEAQ 0(SP),DI // DI = copy destination (scratch block)
+ MOVQ DX,CX // CX = number of bytes to copy
+ REP; MOVSB // copy the remaining DX message bytes from (SI) into the scratch block
+ MOVB $1,0(DI) // DI now points just past the copied bytes: append a single 0x01 byte there
+ MOVL 12 (SP),DI // reload the padded block as four 32-bit words
+ MOVL 8 (SP),SI
+ MOVL 4 (SP),DX
+ MOVL 0(SP),CX
+ MOVL DI,128(SP) // write them into the low halves of the 104..128(SP) slots
+ MOVL SI,120(SP)
+ MOVL DX,112(SP)
+ MOVL CX,104(SP)
+ FXCHD F0, F3
+ FADDD 128(SP), F0 // add word 3 ...
+ FSUBD ·DOFFSET3(SB), F0 // ... using plain ·DOFFSET3 here, not the MINUSTWO128 variant used for full blocks (the pad is the explicit 0x01 byte above)
+ FXCHD F0, F2
+ FADDD 120(SP), F0 // word 2
+ FSUBD ·DOFFSET2(SB), F0
+ FXCHD F0, F1
+ FADDD 112(SP), F0 // word 1
+ FSUBD ·DOFFSET1(SB), F0
+ FXCHD F0, F3
+ FADDD 104(SP), F0 // word 0
+ FSUBD ·DOFFSET0(SB), F0
+ FMOVD ·ALPHA130(SB), F0 // carry pass: add then subtract an ·ALPHAn constant to split a value (presumably rounding it to a multiple of 2^n -- confirm against the constant definitions)
+ FADDD F3,F0
+ FSUBD ·ALPHA130(SB), F0
+ FSUBD F0,F3 // remove the split-off part from the value it came from
+ FMULD ·SCALE(SB), F0 // scale the split-off part by ·SCALE
+ FMOVD ·ALPHA32(SB), F0
+ FADDD F2,F0
+ FSUBD ·ALPHA32(SB), F0
+ FSUBD F0,F2
+ FMOVD ·ALPHA64(SB), F0
+ FADDD F6,F0
+ FSUBD ·ALPHA64(SB), F0
+ FSUBD F0,F6
+ FMOVD ·ALPHA96(SB), F0
+ FADDD F5,F0
+ FSUBD ·ALPHA96(SB), F0
+ FSUBD F0,F5
+ FXCHD F0, F4
+ FADDDP F0,F3
+ FXCHD F0, F6
+ FADDDP F0,F1
+ FXCHD F0, F3
+ FADDDP F0,F5
+ FXCHD F0, F3
+ FADDDP F0,F1
+ FMOVD 176(SP), F0 // multiply pass: products of the accumulator values with the key values saved at 136..184(SP)
+ FMULD F3,F0
+ FMOVD 160(SP), F0
+ FMULD F4,F0
+ FMOVD 144(SP), F0
+ FMULD F5,F0
+ FMOVD 136(SP), F0
+ FMULDP F0,F6
+ FMOVD 160(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F3
+ FMOVD 144(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F2
+ FMOVD 136(SP), F0
+ FMULD F5,F0
+ FADDDP F0,F1
+ FMOVD 184(SP), F0
+ FMULDP F0,F5
+ FXCHD F0, F4
+ FADDDP F0,F5
+ FMOVD 144(SP), F0
+ FMULD F6,F0
+ FADDDP F0,F2
+ FMOVD 136(SP), F0
+ FMULD F6,F0
+ FADDDP F0,F1
+ FMOVD 184(SP), F0
+ FMULD F6,F0
+ FADDDP F0,F4
+ FMOVD 168(SP), F0
+ FMULDP F0,F6
+ FXCHD F0, F5
+ FADDDP F0,F4
+ FMOVD 136(SP), F0
+ FMULD F2,F0
+ FADDDP F0,F1
+ FMOVD 184(SP), F0
+ FMULD F2,F0
+ FADDDP F0,F5
+ FMOVD 168(SP), F0
+ FMULD F2,F0
+ FADDDP F0,F3
+ FMOVD 152(SP), F0
+ FMULDP F0,F2
+ FXCHD F0, F1
+ FADDDP F0,F3
+ FXCHD F0, F3 // two final exchanges; presumably they put the stack in the order NOMOREBYTES expects -- confirm
+ FXCHD F0, F2
+ NOMOREBYTES: // finalize: last carry pass, convert to integers, conditional reduction, add key bytes 16..31, write the 16-byte result
+ MOVL $0,R10 // R10 = 0, used below as a zero source in the ADCL chains
+ FMOVD ·ALPHA130(SB), F0 // last carry pass, same add-then-subtract ·ALPHAn pattern as in the block-processing code
+ FADDD F4,F0
+ FSUBD ·ALPHA130(SB), F0
+ FSUBD F0,F4
+ FMULD ·SCALE(SB), F0
+ FMOVD ·ALPHA32(SB), F0
+ FADDD F2,F0
+ FSUBD ·ALPHA32(SB), F0
+ FSUBD F0,F2
+ FMOVD ·ALPHA64(SB), F0
+ FADDD F4,F0
+ FSUBD ·ALPHA64(SB), F0
+ FSUBD F0,F4
+ FMOVD ·ALPHA96(SB), F0
+ FADDD F6,F0
+ FSUBD ·ALPHA96(SB), F0
+ FXCHD F0, F6
+ FSUBD F6,F0
+ FXCHD F0, F4
+ FADDDP F0,F3
+ FXCHD F0, F4
+ FADDDP F0,F1
+ FXCHD F0, F2
+ FADDDP F0,F3
+ FXCHD F0, F4
+ FADDDP F0,F3
+ FXCHD F0, F3
+ FADDD ·HOFFSET0(SB), F0 // add ·HOFFSETn to each value (presumably biases so the integer bits land in the stored mantissa -- confirm against the constant definitions)
+ FXCHD F0, F3
+ FADDD ·HOFFSET1(SB), F0
+ FXCHD F0, F1
+ FADDD ·HOFFSET2(SB), F0
+ FXCHD F0, F2
+ FADDD ·HOFFSET3(SB), F0
+ FXCHD F0, F3
+ FMOVDP F0, 104(SP) // store and pop the four values as doubles into the 104..128(SP) slots
+ FMOVDP F0, 112(SP)
+ FMOVDP F0, 120(SP)
+ FMOVDP F0, 128(SP)
+ MOVL 108(SP),DI // high word of each stored double, low 6 bits kept: the part of each value above its low 32 bits
+ ANDL $63,DI
+ MOVL 116(SP),SI
+ ANDL $63,SI
+ MOVL 124(SP),DX
+ ANDL $63,DX
+ MOVL 132(SP),CX
+ ANDL $63,CX
+ MOVL 112(SP),R8 // carry chain: word1 = low(112) + high bits of slot 0
+ ADDL DI,R8
+ MOVQ R8,112(SP)
+ MOVL 120(SP),DI // word2 = low(120) + high bits of slot 1 + carry
+ ADCL SI,DI
+ MOVQ DI,120(SP)
+ MOVL 128(SP),DI // word3 = low(128) + high bits of slot 2 + carry
+ ADCL DX,DI
+ MOVQ DI,128(SP)
+ MOVL R10,DI // word4 = 0 + high bits of slot 3 + carry, stored at 136(SP) (the key value kept there is no longer needed)
+ ADCL CX,DI
+ MOVQ DI,136(SP)
+ MOVQ $5,DI // compute the value + 5 word by word; the original words stay in SI, DX, CX, R8
+ MOVL 104(SP),SI
+ ADDL SI,DI
+ MOVQ DI,104(SP) // 104(SP) = word0 + 5
+ MOVL R10,DI // the MOVs between the ADCLs leave the carry flag intact
+ MOVQ 112(SP),DX
+ ADCL DX,DI
+ MOVQ DI,112(SP) // 112(SP) = word1 + carry
+ MOVL R10,DI
+ MOVQ 120(SP),CX
+ ADCL CX,DI
+ MOVQ DI,120(SP) // 120(SP) = word2 + carry
+ MOVL R10,DI
+ MOVQ 128(SP),R8
+ ADCL R8,DI
+ MOVQ DI,128(SP) // 128(SP) = word3 + carry
+ MOVQ $0XFFFFFFFC,DI
+ MOVQ 136(SP),R9
+ ADCL R9,DI // DI = word4 + carry - 4 (mod 2^32): negative exactly when the value + 5 is below 2^130
+ SARL $16,DI // sign-extend: DI = 0xFFFFFFFF when that result was negative, 0 when it was a small non-negative number
+ MOVQ DI,R9
+ XORL $0XFFFFFFFF,R9 // R9 = complement of the mask
+ ANDQ DI,SI // per word: keep the original where DI is all ones, otherwise take the +5 version from the stack slot
+ MOVQ 104(SP),AX
+ ANDQ R9,AX
+ ORQ AX,SI
+ ANDQ DI,DX
+ MOVQ 112(SP),AX
+ ANDQ R9,AX
+ ORQ AX,DX
+ ANDQ DI,CX
+ MOVQ 120(SP),AX
+ ANDQ R9,AX
+ ORQ AX,CX
+ ANDQ DI,R8
+ MOVQ 128(SP),DI // DI doubles as the temporary here; the mask is not needed after this
+ ANDQ R9,DI
+ ORQ DI,R8
+ MOVQ 88(SP),DI // DI = out (stashed at the start of the routine)
+ MOVQ 96(SP),R9 // R9 = key (stashed at the start of the routine)
+ ADDL 16(R9),SI // add key bytes 16..31 as four 32-bit words with carry; the carry out of the top word is dropped
+ ADCL 20(R9),DX
+ ADCL 24(R9),CX
+ ADCL 28(R9),R8
+ MOVL SI,0(DI) // write the 16-byte result to out
+ MOVL DX,4(DI)
+ MOVL CX,8(DI)
+ MOVL R8,12(DI)
+ MOVQ 32(SP),R11 // reload the registers spilled at the start of the routine
+ MOVQ 40(SP),R12
+ MOVQ 48(SP),R13
+ MOVQ 56(SP),R14
+ MOVQ 64(SP),R15
+ MOVQ 72(SP),BX
+ MOVQ 80(SP),BP
+ MOVQ R11,SP // restore the incoming SP saved before the manual realignment
+ RET