From 38ee83e45b4de7edf89bf9f0ef629eb4c6ad0fa8 Mon Sep 17 00:00:00 2001 From: Christopher Speller Date: Thu, 12 May 2016 23:56:07 -0400 Subject: Moving to glide --- .../golang.org/x/crypto/poly1305/poly1305_amd64.s | 497 +++++++++++++++++++++ 1 file changed, 497 insertions(+) create mode 100644 vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s (limited to 'vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s') diff --git a/vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s b/vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s new file mode 100644 index 000000000..f8d4ee928 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s @@ -0,0 +1,497 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html + +// +build amd64,!gccgo,!appengine + +// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]key) +TEXT ·poly1305(SB),0,$224-32 + MOVQ out+0(FP),DI + MOVQ m+8(FP),SI + MOVQ mlen+16(FP),DX + MOVQ key+24(FP),CX + + MOVQ SP,R11 + MOVQ $31,R9 + NOTQ R9 + ANDQ R9,SP + ADDQ $32,SP + + MOVQ R11,32(SP) + MOVQ R12,40(SP) + MOVQ R13,48(SP) + MOVQ R14,56(SP) + MOVQ R15,64(SP) + MOVQ BX,72(SP) + MOVQ BP,80(SP) + FLDCW ·ROUNDING(SB) + MOVL 0(CX),R8 + MOVL 4(CX),R9 + MOVL 8(CX),AX + MOVL 12(CX),R10 + MOVQ DI,88(SP) + MOVQ CX,96(SP) + MOVL $0X43300000,108(SP) + MOVL $0X45300000,116(SP) + MOVL $0X47300000,124(SP) + MOVL $0X49300000,132(SP) + ANDL $0X0FFFFFFF,R8 + ANDL $0X0FFFFFFC,R9 + ANDL $0X0FFFFFFC,AX + ANDL $0X0FFFFFFC,R10 + MOVL R8,104(SP) + MOVL R9,112(SP) + MOVL AX,120(SP) + MOVL R10,128(SP) + FMOVD 104(SP), F0 + FSUBD ·DOFFSET0(SB), F0 + FMOVD 112(SP), F0 + FSUBD ·DOFFSET1(SB), F0 + FMOVD 120(SP), F0 + FSUBD ·DOFFSET2(SB), F0 + FMOVD 128(SP), F0 + FSUBD ·DOFFSET3(SB), F0 + FXCHD F0, F3 + FMOVDP F0, 136(SP) + FXCHD F0, F1 + FMOVD F0, 144(SP) + FMULD ·SCALE(SB), F0 + FMOVDP F0, 152(SP) + FMOVD F0, 160(SP) + FMULD ·SCALE(SB), F0 + FMOVDP F0, 168(SP) + FMOVD F0, 176(SP) + FMULD ·SCALE(SB), F0 + FMOVDP F0, 184(SP) + FLDZ + FLDZ + FLDZ + FLDZ + CMPQ DX,$16 + JB ADDATMOST15BYTES + INITIALATLEAST16BYTES: + MOVL 12(SI),DI + MOVL 8(SI),CX + MOVL 4(SI),R8 + MOVL 0(SI),R9 + MOVL DI,128(SP) + MOVL CX,120(SP) + MOVL R8,112(SP) + MOVL R9,104(SP) + ADDQ $16,SI + SUBQ $16,DX + FXCHD F0, F3 + FADDD 128(SP), F0 + FSUBD ·DOFFSET3MINUSTWO128(SB), F0 + FXCHD F0, F1 + FADDD 112(SP), F0 + FSUBD ·DOFFSET1(SB), F0 + FXCHD F0, F2 + FADDD 120(SP), F0 + FSUBD ·DOFFSET2(SB), F0 + FXCHD F0, F3 + FADDD 104(SP), F0 + FSUBD ·DOFFSET0(SB), F0 + CMPQ DX,$16 + JB MULTIPLYADDATMOST15BYTES + MULTIPLYADDATLEAST16BYTES: + MOVL 12(SI),DI + MOVL 8(SI),CX + MOVL 4(SI),R8 + MOVL 0(SI),R9 + MOVL DI,128(SP) + MOVL CX,120(SP) + MOVL R8,112(SP) + MOVL R9,104(SP) + ADDQ $16,SI + SUBQ $16,DX + FMOVD ·ALPHA130(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA130(SB), F0 + FSUBD F0,F2 + FMULD ·SCALE(SB), F0 + FMOVD ·ALPHA32(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA32(SB), F0 + FSUBD F0,F2 + FXCHD F0, F2 + FADDDP F0,F1 + FMOVD ·ALPHA64(SB), F0 + FADDD F4,F0 + FSUBD ·ALPHA64(SB), F0 + FSUBD F0,F4 + FMOVD ·ALPHA96(SB), F0 + FADDD F6,F0 + FSUBD ·ALPHA96(SB), F0 + FSUBD F0,F6 + FXCHD F0, F6 + FADDDP F0,F1 + FXCHD F0, F3 + FADDDP F0,F5 + FXCHD F0, F3 + FADDDP F0,F1 + FMOVD 176(SP), F0 + FMULD F3,F0 + FMOVD 160(SP), F0 + FMULD F4,F0 + FMOVD 144(SP), F0 + FMULD F5,F0 + FMOVD 136(SP), F0 + FMULDP F0,F6 + FMOVD 160(SP), F0 + FMULD F4,F0 + FADDDP F0,F3 + FMOVD 144(SP), F0 + FMULD F4,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F4,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULDP F0,F4 + FXCHD F0, F3 + FADDDP F0,F5 + FMOVD 144(SP), F0 + FMULD F4,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F4,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F4,F0 + FADDDP F0,F3 + FMOVD 168(SP), F0 + FMULDP F0,F4 + FXCHD F0, F3 + FADDDP F0,F4 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FXCHD F0, F3 + FMOVD 184(SP), F0 + FMULD F5,F0 + FADDDP F0,F3 + FXCHD F0, F1 + FMOVD 168(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 152(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F1 + CMPQ DX,$16 + FXCHD F0, F2 + FMOVD 128(SP), F0 + FSUBD ·DOFFSET3MINUSTWO128(SB), F0 + FADDDP F0,F1 + FXCHD F0, F1 + FMOVD 120(SP), F0 + FSUBD ·DOFFSET2(SB), F0 + FADDDP F0,F1 + FXCHD F0, F3 + FMOVD 112(SP), F0 + FSUBD ·DOFFSET1(SB), F0 + FADDDP F0,F1 + FXCHD F0, F2 + FMOVD 104(SP), F0 + FSUBD ·DOFFSET0(SB), F0 + FADDDP F0,F1 + JAE MULTIPLYADDATLEAST16BYTES + MULTIPLYADDATMOST15BYTES: + FMOVD ·ALPHA130(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA130(SB), F0 + FSUBD F0,F2 + FMULD ·SCALE(SB), F0 + FMOVD ·ALPHA32(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA32(SB), F0 + FSUBD F0,F2 + FMOVD ·ALPHA64(SB), F0 + FADDD F5,F0 + FSUBD ·ALPHA64(SB), F0 + FSUBD F0,F5 + FMOVD ·ALPHA96(SB), F0 + FADDD F7,F0 + FSUBD ·ALPHA96(SB), F0 + FSUBD F0,F7 + FXCHD F0, F7 + FADDDP F0,F1 + FXCHD F0, F5 + FADDDP F0,F1 + FXCHD F0, F3 + FADDDP F0,F5 + FADDDP F0,F1 + FMOVD 176(SP), F0 + FMULD F1,F0 + FMOVD 160(SP), F0 + FMULD F2,F0 + FMOVD 144(SP), F0 + FMULD F3,F0 + FMOVD 136(SP), F0 + FMULDP F0,F4 + FMOVD 160(SP), F0 + FMULD F5,F0 + FADDDP F0,F3 + FMOVD 144(SP), F0 + FMULD F5,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F3 + FMOVD 144(SP), F0 + FMULD F5,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F5,F0 + FADDDP F0,F4 + FMOVD 168(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F5,F0 + FADDDP F0,F4 + FMOVD 168(SP), F0 + FMULD F5,F0 + FADDDP F0,F3 + FMOVD 152(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F1 + ADDATMOST15BYTES: + CMPQ DX,$0 + JE NOMOREBYTES + MOVL $0,0(SP) + MOVL $0, 4 (SP) + MOVL $0, 8 (SP) + MOVL $0, 12 (SP) + LEAQ 0(SP),DI + MOVQ DX,CX + REP; MOVSB + MOVB $1,0(DI) + MOVL 12 (SP),DI + MOVL 8 (SP),SI + MOVL 4 (SP),DX + MOVL 0(SP),CX + MOVL DI,128(SP) + MOVL SI,120(SP) + MOVL DX,112(SP) + MOVL CX,104(SP) + FXCHD F0, F3 + FADDD 128(SP), F0 + FSUBD ·DOFFSET3(SB), F0 + FXCHD F0, F2 + FADDD 120(SP), F0 + FSUBD ·DOFFSET2(SB), F0 + FXCHD F0, F1 + FADDD 112(SP), F0 + FSUBD ·DOFFSET1(SB), F0 + FXCHD F0, F3 + FADDD 104(SP), F0 + FSUBD ·DOFFSET0(SB), F0 + FMOVD ·ALPHA130(SB), F0 + FADDD F3,F0 + FSUBD ·ALPHA130(SB), F0 + FSUBD F0,F3 + FMULD ·SCALE(SB), F0 + FMOVD ·ALPHA32(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA32(SB), F0 + FSUBD F0,F2 + FMOVD ·ALPHA64(SB), F0 + FADDD F6,F0 + FSUBD ·ALPHA64(SB), F0 + FSUBD F0,F6 + FMOVD ·ALPHA96(SB), F0 + FADDD F5,F0 + FSUBD ·ALPHA96(SB), F0 + FSUBD F0,F5 + FXCHD F0, F4 + FADDDP F0,F3 + FXCHD F0, F6 + FADDDP F0,F1 + FXCHD F0, F3 + FADDDP F0,F5 + FXCHD F0, F3 + FADDDP F0,F1 + FMOVD 176(SP), F0 + FMULD F3,F0 + FMOVD 160(SP), F0 + FMULD F4,F0 + FMOVD 144(SP), F0 + FMULD F5,F0 + FMOVD 136(SP), F0 + FMULDP F0,F6 + FMOVD 160(SP), F0 + FMULD F5,F0 + FADDDP F0,F3 + FMOVD 144(SP), F0 + FMULD F5,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F5 + FMOVD 144(SP), F0 + FMULD F6,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F6,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F6,F0 + FADDDP F0,F4 + FMOVD 168(SP), F0 + FMULDP F0,F6 + FXCHD F0, F5 + FADDDP F0,F4 + FMOVD 136(SP), F0 + FMULD F2,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F2,F0 + FADDDP F0,F5 + FMOVD 168(SP), F0 + FMULD F2,F0 + FADDDP F0,F3 + FMOVD 152(SP), F0 + FMULDP F0,F2 + FXCHD F0, F1 + FADDDP F0,F3 + FXCHD F0, F3 + FXCHD F0, F2 + NOMOREBYTES: + MOVL $0,R10 + FMOVD ·ALPHA130(SB), F0 + FADDD F4,F0 + FSUBD ·ALPHA130(SB), F0 + FSUBD F0,F4 + FMULD ·SCALE(SB), F0 + FMOVD ·ALPHA32(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA32(SB), F0 + FSUBD F0,F2 + FMOVD ·ALPHA64(SB), F0 + FADDD F4,F0 + FSUBD ·ALPHA64(SB), F0 + FSUBD F0,F4 + FMOVD ·ALPHA96(SB), F0 + FADDD F6,F0 + FSUBD ·ALPHA96(SB), F0 + FXCHD F0, F6 + FSUBD F6,F0 + FXCHD F0, F4 + FADDDP F0,F3 + FXCHD F0, F4 + FADDDP F0,F1 + FXCHD F0, F2 + FADDDP F0,F3 + FXCHD F0, F4 + FADDDP F0,F3 + FXCHD F0, F3 + FADDD ·HOFFSET0(SB), F0 + FXCHD F0, F3 + FADDD ·HOFFSET1(SB), F0 + FXCHD F0, F1 + FADDD ·HOFFSET2(SB), F0 + FXCHD F0, F2 + FADDD ·HOFFSET3(SB), F0 + FXCHD F0, F3 + FMOVDP F0, 104(SP) + FMOVDP F0, 112(SP) + FMOVDP F0, 120(SP) + FMOVDP F0, 128(SP) + MOVL 108(SP),DI + ANDL $63,DI + MOVL 116(SP),SI + ANDL $63,SI + MOVL 124(SP),DX + ANDL $63,DX + MOVL 132(SP),CX + ANDL $63,CX + MOVL 112(SP),R8 + ADDL DI,R8 + MOVQ R8,112(SP) + MOVL 120(SP),DI + ADCL SI,DI + MOVQ DI,120(SP) + MOVL 128(SP),DI + ADCL DX,DI + MOVQ DI,128(SP) + MOVL R10,DI + ADCL CX,DI + MOVQ DI,136(SP) + MOVQ $5,DI + MOVL 104(SP),SI + ADDL SI,DI + MOVQ DI,104(SP) + MOVL R10,DI + MOVQ 112(SP),DX + ADCL DX,DI + MOVQ DI,112(SP) + MOVL R10,DI + MOVQ 120(SP),CX + ADCL CX,DI + MOVQ DI,120(SP) + MOVL R10,DI + MOVQ 128(SP),R8 + ADCL R8,DI + MOVQ DI,128(SP) + MOVQ $0XFFFFFFFC,DI + MOVQ 136(SP),R9 + ADCL R9,DI + SARL $16,DI + MOVQ DI,R9 + XORL $0XFFFFFFFF,R9 + ANDQ DI,SI + MOVQ 104(SP),AX + ANDQ R9,AX + ORQ AX,SI + ANDQ DI,DX + MOVQ 112(SP),AX + ANDQ R9,AX + ORQ AX,DX + ANDQ DI,CX + MOVQ 120(SP),AX + ANDQ R9,AX + ORQ AX,CX + ANDQ DI,R8 + MOVQ 128(SP),DI + ANDQ R9,DI + ORQ DI,R8 + MOVQ 88(SP),DI + MOVQ 96(SP),R9 + ADDL 16(R9),SI + ADCL 20(R9),DX + ADCL 24(R9),CX + ADCL 28(R9),R8 + MOVL SI,0(DI) + MOVL DX,4(DI) + MOVL CX,8(DI) + MOVL R8,12(DI) + MOVQ 32(SP),R11 + MOVQ 40(SP),R12 + MOVQ 48(SP),R13 + MOVQ 56(SP),R14 + MOVQ 64(SP),R15 + MOVQ 72(SP),BX + MOVQ 80(SP),BP + MOVQ R11,SP + RET -- cgit v1.2.3-1-g7c22