From 38ee83e45b4de7edf89bf9f0ef629eb4c6ad0fa8 Mon Sep 17 00:00:00 2001 From: Christopher Speller Date: Thu, 12 May 2016 23:56:07 -0400 Subject: Moving to glide --- vendor/golang.org/x/crypto/poly1305/const_amd64.s | 45 + vendor/golang.org/x/crypto/poly1305/poly1305.go | 32 + .../golang.org/x/crypto/poly1305/poly1305_amd64.s | 497 +++++++ vendor/golang.org/x/crypto/poly1305/poly1305_arm.s | 379 +++++ .../golang.org/x/crypto/poly1305/poly1305_test.go | 86 ++ vendor/golang.org/x/crypto/poly1305/sum_amd64.go | 24 + vendor/golang.org/x/crypto/poly1305/sum_arm.go | 24 + vendor/golang.org/x/crypto/poly1305/sum_ref.go | 1531 ++++++++++++++++++++ 8 files changed, 2618 insertions(+) create mode 100644 vendor/golang.org/x/crypto/poly1305/const_amd64.s create mode 100644 vendor/golang.org/x/crypto/poly1305/poly1305.go create mode 100644 vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s create mode 100644 vendor/golang.org/x/crypto/poly1305/poly1305_arm.s create mode 100644 vendor/golang.org/x/crypto/poly1305/poly1305_test.go create mode 100644 vendor/golang.org/x/crypto/poly1305/sum_amd64.go create mode 100644 vendor/golang.org/x/crypto/poly1305/sum_arm.go create mode 100644 vendor/golang.org/x/crypto/poly1305/sum_ref.go (limited to 'vendor/golang.org/x/crypto/poly1305') diff --git a/vendor/golang.org/x/crypto/poly1305/const_amd64.s b/vendor/golang.org/x/crypto/poly1305/const_amd64.s new file mode 100644 index 000000000..8e861f337 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/const_amd64.s @@ -0,0 +1,45 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html + +// +build amd64,!gccgo,!appengine + +DATA ·SCALE(SB)/8, $0x37F4000000000000 +GLOBL ·SCALE(SB), 8, $8 +DATA ·TWO32(SB)/8, $0x41F0000000000000 +GLOBL ·TWO32(SB), 8, $8 +DATA ·TWO64(SB)/8, $0x43F0000000000000 +GLOBL ·TWO64(SB), 8, $8 +DATA ·TWO96(SB)/8, $0x45F0000000000000 +GLOBL ·TWO96(SB), 8, $8 +DATA ·ALPHA32(SB)/8, $0x45E8000000000000 +GLOBL ·ALPHA32(SB), 8, $8 +DATA ·ALPHA64(SB)/8, $0x47E8000000000000 +GLOBL ·ALPHA64(SB), 8, $8 +DATA ·ALPHA96(SB)/8, $0x49E8000000000000 +GLOBL ·ALPHA96(SB), 8, $8 +DATA ·ALPHA130(SB)/8, $0x4C08000000000000 +GLOBL ·ALPHA130(SB), 8, $8 +DATA ·DOFFSET0(SB)/8, $0x4330000000000000 +GLOBL ·DOFFSET0(SB), 8, $8 +DATA ·DOFFSET1(SB)/8, $0x4530000000000000 +GLOBL ·DOFFSET1(SB), 8, $8 +DATA ·DOFFSET2(SB)/8, $0x4730000000000000 +GLOBL ·DOFFSET2(SB), 8, $8 +DATA ·DOFFSET3(SB)/8, $0x4930000000000000 +GLOBL ·DOFFSET3(SB), 8, $8 +DATA ·DOFFSET3MINUSTWO128(SB)/8, $0x492FFFFE00000000 +GLOBL ·DOFFSET3MINUSTWO128(SB), 8, $8 +DATA ·HOFFSET0(SB)/8, $0x43300001FFFFFFFB +GLOBL ·HOFFSET0(SB), 8, $8 +DATA ·HOFFSET1(SB)/8, $0x45300001FFFFFFFE +GLOBL ·HOFFSET1(SB), 8, $8 +DATA ·HOFFSET2(SB)/8, $0x47300001FFFFFFFE +GLOBL ·HOFFSET2(SB), 8, $8 +DATA ·HOFFSET3(SB)/8, $0x49300003FFFFFFFE +GLOBL ·HOFFSET3(SB), 8, $8 +DATA ·ROUNDING(SB)/2, $0x137f +GLOBL ·ROUNDING(SB), 8, $2 diff --git a/vendor/golang.org/x/crypto/poly1305/poly1305.go b/vendor/golang.org/x/crypto/poly1305/poly1305.go new file mode 100644 index 000000000..4a5f826f7 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/poly1305.go @@ -0,0 +1,32 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package poly1305 implements Poly1305 one-time message authentication code as specified in http://cr.yp.to/mac/poly1305-20050329.pdf. + +Poly1305 is a fast, one-time authentication function. It is infeasible for an +attacker to generate an authenticator for a message without the key. However, a +key must only be used for a single message. Authenticating two different +messages with the same key allows an attacker to forge authenticators for other +messages with the same key. + +Poly1305 was originally coupled with AES in order to make Poly1305-AES. AES was +used with a fixed key in order to generate one-time keys from an nonce. +However, in this package AES isn't used and the one-time key is specified +directly. +*/ +package poly1305 // import "golang.org/x/crypto/poly1305" + +import "crypto/subtle" + +// TagSize is the size, in bytes, of a poly1305 authenticator. +const TagSize = 16 + +// Verify returns true if mac is a valid authenticator for m with the given +// key. +func Verify(mac *[16]byte, m []byte, key *[32]byte) bool { + var tmp [16]byte + Sum(&tmp, m, key) + return subtle.ConstantTimeCompare(tmp[:], mac[:]) == 1 +} diff --git a/vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s b/vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s new file mode 100644 index 000000000..f8d4ee928 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s @@ -0,0 +1,497 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html + +// +build amd64,!gccgo,!appengine + +// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]key) +TEXT ·poly1305(SB),0,$224-32 + MOVQ out+0(FP),DI + MOVQ m+8(FP),SI + MOVQ mlen+16(FP),DX + MOVQ key+24(FP),CX + + MOVQ SP,R11 + MOVQ $31,R9 + NOTQ R9 + ANDQ R9,SP + ADDQ $32,SP + + MOVQ R11,32(SP) + MOVQ R12,40(SP) + MOVQ R13,48(SP) + MOVQ R14,56(SP) + MOVQ R15,64(SP) + MOVQ BX,72(SP) + MOVQ BP,80(SP) + FLDCW ·ROUNDING(SB) + MOVL 0(CX),R8 + MOVL 4(CX),R9 + MOVL 8(CX),AX + MOVL 12(CX),R10 + MOVQ DI,88(SP) + MOVQ CX,96(SP) + MOVL $0X43300000,108(SP) + MOVL $0X45300000,116(SP) + MOVL $0X47300000,124(SP) + MOVL $0X49300000,132(SP) + ANDL $0X0FFFFFFF,R8 + ANDL $0X0FFFFFFC,R9 + ANDL $0X0FFFFFFC,AX + ANDL $0X0FFFFFFC,R10 + MOVL R8,104(SP) + MOVL R9,112(SP) + MOVL AX,120(SP) + MOVL R10,128(SP) + FMOVD 104(SP), F0 + FSUBD ·DOFFSET0(SB), F0 + FMOVD 112(SP), F0 + FSUBD ·DOFFSET1(SB), F0 + FMOVD 120(SP), F0 + FSUBD ·DOFFSET2(SB), F0 + FMOVD 128(SP), F0 + FSUBD ·DOFFSET3(SB), F0 + FXCHD F0, F3 + FMOVDP F0, 136(SP) + FXCHD F0, F1 + FMOVD F0, 144(SP) + FMULD ·SCALE(SB), F0 + FMOVDP F0, 152(SP) + FMOVD F0, 160(SP) + FMULD ·SCALE(SB), F0 + FMOVDP F0, 168(SP) + FMOVD F0, 176(SP) + FMULD ·SCALE(SB), F0 + FMOVDP F0, 184(SP) + FLDZ + FLDZ + FLDZ + FLDZ + CMPQ DX,$16 + JB ADDATMOST15BYTES + INITIALATLEAST16BYTES: + MOVL 12(SI),DI + MOVL 8(SI),CX + MOVL 4(SI),R8 + MOVL 0(SI),R9 + MOVL DI,128(SP) + MOVL CX,120(SP) + MOVL R8,112(SP) + MOVL R9,104(SP) + ADDQ $16,SI + SUBQ $16,DX + FXCHD F0, F3 + FADDD 128(SP), F0 + FSUBD ·DOFFSET3MINUSTWO128(SB), F0 + FXCHD F0, F1 + FADDD 112(SP), F0 + FSUBD ·DOFFSET1(SB), F0 + FXCHD F0, F2 + FADDD 120(SP), F0 + FSUBD ·DOFFSET2(SB), F0 + FXCHD F0, F3 + FADDD 104(SP), F0 + FSUBD ·DOFFSET0(SB), F0 + CMPQ DX,$16 + JB MULTIPLYADDATMOST15BYTES + MULTIPLYADDATLEAST16BYTES: + MOVL 12(SI),DI + MOVL 8(SI),CX + MOVL 4(SI),R8 + MOVL 0(SI),R9 + MOVL DI,128(SP) + MOVL CX,120(SP) + MOVL R8,112(SP) + MOVL R9,104(SP) + ADDQ $16,SI + SUBQ $16,DX + FMOVD ·ALPHA130(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA130(SB), F0 + FSUBD F0,F2 + FMULD ·SCALE(SB), F0 + FMOVD ·ALPHA32(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA32(SB), F0 + FSUBD F0,F2 + FXCHD F0, F2 + FADDDP F0,F1 + FMOVD ·ALPHA64(SB), F0 + FADDD F4,F0 + FSUBD ·ALPHA64(SB), F0 + FSUBD F0,F4 + FMOVD ·ALPHA96(SB), F0 + FADDD F6,F0 + FSUBD ·ALPHA96(SB), F0 + FSUBD F0,F6 + FXCHD F0, F6 + FADDDP F0,F1 + FXCHD F0, F3 + FADDDP F0,F5 + FXCHD F0, F3 + FADDDP F0,F1 + FMOVD 176(SP), F0 + FMULD F3,F0 + FMOVD 160(SP), F0 + FMULD F4,F0 + FMOVD 144(SP), F0 + FMULD F5,F0 + FMOVD 136(SP), F0 + FMULDP F0,F6 + FMOVD 160(SP), F0 + FMULD F4,F0 + FADDDP F0,F3 + FMOVD 144(SP), F0 + FMULD F4,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F4,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULDP F0,F4 + FXCHD F0, F3 + FADDDP F0,F5 + FMOVD 144(SP), F0 + FMULD F4,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F4,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F4,F0 + FADDDP F0,F3 + FMOVD 168(SP), F0 + FMULDP F0,F4 + FXCHD F0, F3 + FADDDP F0,F4 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FXCHD F0, F3 + FMOVD 184(SP), F0 + FMULD F5,F0 + FADDDP F0,F3 + FXCHD F0, F1 + FMOVD 168(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 152(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F1 + CMPQ DX,$16 + FXCHD F0, F2 + FMOVD 128(SP), F0 + FSUBD ·DOFFSET3MINUSTWO128(SB), F0 + FADDDP F0,F1 + FXCHD F0, F1 + FMOVD 120(SP), F0 + FSUBD ·DOFFSET2(SB), F0 + FADDDP F0,F1 + FXCHD F0, F3 + FMOVD 112(SP), F0 + FSUBD ·DOFFSET1(SB), F0 + FADDDP F0,F1 + FXCHD F0, F2 + FMOVD 104(SP), F0 + FSUBD ·DOFFSET0(SB), F0 + FADDDP F0,F1 + JAE MULTIPLYADDATLEAST16BYTES + MULTIPLYADDATMOST15BYTES: + FMOVD ·ALPHA130(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA130(SB), F0 + FSUBD F0,F2 + FMULD ·SCALE(SB), F0 + FMOVD ·ALPHA32(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA32(SB), F0 + FSUBD F0,F2 + FMOVD ·ALPHA64(SB), F0 + FADDD F5,F0 + FSUBD ·ALPHA64(SB), F0 + FSUBD F0,F5 + FMOVD ·ALPHA96(SB), F0 + FADDD F7,F0 + FSUBD ·ALPHA96(SB), F0 + FSUBD F0,F7 + FXCHD F0, F7 + FADDDP F0,F1 + FXCHD F0, F5 + FADDDP F0,F1 + FXCHD F0, F3 + FADDDP F0,F5 + FADDDP F0,F1 + FMOVD 176(SP), F0 + FMULD F1,F0 + FMOVD 160(SP), F0 + FMULD F2,F0 + FMOVD 144(SP), F0 + FMULD F3,F0 + FMOVD 136(SP), F0 + FMULDP F0,F4 + FMOVD 160(SP), F0 + FMULD F5,F0 + FADDDP F0,F3 + FMOVD 144(SP), F0 + FMULD F5,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F3 + FMOVD 144(SP), F0 + FMULD F5,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F5,F0 + FADDDP F0,F4 + FMOVD 168(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F5,F0 + FADDDP F0,F4 + FMOVD 168(SP), F0 + FMULD F5,F0 + FADDDP F0,F3 + FMOVD 152(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F1 + ADDATMOST15BYTES: + CMPQ DX,$0 + JE NOMOREBYTES + MOVL $0,0(SP) + MOVL $0, 4 (SP) + MOVL $0, 8 (SP) + MOVL $0, 12 (SP) + LEAQ 0(SP),DI + MOVQ DX,CX + REP; MOVSB + MOVB $1,0(DI) + MOVL 12 (SP),DI + MOVL 8 (SP),SI + MOVL 4 (SP),DX + MOVL 0(SP),CX + MOVL DI,128(SP) + MOVL SI,120(SP) + MOVL DX,112(SP) + MOVL CX,104(SP) + FXCHD F0, F3 + FADDD 128(SP), F0 + FSUBD ·DOFFSET3(SB), F0 + FXCHD F0, F2 + FADDD 120(SP), F0 + FSUBD ·DOFFSET2(SB), F0 + FXCHD F0, F1 + FADDD 112(SP), F0 + FSUBD ·DOFFSET1(SB), F0 + FXCHD F0, F3 + FADDD 104(SP), F0 + FSUBD ·DOFFSET0(SB), F0 + FMOVD ·ALPHA130(SB), F0 + FADDD F3,F0 + FSUBD ·ALPHA130(SB), F0 + FSUBD F0,F3 + FMULD ·SCALE(SB), F0 + FMOVD ·ALPHA32(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA32(SB), F0 + FSUBD F0,F2 + FMOVD ·ALPHA64(SB), F0 + FADDD F6,F0 + FSUBD ·ALPHA64(SB), F0 + FSUBD F0,F6 + FMOVD ·ALPHA96(SB), F0 + FADDD F5,F0 + FSUBD ·ALPHA96(SB), F0 + FSUBD F0,F5 + FXCHD F0, F4 + FADDDP F0,F3 + FXCHD F0, F6 + FADDDP F0,F1 + FXCHD F0, F3 + FADDDP F0,F5 + FXCHD F0, F3 + FADDDP F0,F1 + FMOVD 176(SP), F0 + FMULD F3,F0 + FMOVD 160(SP), F0 + FMULD F4,F0 + FMOVD 144(SP), F0 + FMULD F5,F0 + FMOVD 136(SP), F0 + FMULDP F0,F6 + FMOVD 160(SP), F0 + FMULD F5,F0 + FADDDP F0,F3 + FMOVD 144(SP), F0 + FMULD F5,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F5 + FMOVD 144(SP), F0 + FMULD F6,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F6,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F6,F0 + FADDDP F0,F4 + FMOVD 168(SP), F0 + FMULDP F0,F6 + FXCHD F0, F5 + FADDDP F0,F4 + FMOVD 136(SP), F0 + FMULD F2,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F2,F0 + FADDDP F0,F5 + FMOVD 168(SP), F0 + FMULD F2,F0 + FADDDP F0,F3 + FMOVD 152(SP), F0 + FMULDP F0,F2 + FXCHD F0, F1 + FADDDP F0,F3 + FXCHD F0, F3 + FXCHD F0, F2 + NOMOREBYTES: + MOVL $0,R10 + FMOVD ·ALPHA130(SB), F0 + FADDD F4,F0 + FSUBD ·ALPHA130(SB), F0 + FSUBD F0,F4 + FMULD ·SCALE(SB), F0 + FMOVD ·ALPHA32(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA32(SB), F0 + FSUBD F0,F2 + FMOVD ·ALPHA64(SB), F0 + FADDD F4,F0 + FSUBD ·ALPHA64(SB), F0 + FSUBD F0,F4 + FMOVD ·ALPHA96(SB), F0 + FADDD F6,F0 + FSUBD ·ALPHA96(SB), F0 + FXCHD F0, F6 + FSUBD F6,F0 + FXCHD F0, F4 + FADDDP F0,F3 + FXCHD F0, F4 + FADDDP F0,F1 + FXCHD F0, F2 + FADDDP F0,F3 + FXCHD F0, F4 + FADDDP F0,F3 + FXCHD F0, F3 + FADDD ·HOFFSET0(SB), F0 + FXCHD F0, F3 + FADDD ·HOFFSET1(SB), F0 + FXCHD F0, F1 + FADDD ·HOFFSET2(SB), F0 + FXCHD F0, F2 + FADDD ·HOFFSET3(SB), F0 + FXCHD F0, F3 + FMOVDP F0, 104(SP) + FMOVDP F0, 112(SP) + FMOVDP F0, 120(SP) + FMOVDP F0, 128(SP) + MOVL 108(SP),DI + ANDL $63,DI + MOVL 116(SP),SI + ANDL $63,SI + MOVL 124(SP),DX + ANDL $63,DX + MOVL 132(SP),CX + ANDL $63,CX + MOVL 112(SP),R8 + ADDL DI,R8 + MOVQ R8,112(SP) + MOVL 120(SP),DI + ADCL SI,DI + MOVQ DI,120(SP) + MOVL 128(SP),DI + ADCL DX,DI + MOVQ DI,128(SP) + MOVL R10,DI + ADCL CX,DI + MOVQ DI,136(SP) + MOVQ $5,DI + MOVL 104(SP),SI + ADDL SI,DI + MOVQ DI,104(SP) + MOVL R10,DI + MOVQ 112(SP),DX + ADCL DX,DI + MOVQ DI,112(SP) + MOVL R10,DI + MOVQ 120(SP),CX + ADCL CX,DI + MOVQ DI,120(SP) + MOVL R10,DI + MOVQ 128(SP),R8 + ADCL R8,DI + MOVQ DI,128(SP) + MOVQ $0XFFFFFFFC,DI + MOVQ 136(SP),R9 + ADCL R9,DI + SARL $16,DI + MOVQ DI,R9 + XORL $0XFFFFFFFF,R9 + ANDQ DI,SI + MOVQ 104(SP),AX + ANDQ R9,AX + ORQ AX,SI + ANDQ DI,DX + MOVQ 112(SP),AX + ANDQ R9,AX + ORQ AX,DX + ANDQ DI,CX + MOVQ 120(SP),AX + ANDQ R9,AX + ORQ AX,CX + ANDQ DI,R8 + MOVQ 128(SP),DI + ANDQ R9,DI + ORQ DI,R8 + MOVQ 88(SP),DI + MOVQ 96(SP),R9 + ADDL 16(R9),SI + ADCL 20(R9),DX + ADCL 24(R9),CX + ADCL 28(R9),R8 + MOVL SI,0(DI) + MOVL DX,4(DI) + MOVL CX,8(DI) + MOVL R8,12(DI) + MOVQ 32(SP),R11 + MOVQ 40(SP),R12 + MOVQ 48(SP),R13 + MOVQ 56(SP),R14 + MOVQ 64(SP),R15 + MOVQ 72(SP),BX + MOVQ 80(SP),BP + MOVQ R11,SP + RET diff --git a/vendor/golang.org/x/crypto/poly1305/poly1305_arm.s b/vendor/golang.org/x/crypto/poly1305/poly1305_arm.s new file mode 100644 index 000000000..c15386744 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/poly1305_arm.s @@ -0,0 +1,379 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 5a from the public +// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305. + +// +build arm,!gccgo,!appengine + +DATA poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff +DATA poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03 +DATA poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff +DATA poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff +DATA poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff +GLOBL poly1305_init_constants_armv6<>(SB), 8, $20 + +// Warning: the linker may use R11 to synthesize certain instructions. Please +// take care and verify that no synthetic instructions use it. + +TEXT poly1305_init_ext_armv6<>(SB),4,$-4 + MOVM.DB.W [R4-R11], (R13) + MOVM.IA.W (R1), [R2-R5] + MOVW $poly1305_init_constants_armv6<>(SB), R7 + MOVW R2, R8 + MOVW R2>>26, R9 + MOVW R3>>20, g + MOVW R4>>14, R11 + MOVW R5>>8, R12 + ORR R3<<6, R9, R9 + ORR R4<<12, g, g + ORR R5<<18, R11, R11 + MOVM.IA (R7), [R2-R6] + AND R8, R2, R2 + AND R9, R3, R3 + AND g, R4, R4 + AND R11, R5, R5 + AND R12, R6, R6 + MOVM.IA.W [R2-R6], (R0) + EOR R2, R2, R2 + EOR R3, R3, R3 + EOR R4, R4, R4 + EOR R5, R5, R5 + EOR R6, R6, R6 + MOVM.IA.W [R2-R6], (R0) + MOVM.IA.W (R1), [R2-R5] + MOVM.IA [R2-R6], (R0) + MOVM.IA.W (R13), [R4-R11] + RET + +#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \ + MOVBU (offset+0)(Rsrc), Rtmp; \ + MOVBU Rtmp, (offset+0)(Rdst); \ + MOVBU (offset+1)(Rsrc), Rtmp; \ + MOVBU Rtmp, (offset+1)(Rdst); \ + MOVBU (offset+2)(Rsrc), Rtmp; \ + MOVBU Rtmp, (offset+2)(Rdst); \ + MOVBU (offset+3)(Rsrc), Rtmp; \ + MOVBU Rtmp, (offset+3)(Rdst) + +TEXT poly1305_blocks_armv6<>(SB),4,$-4 + MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13) + SUB $128, R13 + MOVW R0, 36(R13) + MOVW R1, 40(R13) + MOVW R2, 44(R13) + MOVW R1, R14 + MOVW R2, R12 + MOVW 56(R0), R8 + WORD $0xe1180008 // TST R8, R8 not working see issue 5921 + EOR R6, R6, R6 + MOVW.EQ $(1<<24), R6 + MOVW R6, 32(R13) + ADD $64, R13, g + MOVM.IA (R0), [R0-R9] + MOVM.IA [R0-R4], (g) + CMP $16, R12 + BLO poly1305_blocks_armv6_done +poly1305_blocks_armv6_mainloop: + WORD $0xe31e0003 // TST R14, #3 not working see issue 5921 + BEQ poly1305_blocks_armv6_mainloop_aligned + ADD $48, R13, g + MOVW_UNALIGNED(R14, g, R0, 0) + MOVW_UNALIGNED(R14, g, R0, 4) + MOVW_UNALIGNED(R14, g, R0, 8) + MOVW_UNALIGNED(R14, g, R0, 12) + MOVM.IA (g), [R0-R3] + ADD $16, R14 + B poly1305_blocks_armv6_mainloop_loaded +poly1305_blocks_armv6_mainloop_aligned: + MOVM.IA.W (R14), [R0-R3] +poly1305_blocks_armv6_mainloop_loaded: + MOVW R0>>26, g + MOVW R1>>20, R11 + MOVW R2>>14, R12 + MOVW R14, 40(R13) + MOVW R3>>8, R4 + ORR R1<<6, g, g + ORR R2<<12, R11, R11 + ORR R3<<18, R12, R12 + BIC $0xfc000000, R0, R0 + BIC $0xfc000000, g, g + MOVW 32(R13), R3 + BIC $0xfc000000, R11, R11 + BIC $0xfc000000, R12, R12 + ADD R0, R5, R5 + ADD g, R6, R6 + ORR R3, R4, R4 + ADD R11, R7, R7 + ADD $64, R13, R14 + ADD R12, R8, R8 + ADD R4, R9, R9 + MOVM.IA (R14), [R0-R4] + MULLU R4, R5, (R11, g) + MULLU R3, R5, (R14, R12) + MULALU R3, R6, (R11, g) + MULALU R2, R6, (R14, R12) + MULALU R2, R7, (R11, g) + MULALU R1, R7, (R14, R12) + ADD R4<<2, R4, R4 + ADD R3<<2, R3, R3 + MULALU R1, R8, (R11, g) + MULALU R0, R8, (R14, R12) + MULALU R0, R9, (R11, g) + MULALU R4, R9, (R14, R12) + MOVW g, 24(R13) + MOVW R11, 28(R13) + MOVW R12, 16(R13) + MOVW R14, 20(R13) + MULLU R2, R5, (R11, g) + MULLU R1, R5, (R14, R12) + MULALU R1, R6, (R11, g) + MULALU R0, R6, (R14, R12) + MULALU R0, R7, (R11, g) + MULALU R4, R7, (R14, R12) + ADD R2<<2, R2, R2 + ADD R1<<2, R1, R1 + MULALU R4, R8, (R11, g) + MULALU R3, R8, (R14, R12) + MULALU R3, R9, (R11, g) + MULALU R2, R9, (R14, R12) + MOVW g, 8(R13) + MOVW R11, 12(R13) + MOVW R12, 0(R13) + MOVW R14, w+4(SP) + MULLU R0, R5, (R11, g) + MULALU R4, R6, (R11, g) + MULALU R3, R7, (R11, g) + MULALU R2, R8, (R11, g) + MULALU R1, R9, (R11, g) + MOVM.IA (R13), [R0-R7] + MOVW g>>26, R12 + MOVW R4>>26, R14 + ORR R11<<6, R12, R12 + ORR R5<<6, R14, R14 + BIC $0xfc000000, g, g + BIC $0xfc000000, R4, R4 + ADD.S R12, R0, R0 + ADC $0, R1, R1 + ADD.S R14, R6, R6 + ADC $0, R7, R7 + MOVW R0>>26, R12 + MOVW R6>>26, R14 + ORR R1<<6, R12, R12 + ORR R7<<6, R14, R14 + BIC $0xfc000000, R0, R0 + BIC $0xfc000000, R6, R6 + ADD R14<<2, R14, R14 + ADD.S R12, R2, R2 + ADC $0, R3, R3 + ADD R14, g, g + MOVW R2>>26, R12 + MOVW g>>26, R14 + ORR R3<<6, R12, R12 + BIC $0xfc000000, g, R5 + BIC $0xfc000000, R2, R7 + ADD R12, R4, R4 + ADD R14, R0, R0 + MOVW R4>>26, R12 + BIC $0xfc000000, R4, R8 + ADD R12, R6, R9 + MOVW w+44(SP), R12 + MOVW w+40(SP), R14 + MOVW R0, R6 + CMP $32, R12 + SUB $16, R12, R12 + MOVW R12, 44(R13) + BHS poly1305_blocks_armv6_mainloop +poly1305_blocks_armv6_done: + MOVW 36(R13), R12 + MOVW R5, 20(R12) + MOVW R6, 24(R12) + MOVW R7, 28(R12) + MOVW R8, 32(R12) + MOVW R9, 36(R12) + ADD $128, R13, R13 + MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14] + RET + +#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \ + MOVBU.P 1(Rsrc), Rtmp; \ + MOVBU.P Rtmp, 1(Rdst); \ + MOVBU.P 1(Rsrc), Rtmp; \ + MOVBU.P Rtmp, 1(Rdst) + +#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \ + MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \ + MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) + +TEXT poly1305_finish_ext_armv6<>(SB),4,$-4 + MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13) + SUB $16, R13, R13 + MOVW R0, R5 + MOVW R1, R6 + MOVW R2, R7 + MOVW R3, R8 + AND.S R2, R2, R2 + BEQ poly1305_finish_ext_armv6_noremaining + EOR R0, R0 + MOVW R13, R9 + MOVW R0, 0(R13) + MOVW R0, 4(R13) + MOVW R0, 8(R13) + MOVW R0, 12(R13) + WORD $0xe3110003 // TST R1, #3 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_aligned + WORD $0xe3120008 // TST R2, #8 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip8 + MOVWP_UNALIGNED(R1, R9, g) + MOVWP_UNALIGNED(R1, R9, g) +poly1305_finish_ext_armv6_skip8: + WORD $0xe3120004 // TST $4, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip4 + MOVWP_UNALIGNED(R1, R9, g) +poly1305_finish_ext_armv6_skip4: + WORD $0xe3120002 // TST $2, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip2 + MOVHUP_UNALIGNED(R1, R9, g) + B poly1305_finish_ext_armv6_skip2 +poly1305_finish_ext_armv6_aligned: + WORD $0xe3120008 // TST R2, #8 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip8_aligned + MOVM.IA.W (R1), [g-R11] + MOVM.IA.W [g-R11], (R9) +poly1305_finish_ext_armv6_skip8_aligned: + WORD $0xe3120004 // TST $4, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip4_aligned + MOVW.P 4(R1), g + MOVW.P g, 4(R9) +poly1305_finish_ext_armv6_skip4_aligned: + WORD $0xe3120002 // TST $2, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip2 + MOVHU.P 2(R1), g + MOVH.P g, 2(R9) +poly1305_finish_ext_armv6_skip2: + WORD $0xe3120001 // TST $1, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip1 + MOVBU.P 1(R1), g + MOVBU.P g, 1(R9) +poly1305_finish_ext_armv6_skip1: + MOVW $1, R11 + MOVBU R11, 0(R9) + MOVW R11, 56(R5) + MOVW R5, R0 + MOVW R13, R1 + MOVW $16, R2 + BL poly1305_blocks_armv6<>(SB) +poly1305_finish_ext_armv6_noremaining: + MOVW 20(R5), R0 + MOVW 24(R5), R1 + MOVW 28(R5), R2 + MOVW 32(R5), R3 + MOVW 36(R5), R4 + MOVW R4>>26, R12 + BIC $0xfc000000, R4, R4 + ADD R12<<2, R12, R12 + ADD R12, R0, R0 + MOVW R0>>26, R12 + BIC $0xfc000000, R0, R0 + ADD R12, R1, R1 + MOVW R1>>26, R12 + BIC $0xfc000000, R1, R1 + ADD R12, R2, R2 + MOVW R2>>26, R12 + BIC $0xfc000000, R2, R2 + ADD R12, R3, R3 + MOVW R3>>26, R12 + BIC $0xfc000000, R3, R3 + ADD R12, R4, R4 + ADD $5, R0, R6 + MOVW R6>>26, R12 + BIC $0xfc000000, R6, R6 + ADD R12, R1, R7 + MOVW R7>>26, R12 + BIC $0xfc000000, R7, R7 + ADD R12, R2, g + MOVW g>>26, R12 + BIC $0xfc000000, g, g + ADD R12, R3, R11 + MOVW $-(1<<26), R12 + ADD R11>>26, R12, R12 + BIC $0xfc000000, R11, R11 + ADD R12, R4, R14 + MOVW R14>>31, R12 + SUB $1, R12 + AND R12, R6, R6 + AND R12, R7, R7 + AND R12, g, g + AND R12, R11, R11 + AND R12, R14, R14 + MVN R12, R12 + AND R12, R0, R0 + AND R12, R1, R1 + AND R12, R2, R2 + AND R12, R3, R3 + AND R12, R4, R4 + ORR R6, R0, R0 + ORR R7, R1, R1 + ORR g, R2, R2 + ORR R11, R3, R3 + ORR R14, R4, R4 + ORR R1<<26, R0, R0 + MOVW R1>>6, R1 + ORR R2<<20, R1, R1 + MOVW R2>>12, R2 + ORR R3<<14, R2, R2 + MOVW R3>>18, R3 + ORR R4<<8, R3, R3 + MOVW 40(R5), R6 + MOVW 44(R5), R7 + MOVW 48(R5), g + MOVW 52(R5), R11 + ADD.S R6, R0, R0 + ADC.S R7, R1, R1 + ADC.S g, R2, R2 + ADC.S R11, R3, R3 + MOVM.IA [R0-R3], (R8) + MOVW R5, R12 + EOR R0, R0, R0 + EOR R1, R1, R1 + EOR R2, R2, R2 + EOR R3, R3, R3 + EOR R4, R4, R4 + EOR R5, R5, R5 + EOR R6, R6, R6 + EOR R7, R7, R7 + MOVM.IA.W [R0-R7], (R12) + MOVM.IA [R0-R7], (R12) + ADD $16, R13, R13 + MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14] + RET + +// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]key) +TEXT ·poly1305_auth_armv6(SB),0,$280-16 + MOVW out+0(FP), R4 + MOVW m+4(FP), R5 + MOVW mlen+8(FP), R6 + MOVW key+12(FP), R7 + + MOVW R13, R8 + BIC $63, R13 + SUB $64, R13, R13 + MOVW R13, R0 + MOVW R7, R1 + BL poly1305_init_ext_armv6<>(SB) + BIC.S $15, R6, R2 + BEQ poly1305_auth_armv6_noblocks + MOVW R13, R0 + MOVW R5, R1 + ADD R2, R5, R5 + SUB R2, R6, R6 + BL poly1305_blocks_armv6<>(SB) +poly1305_auth_armv6_noblocks: + MOVW R13, R0 + MOVW R5, R1 + MOVW R6, R2 + MOVW R4, R3 + BL poly1305_finish_ext_armv6<>(SB) + MOVW R8, R13 + RET diff --git a/vendor/golang.org/x/crypto/poly1305/poly1305_test.go b/vendor/golang.org/x/crypto/poly1305/poly1305_test.go new file mode 100644 index 000000000..b3e92310b --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/poly1305_test.go @@ -0,0 +1,86 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package poly1305 + +import ( + "bytes" + "testing" + "unsafe" +) + +var testData = []struct { + in, k, correct []byte +}{ + { + []byte("Hello world!"), + []byte("this is 32-byte key for Poly1305"), + []byte{0xa6, 0xf7, 0x45, 0x00, 0x8f, 0x81, 0xc9, 0x16, 0xa2, 0x0d, 0xcc, 0x74, 0xee, 0xf2, 0xb2, 0xf0}, + }, + { + make([]byte, 32), + []byte("this is 32-byte key for Poly1305"), + []byte{0x49, 0xec, 0x78, 0x09, 0x0e, 0x48, 0x1e, 0xc6, 0xc2, 0x6b, 0x33, 0xb9, 0x1c, 0xcc, 0x03, 0x07}, + }, + { + make([]byte, 2007), + []byte("this is 32-byte key for Poly1305"), + []byte{0xda, 0x84, 0xbc, 0xab, 0x02, 0x67, 0x6c, 0x38, 0xcd, 0xb0, 0x15, 0x60, 0x42, 0x74, 0xc2, 0xaa}, + }, + { + make([]byte, 2007), + make([]byte, 32), + make([]byte, 16), + }, +} + +func testSum(t *testing.T, unaligned bool) { + var out [16]byte + var key [32]byte + + for i, v := range testData { + in := v.in + if unaligned { + in = unalignBytes(in) + } + copy(key[:], v.k) + Sum(&out, in, &key) + if !bytes.Equal(out[:], v.correct) { + t.Errorf("%d: expected %x, got %x", i, v.correct, out[:]) + } + } +} + +func TestSum(t *testing.T) { testSum(t, false) } +func TestSumUnaligned(t *testing.T) { testSum(t, true) } + +func benchmark(b *testing.B, size int, unaligned bool) { + var out [16]byte + var key [32]byte + in := make([]byte, size) + if unaligned { + in = unalignBytes(in) + } + b.SetBytes(int64(len(in))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + Sum(&out, in, &key) + } +} + +func Benchmark64(b *testing.B) { benchmark(b, 64, false) } +func Benchmark1K(b *testing.B) { benchmark(b, 1024, false) } +func Benchmark64Unaligned(b *testing.B) { benchmark(b, 64, true) } +func Benchmark1KUnaligned(b *testing.B) { benchmark(b, 1024, true) } + +func unalignBytes(in []byte) []byte { + out := make([]byte, len(in)+1) + if uintptr(unsafe.Pointer(&out[0]))&(unsafe.Alignof(uint32(0))-1) == 0 { + out = out[1:] + } else { + out = out[:len(in)] + } + copy(out, in) + return out +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_amd64.go b/vendor/golang.org/x/crypto/poly1305/sum_amd64.go new file mode 100644 index 000000000..6775c703f --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_amd64.go @@ -0,0 +1,24 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!gccgo,!appengine + +package poly1305 + +// This function is implemented in poly1305_amd64.s + +//go:noescape + +func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte) + +// Sum generates an authenticator for m using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[16]byte, m []byte, key *[32]byte) { + var mPtr *byte + if len(m) > 0 { + mPtr = &m[0] + } + poly1305(out, mPtr, uint64(len(m)), key) +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_arm.go b/vendor/golang.org/x/crypto/poly1305/sum_arm.go new file mode 100644 index 000000000..50b979c24 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_arm.go @@ -0,0 +1,24 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build arm,!gccgo,!appengine + +package poly1305 + +// This function is implemented in poly1305_arm.s + +//go:noescape + +func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte) + +// Sum generates an authenticator for m using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[16]byte, m []byte, key *[32]byte) { + var mPtr *byte + if len(m) > 0 { + mPtr = &m[0] + } + poly1305_auth_armv6(out, mPtr, uint32(len(m)), key) +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_ref.go b/vendor/golang.org/x/crypto/poly1305/sum_ref.go new file mode 100644 index 000000000..0b24fc78b --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_ref.go @@ -0,0 +1,1531 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64,!arm gccgo appengine + +package poly1305 + +// Based on original, public domain implementation from NaCl by D. J. +// Bernstein. + +import "math" + +const ( + alpham80 = 0.00000000558793544769287109375 + alpham48 = 24.0 + alpham16 = 103079215104.0 + alpha0 = 6755399441055744.0 + alpha18 = 1770887431076116955136.0 + alpha32 = 29014219670751100192948224.0 + alpha50 = 7605903601369376408980219232256.0 + alpha64 = 124615124604835863084731911901282304.0 + alpha82 = 32667107224410092492483962313449748299776.0 + alpha96 = 535217884764734955396857238543560676143529984.0 + alpha112 = 35076039295941670036888435985190792471742381031424.0 + alpha130 = 9194973245195333150150082162901855101712434733101613056.0 + scale = 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125 + offset0 = 6755408030990331.0 + offset1 = 29014256564239239022116864.0 + offset2 = 124615283061160854719918951570079744.0 + offset3 = 535219245894202480694386063513315216128475136.0 +) + +// Sum generates an authenticator for m using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[16]byte, m []byte, key *[32]byte) { + r := key + s := key[16:] + var ( + y7 float64 + y6 float64 + y1 float64 + y0 float64 + y5 float64 + y4 float64 + x7 float64 + x6 float64 + x1 float64 + x0 float64 + y3 float64 + y2 float64 + x5 float64 + r3lowx0 float64 + x4 float64 + r0lowx6 float64 + x3 float64 + r3highx0 float64 + x2 float64 + r0highx6 float64 + r0lowx0 float64 + sr1lowx6 float64 + r0highx0 float64 + sr1highx6 float64 + sr3low float64 + r1lowx0 float64 + sr2lowx6 float64 + r1highx0 float64 + sr2highx6 float64 + r2lowx0 float64 + sr3lowx6 float64 + r2highx0 float64 + sr3highx6 float64 + r1highx4 float64 + r1lowx4 float64 + r0highx4 float64 + r0lowx4 float64 + sr3highx4 float64 + sr3lowx4 float64 + sr2highx4 float64 + sr2lowx4 float64 + r0lowx2 float64 + r0highx2 float64 + r1lowx2 float64 + r1highx2 float64 + r2lowx2 float64 + r2highx2 float64 + sr3lowx2 float64 + sr3highx2 float64 + z0 float64 + z1 float64 + z2 float64 + z3 float64 + m0 int64 + m1 int64 + m2 int64 + m3 int64 + m00 uint32 + m01 uint32 + m02 uint32 + m03 uint32 + m10 uint32 + m11 uint32 + m12 uint32 + m13 uint32 + m20 uint32 + m21 uint32 + m22 uint32 + m23 uint32 + m30 uint32 + m31 uint32 + m32 uint32 + m33 uint64 + lbelow2 int32 + lbelow3 int32 + lbelow4 int32 + lbelow5 int32 + lbelow6 int32 + lbelow7 int32 + lbelow8 int32 + lbelow9 int32 + lbelow10 int32 + lbelow11 int32 + lbelow12 int32 + lbelow13 int32 + lbelow14 int32 + lbelow15 int32 + s00 uint32 + s01 uint32 + s02 uint32 + s03 uint32 + s10 uint32 + s11 uint32 + s12 uint32 + s13 uint32 + s20 uint32 + s21 uint32 + s22 uint32 + s23 uint32 + s30 uint32 + s31 uint32 + s32 uint32 + s33 uint32 + bits32 uint64 + f uint64 + f0 uint64 + f1 uint64 + f2 uint64 + f3 uint64 + f4 uint64 + g uint64 + g0 uint64 + g1 uint64 + g2 uint64 + g3 uint64 + g4 uint64 + ) + + var p int32 + + l := int32(len(m)) + + r00 := uint32(r[0]) + + r01 := uint32(r[1]) + + r02 := uint32(r[2]) + r0 := int64(2151) + + r03 := uint32(r[3]) + r03 &= 15 + r0 <<= 51 + + r10 := uint32(r[4]) + r10 &= 252 + r01 <<= 8 + r0 += int64(r00) + + r11 := uint32(r[5]) + r02 <<= 16 + r0 += int64(r01) + + r12 := uint32(r[6]) + r03 <<= 24 + r0 += int64(r02) + + r13 := uint32(r[7]) + r13 &= 15 + r1 := int64(2215) + r0 += int64(r03) + + d0 := r0 + r1 <<= 51 + r2 := int64(2279) + + r20 := uint32(r[8]) + r20 &= 252 + r11 <<= 8 + r1 += int64(r10) + + r21 := uint32(r[9]) + r12 <<= 16 + r1 += int64(r11) + + r22 := uint32(r[10]) + r13 <<= 24 + r1 += int64(r12) + + r23 := uint32(r[11]) + r23 &= 15 + r2 <<= 51 + r1 += int64(r13) + + d1 := r1 + r21 <<= 8 + r2 += int64(r20) + + r30 := uint32(r[12]) + r30 &= 252 + r22 <<= 16 + r2 += int64(r21) + + r31 := uint32(r[13]) + r23 <<= 24 + r2 += int64(r22) + + r32 := uint32(r[14]) + r2 += int64(r23) + r3 := int64(2343) + + d2 := r2 + r3 <<= 51 + + r33 := uint32(r[15]) + r33 &= 15 + r31 <<= 8 + r3 += int64(r30) + + r32 <<= 16 + r3 += int64(r31) + + r33 <<= 24 + r3 += int64(r32) + + r3 += int64(r33) + h0 := alpha32 - alpha32 + + d3 := r3 + h1 := alpha32 - alpha32 + + h2 := alpha32 - alpha32 + + h3 := alpha32 - alpha32 + + h4 := alpha32 - alpha32 + + r0low := math.Float64frombits(uint64(d0)) + h5 := alpha32 - alpha32 + + r1low := math.Float64frombits(uint64(d1)) + h6 := alpha32 - alpha32 + + r2low := math.Float64frombits(uint64(d2)) + h7 := alpha32 - alpha32 + + r0low -= alpha0 + + r1low -= alpha32 + + r2low -= alpha64 + + r0high := r0low + alpha18 + + r3low := math.Float64frombits(uint64(d3)) + + r1high := r1low + alpha50 + sr1low := scale * r1low + + r2high := r2low + alpha82 + sr2low := scale * r2low + + r0high -= alpha18 + r0high_stack := r0high + + r3low -= alpha96 + + r1high -= alpha50 + r1high_stack := r1high + + sr1high := sr1low + alpham80 + + r0low -= r0high + + r2high -= alpha82 + sr3low = scale * r3low + + sr2high := sr2low + alpham48 + + r1low -= r1high + r1low_stack := r1low + + sr1high -= alpham80 + sr1high_stack := sr1high + + r2low -= r2high + r2low_stack := r2low + + sr2high -= alpham48 + sr2high_stack := sr2high + + r3high := r3low + alpha112 + r0low_stack := r0low + + sr1low -= sr1high + sr1low_stack := sr1low + + sr3high := sr3low + alpham16 + r2high_stack := r2high + + sr2low -= sr2high + sr2low_stack := sr2low + + r3high -= alpha112 + r3high_stack := r3high + + sr3high -= alpham16 + sr3high_stack := sr3high + + r3low -= r3high + r3low_stack := r3low + + sr3low -= sr3high + sr3low_stack := sr3low + + if l < 16 { + goto addatmost15bytes + } + + m00 = uint32(m[p+0]) + m0 = 2151 + + m0 <<= 51 + m1 = 2215 + m01 = uint32(m[p+1]) + + m1 <<= 51 + m2 = 2279 + m02 = uint32(m[p+2]) + + m2 <<= 51 + m3 = 2343 + m03 = uint32(m[p+3]) + + m10 = uint32(m[p+4]) + m01 <<= 8 + m0 += int64(m00) + + m11 = uint32(m[p+5]) + m02 <<= 16 + m0 += int64(m01) + + m12 = uint32(m[p+6]) + m03 <<= 24 + m0 += int64(m02) + + m13 = uint32(m[p+7]) + m3 <<= 51 + m0 += int64(m03) + + m20 = uint32(m[p+8]) + m11 <<= 8 + m1 += int64(m10) + + m21 = uint32(m[p+9]) + m12 <<= 16 + m1 += int64(m11) + + m22 = uint32(m[p+10]) + m13 <<= 24 + m1 += int64(m12) + + m23 = uint32(m[p+11]) + m1 += int64(m13) + + m30 = uint32(m[p+12]) + m21 <<= 8 + m2 += int64(m20) + + m31 = uint32(m[p+13]) + m22 <<= 16 + m2 += int64(m21) + + m32 = uint32(m[p+14]) + m23 <<= 24 + m2 += int64(m22) + + m33 = uint64(m[p+15]) + m2 += int64(m23) + + d0 = m0 + m31 <<= 8 + m3 += int64(m30) + + d1 = m1 + m32 <<= 16 + m3 += int64(m31) + + d2 = m2 + m33 += 256 + + m33 <<= 24 + m3 += int64(m32) + + m3 += int64(m33) + d3 = m3 + + p += 16 + l -= 16 + + z0 = math.Float64frombits(uint64(d0)) + + z1 = math.Float64frombits(uint64(d1)) + + z2 = math.Float64frombits(uint64(d2)) + + z3 = math.Float64frombits(uint64(d3)) + + z0 -= alpha0 + + z1 -= alpha32 + + z2 -= alpha64 + + z3 -= alpha96 + + h0 += z0 + + h1 += z1 + + h3 += z2 + + h5 += z3 + + if l < 16 { + goto multiplyaddatmost15bytes + } + +multiplyaddatleast16bytes: + + m2 = 2279 + m20 = uint32(m[p+8]) + y7 = h7 + alpha130 + + m2 <<= 51 + m3 = 2343 + m21 = uint32(m[p+9]) + y6 = h6 + alpha130 + + m3 <<= 51 + m0 = 2151 + m22 = uint32(m[p+10]) + y1 = h1 + alpha32 + + m0 <<= 51 + m1 = 2215 + m23 = uint32(m[p+11]) + y0 = h0 + alpha32 + + m1 <<= 51 + m30 = uint32(m[p+12]) + y7 -= alpha130 + + m21 <<= 8 + m2 += int64(m20) + m31 = uint32(m[p+13]) + y6 -= alpha130 + + m22 <<= 16 + m2 += int64(m21) + m32 = uint32(m[p+14]) + y1 -= alpha32 + + m23 <<= 24 + m2 += int64(m22) + m33 = uint64(m[p+15]) + y0 -= alpha32 + + m2 += int64(m23) + m00 = uint32(m[p+0]) + y5 = h5 + alpha96 + + m31 <<= 8 + m3 += int64(m30) + m01 = uint32(m[p+1]) + y4 = h4 + alpha96 + + m32 <<= 16 + m02 = uint32(m[p+2]) + x7 = h7 - y7 + y7 *= scale + + m33 += 256 + m03 = uint32(m[p+3]) + x6 = h6 - y6 + y6 *= scale + + m33 <<= 24 + m3 += int64(m31) + m10 = uint32(m[p+4]) + x1 = h1 - y1 + + m01 <<= 8 + m3 += int64(m32) + m11 = uint32(m[p+5]) + x0 = h0 - y0 + + m3 += int64(m33) + m0 += int64(m00) + m12 = uint32(m[p+6]) + y5 -= alpha96 + + m02 <<= 16 + m0 += int64(m01) + m13 = uint32(m[p+7]) + y4 -= alpha96 + + m03 <<= 24 + m0 += int64(m02) + d2 = m2 + x1 += y7 + + m0 += int64(m03) + d3 = m3 + x0 += y6 + + m11 <<= 8 + m1 += int64(m10) + d0 = m0 + x7 += y5 + + m12 <<= 16 + m1 += int64(m11) + x6 += y4 + + m13 <<= 24 + m1 += int64(m12) + y3 = h3 + alpha64 + + m1 += int64(m13) + d1 = m1 + y2 = h2 + alpha64 + + x0 += x1 + + x6 += x7 + + y3 -= alpha64 + r3low = r3low_stack + + y2 -= alpha64 + r0low = r0low_stack + + x5 = h5 - y5 + r3lowx0 = r3low * x0 + r3high = r3high_stack + + x4 = h4 - y4 + r0lowx6 = r0low * x6 + r0high = r0high_stack + + x3 = h3 - y3 + r3highx0 = r3high * x0 + sr1low = sr1low_stack + + x2 = h2 - y2 + r0highx6 = r0high * x6 + sr1high = sr1high_stack + + x5 += y3 + r0lowx0 = r0low * x0 + r1low = r1low_stack + + h6 = r3lowx0 + r0lowx6 + sr1lowx6 = sr1low * x6 + r1high = r1high_stack + + x4 += y2 + r0highx0 = r0high * x0 + sr2low = sr2low_stack + + h7 = r3highx0 + r0highx6 + sr1highx6 = sr1high * x6 + sr2high = sr2high_stack + + x3 += y1 + r1lowx0 = r1low * x0 + r2low = r2low_stack + + h0 = r0lowx0 + sr1lowx6 + sr2lowx6 = sr2low * x6 + r2high = r2high_stack + + x2 += y0 + r1highx0 = r1high * x0 + sr3low = sr3low_stack + + h1 = r0highx0 + sr1highx6 + sr2highx6 = sr2high * x6 + sr3high = sr3high_stack + + x4 += x5 + r2lowx0 = r2low * x0 + z2 = math.Float64frombits(uint64(d2)) + + h2 = r1lowx0 + sr2lowx6 + sr3lowx6 = sr3low * x6 + + x2 += x3 + r2highx0 = r2high * x0 + z3 = math.Float64frombits(uint64(d3)) + + h3 = r1highx0 + sr2highx6 + sr3highx6 = sr3high * x6 + + r1highx4 = r1high * x4 + z2 -= alpha64 + + h4 = r2lowx0 + sr3lowx6 + r1lowx4 = r1low * x4 + + r0highx4 = r0high * x4 + z3 -= alpha96 + + h5 = r2highx0 + sr3highx6 + r0lowx4 = r0low * x4 + + h7 += r1highx4 + sr3highx4 = sr3high * x4 + + h6 += r1lowx4 + sr3lowx4 = sr3low * x4 + + h5 += r0highx4 + sr2highx4 = sr2high * x4 + + h4 += r0lowx4 + sr2lowx4 = sr2low * x4 + + h3 += sr3highx4 + r0lowx2 = r0low * x2 + + h2 += sr3lowx4 + r0highx2 = r0high * x2 + + h1 += sr2highx4 + r1lowx2 = r1low * x2 + + h0 += sr2lowx4 + r1highx2 = r1high * x2 + + h2 += r0lowx2 + r2lowx2 = r2low * x2 + + h3 += r0highx2 + r2highx2 = r2high * x2 + + h4 += r1lowx2 + sr3lowx2 = sr3low * x2 + + h5 += r1highx2 + sr3highx2 = sr3high * x2 + + p += 16 + l -= 16 + h6 += r2lowx2 + + h7 += r2highx2 + + z1 = math.Float64frombits(uint64(d1)) + h0 += sr3lowx2 + + z0 = math.Float64frombits(uint64(d0)) + h1 += sr3highx2 + + z1 -= alpha32 + + z0 -= alpha0 + + h5 += z3 + + h3 += z2 + + h1 += z1 + + h0 += z0 + + if l >= 16 { + goto multiplyaddatleast16bytes + } + +multiplyaddatmost15bytes: + + y7 = h7 + alpha130 + + y6 = h6 + alpha130 + + y1 = h1 + alpha32 + + y0 = h0 + alpha32 + + y7 -= alpha130 + + y6 -= alpha130 + + y1 -= alpha32 + + y0 -= alpha32 + + y5 = h5 + alpha96 + + y4 = h4 + alpha96 + + x7 = h7 - y7 + y7 *= scale + + x6 = h6 - y6 + y6 *= scale + + x1 = h1 - y1 + + x0 = h0 - y0 + + y5 -= alpha96 + + y4 -= alpha96 + + x1 += y7 + + x0 += y6 + + x7 += y5 + + x6 += y4 + + y3 = h3 + alpha64 + + y2 = h2 + alpha64 + + x0 += x1 + + x6 += x7 + + y3 -= alpha64 + r3low = r3low_stack + + y2 -= alpha64 + r0low = r0low_stack + + x5 = h5 - y5 + r3lowx0 = r3low * x0 + r3high = r3high_stack + + x4 = h4 - y4 + r0lowx6 = r0low * x6 + r0high = r0high_stack + + x3 = h3 - y3 + r3highx0 = r3high * x0 + sr1low = sr1low_stack + + x2 = h2 - y2 + r0highx6 = r0high * x6 + sr1high = sr1high_stack + + x5 += y3 + r0lowx0 = r0low * x0 + r1low = r1low_stack + + h6 = r3lowx0 + r0lowx6 + sr1lowx6 = sr1low * x6 + r1high = r1high_stack + + x4 += y2 + r0highx0 = r0high * x0 + sr2low = sr2low_stack + + h7 = r3highx0 + r0highx6 + sr1highx6 = sr1high * x6 + sr2high = sr2high_stack + + x3 += y1 + r1lowx0 = r1low * x0 + r2low = r2low_stack + + h0 = r0lowx0 + sr1lowx6 + sr2lowx6 = sr2low * x6 + r2high = r2high_stack + + x2 += y0 + r1highx0 = r1high * x0 + sr3low = sr3low_stack + + h1 = r0highx0 + sr1highx6 + sr2highx6 = sr2high * x6 + sr3high = sr3high_stack + + x4 += x5 + r2lowx0 = r2low * x0 + + h2 = r1lowx0 + sr2lowx6 + sr3lowx6 = sr3low * x6 + + x2 += x3 + r2highx0 = r2high * x0 + + h3 = r1highx0 + sr2highx6 + sr3highx6 = sr3high * x6 + + r1highx4 = r1high * x4 + + h4 = r2lowx0 + sr3lowx6 + r1lowx4 = r1low * x4 + + r0highx4 = r0high * x4 + + h5 = r2highx0 + sr3highx6 + r0lowx4 = r0low * x4 + + h7 += r1highx4 + sr3highx4 = sr3high * x4 + + h6 += r1lowx4 + sr3lowx4 = sr3low * x4 + + h5 += r0highx4 + sr2highx4 = sr2high * x4 + + h4 += r0lowx4 + sr2lowx4 = sr2low * x4 + + h3 += sr3highx4 + r0lowx2 = r0low * x2 + + h2 += sr3lowx4 + r0highx2 = r0high * x2 + + h1 += sr2highx4 + r1lowx2 = r1low * x2 + + h0 += sr2lowx4 + r1highx2 = r1high * x2 + + h2 += r0lowx2 + r2lowx2 = r2low * x2 + + h3 += r0highx2 + r2highx2 = r2high * x2 + + h4 += r1lowx2 + sr3lowx2 = sr3low * x2 + + h5 += r1highx2 + sr3highx2 = sr3high * x2 + + h6 += r2lowx2 + + h7 += r2highx2 + + h0 += sr3lowx2 + + h1 += sr3highx2 + +addatmost15bytes: + + if l == 0 { + goto nomorebytes + } + + lbelow2 = l - 2 + + lbelow3 = l - 3 + + lbelow2 >>= 31 + lbelow4 = l - 4 + + m00 = uint32(m[p+0]) + lbelow3 >>= 31 + p += lbelow2 + + m01 = uint32(m[p+1]) + lbelow4 >>= 31 + p += lbelow3 + + m02 = uint32(m[p+2]) + p += lbelow4 + m0 = 2151 + + m03 = uint32(m[p+3]) + m0 <<= 51 + m1 = 2215 + + m0 += int64(m00) + m01 &^= uint32(lbelow2) + + m02 &^= uint32(lbelow3) + m01 -= uint32(lbelow2) + + m01 <<= 8 + m03 &^= uint32(lbelow4) + + m0 += int64(m01) + lbelow2 -= lbelow3 + + m02 += uint32(lbelow2) + lbelow3 -= lbelow4 + + m02 <<= 16 + m03 += uint32(lbelow3) + + m03 <<= 24 + m0 += int64(m02) + + m0 += int64(m03) + lbelow5 = l - 5 + + lbelow6 = l - 6 + lbelow7 = l - 7 + + lbelow5 >>= 31 + lbelow8 = l - 8 + + lbelow6 >>= 31 + p += lbelow5 + + m10 = uint32(m[p+4]) + lbelow7 >>= 31 + p += lbelow6 + + m11 = uint32(m[p+5]) + lbelow8 >>= 31 + p += lbelow7 + + m12 = uint32(m[p+6]) + m1 <<= 51 + p += lbelow8 + + m13 = uint32(m[p+7]) + m10 &^= uint32(lbelow5) + lbelow4 -= lbelow5 + + m10 += uint32(lbelow4) + lbelow5 -= lbelow6 + + m11 &^= uint32(lbelow6) + m11 += uint32(lbelow5) + + m11 <<= 8 + m1 += int64(m10) + + m1 += int64(m11) + m12 &^= uint32(lbelow7) + + lbelow6 -= lbelow7 + m13 &^= uint32(lbelow8) + + m12 += uint32(lbelow6) + lbelow7 -= lbelow8 + + m12 <<= 16 + m13 += uint32(lbelow7) + + m13 <<= 24 + m1 += int64(m12) + + m1 += int64(m13) + m2 = 2279 + + lbelow9 = l - 9 + m3 = 2343 + + lbelow10 = l - 10 + lbelow11 = l - 11 + + lbelow9 >>= 31 + lbelow12 = l - 12 + + lbelow10 >>= 31 + p += lbelow9 + + m20 = uint32(m[p+8]) + lbelow11 >>= 31 + p += lbelow10 + + m21 = uint32(m[p+9]) + lbelow12 >>= 31 + p += lbelow11 + + m22 = uint32(m[p+10]) + m2 <<= 51 + p += lbelow12 + + m23 = uint32(m[p+11]) + m20 &^= uint32(lbelow9) + lbelow8 -= lbelow9 + + m20 += uint32(lbelow8) + lbelow9 -= lbelow10 + + m21 &^= uint32(lbelow10) + m21 += uint32(lbelow9) + + m21 <<= 8 + m2 += int64(m20) + + m2 += int64(m21) + m22 &^= uint32(lbelow11) + + lbelow10 -= lbelow11 + m23 &^= uint32(lbelow12) + + m22 += uint32(lbelow10) + lbelow11 -= lbelow12 + + m22 <<= 16 + m23 += uint32(lbelow11) + + m23 <<= 24 + m2 += int64(m22) + + m3 <<= 51 + lbelow13 = l - 13 + + lbelow13 >>= 31 + lbelow14 = l - 14 + + lbelow14 >>= 31 + p += lbelow13 + lbelow15 = l - 15 + + m30 = uint32(m[p+12]) + lbelow15 >>= 31 + p += lbelow14 + + m31 = uint32(m[p+13]) + p += lbelow15 + m2 += int64(m23) + + m32 = uint32(m[p+14]) + m30 &^= uint32(lbelow13) + lbelow12 -= lbelow13 + + m30 += uint32(lbelow12) + lbelow13 -= lbelow14 + + m3 += int64(m30) + m31 &^= uint32(lbelow14) + + m31 += uint32(lbelow13) + m32 &^= uint32(lbelow15) + + m31 <<= 8 + lbelow14 -= lbelow15 + + m3 += int64(m31) + m32 += uint32(lbelow14) + d0 = m0 + + m32 <<= 16 + m33 = uint64(lbelow15 + 1) + d1 = m1 + + m33 <<= 24 + m3 += int64(m32) + d2 = m2 + + m3 += int64(m33) + d3 = m3 + + z3 = math.Float64frombits(uint64(d3)) + + z2 = math.Float64frombits(uint64(d2)) + + z1 = math.Float64frombits(uint64(d1)) + + z0 = math.Float64frombits(uint64(d0)) + + z3 -= alpha96 + + z2 -= alpha64 + + z1 -= alpha32 + + z0 -= alpha0 + + h5 += z3 + + h3 += z2 + + h1 += z1 + + h0 += z0 + + y7 = h7 + alpha130 + + y6 = h6 + alpha130 + + y1 = h1 + alpha32 + + y0 = h0 + alpha32 + + y7 -= alpha130 + + y6 -= alpha130 + + y1 -= alpha32 + + y0 -= alpha32 + + y5 = h5 + alpha96 + + y4 = h4 + alpha96 + + x7 = h7 - y7 + y7 *= scale + + x6 = h6 - y6 + y6 *= scale + + x1 = h1 - y1 + + x0 = h0 - y0 + + y5 -= alpha96 + + y4 -= alpha96 + + x1 += y7 + + x0 += y6 + + x7 += y5 + + x6 += y4 + + y3 = h3 + alpha64 + + y2 = h2 + alpha64 + + x0 += x1 + + x6 += x7 + + y3 -= alpha64 + r3low = r3low_stack + + y2 -= alpha64 + r0low = r0low_stack + + x5 = h5 - y5 + r3lowx0 = r3low * x0 + r3high = r3high_stack + + x4 = h4 - y4 + r0lowx6 = r0low * x6 + r0high = r0high_stack + + x3 = h3 - y3 + r3highx0 = r3high * x0 + sr1low = sr1low_stack + + x2 = h2 - y2 + r0highx6 = r0high * x6 + sr1high = sr1high_stack + + x5 += y3 + r0lowx0 = r0low * x0 + r1low = r1low_stack + + h6 = r3lowx0 + r0lowx6 + sr1lowx6 = sr1low * x6 + r1high = r1high_stack + + x4 += y2 + r0highx0 = r0high * x0 + sr2low = sr2low_stack + + h7 = r3highx0 + r0highx6 + sr1highx6 = sr1high * x6 + sr2high = sr2high_stack + + x3 += y1 + r1lowx0 = r1low * x0 + r2low = r2low_stack + + h0 = r0lowx0 + sr1lowx6 + sr2lowx6 = sr2low * x6 + r2high = r2high_stack + + x2 += y0 + r1highx0 = r1high * x0 + sr3low = sr3low_stack + + h1 = r0highx0 + sr1highx6 + sr2highx6 = sr2high * x6 + sr3high = sr3high_stack + + x4 += x5 + r2lowx0 = r2low * x0 + + h2 = r1lowx0 + sr2lowx6 + sr3lowx6 = sr3low * x6 + + x2 += x3 + r2highx0 = r2high * x0 + + h3 = r1highx0 + sr2highx6 + sr3highx6 = sr3high * x6 + + r1highx4 = r1high * x4 + + h4 = r2lowx0 + sr3lowx6 + r1lowx4 = r1low * x4 + + r0highx4 = r0high * x4 + + h5 = r2highx0 + sr3highx6 + r0lowx4 = r0low * x4 + + h7 += r1highx4 + sr3highx4 = sr3high * x4 + + h6 += r1lowx4 + sr3lowx4 = sr3low * x4 + + h5 += r0highx4 + sr2highx4 = sr2high * x4 + + h4 += r0lowx4 + sr2lowx4 = sr2low * x4 + + h3 += sr3highx4 + r0lowx2 = r0low * x2 + + h2 += sr3lowx4 + r0highx2 = r0high * x2 + + h1 += sr2highx4 + r1lowx2 = r1low * x2 + + h0 += sr2lowx4 + r1highx2 = r1high * x2 + + h2 += r0lowx2 + r2lowx2 = r2low * x2 + + h3 += r0highx2 + r2highx2 = r2high * x2 + + h4 += r1lowx2 + sr3lowx2 = sr3low * x2 + + h5 += r1highx2 + sr3highx2 = sr3high * x2 + + h6 += r2lowx2 + + h7 += r2highx2 + + h0 += sr3lowx2 + + h1 += sr3highx2 + +nomorebytes: + + y7 = h7 + alpha130 + + y0 = h0 + alpha32 + + y1 = h1 + alpha32 + + y2 = h2 + alpha64 + + y7 -= alpha130 + + y3 = h3 + alpha64 + + y4 = h4 + alpha96 + + y5 = h5 + alpha96 + + x7 = h7 - y7 + y7 *= scale + + y0 -= alpha32 + + y1 -= alpha32 + + y2 -= alpha64 + + h6 += x7 + + y3 -= alpha64 + + y4 -= alpha96 + + y5 -= alpha96 + + y6 = h6 + alpha130 + + x0 = h0 - y0 + + x1 = h1 - y1 + + x2 = h2 - y2 + + y6 -= alpha130 + + x0 += y7 + + x3 = h3 - y3 + + x4 = h4 - y4 + + x5 = h5 - y5 + + x6 = h6 - y6 + + y6 *= scale + + x2 += y0 + + x3 += y1 + + x4 += y2 + + x0 += y6 + + x5 += y3 + + x6 += y4 + + x2 += x3 + + x0 += x1 + + x4 += x5 + + x6 += y5 + + x2 += offset1 + d1 = int64(math.Float64bits(x2)) + + x0 += offset0 + d0 = int64(math.Float64bits(x0)) + + x4 += offset2 + d2 = int64(math.Float64bits(x4)) + + x6 += offset3 + d3 = int64(math.Float64bits(x6)) + + f0 = uint64(d0) + + f1 = uint64(d1) + bits32 = math.MaxUint64 + + f2 = uint64(d2) + bits32 >>= 32 + + f3 = uint64(d3) + f = f0 >> 32 + + f0 &= bits32 + f &= 255 + + f1 += f + g0 = f0 + 5 + + g = g0 >> 32 + g0 &= bits32 + + f = f1 >> 32 + f1 &= bits32 + + f &= 255 + g1 = f1 + g + + g = g1 >> 32 + f2 += f + + f = f2 >> 32 + g1 &= bits32 + + f2 &= bits32 + f &= 255 + + f3 += f + g2 = f2 + g + + g = g2 >> 32 + g2 &= bits32 + + f4 = f3 >> 32 + f3 &= bits32 + + f4 &= 255 + g3 = f3 + g + + g = g3 >> 32 + g3 &= bits32 + + g4 = f4 + g + + g4 = g4 - 4 + s00 = uint32(s[0]) + + f = uint64(int64(g4) >> 63) + s01 = uint32(s[1]) + + f0 &= f + g0 &^= f + s02 = uint32(s[2]) + + f1 &= f + f0 |= g0 + s03 = uint32(s[3]) + + g1 &^= f + f2 &= f + s10 = uint32(s[4]) + + f3 &= f + g2 &^= f + s11 = uint32(s[5]) + + g3 &^= f + f1 |= g1 + s12 = uint32(s[6]) + + f2 |= g2 + f3 |= g3 + s13 = uint32(s[7]) + + s01 <<= 8 + f0 += uint64(s00) + s20 = uint32(s[8]) + + s02 <<= 16 + f0 += uint64(s01) + s21 = uint32(s[9]) + + s03 <<= 24 + f0 += uint64(s02) + s22 = uint32(s[10]) + + s11 <<= 8 + f1 += uint64(s10) + s23 = uint32(s[11]) + + s12 <<= 16 + f1 += uint64(s11) + s30 = uint32(s[12]) + + s13 <<= 24 + f1 += uint64(s12) + s31 = uint32(s[13]) + + f0 += uint64(s03) + f1 += uint64(s13) + s32 = uint32(s[14]) + + s21 <<= 8 + f2 += uint64(s20) + s33 = uint32(s[15]) + + s22 <<= 16 + f2 += uint64(s21) + + s23 <<= 24 + f2 += uint64(s22) + + s31 <<= 8 + f3 += uint64(s30) + + s32 <<= 16 + f3 += uint64(s31) + + s33 <<= 24 + f3 += uint64(s32) + + f2 += uint64(s23) + f3 += uint64(s33) + + out[0] = byte(f0) + f0 >>= 8 + out[1] = byte(f0) + f0 >>= 8 + out[2] = byte(f0) + f0 >>= 8 + out[3] = byte(f0) + f0 >>= 8 + f1 += f0 + + out[4] = byte(f1) + f1 >>= 8 + out[5] = byte(f1) + f1 >>= 8 + out[6] = byte(f1) + f1 >>= 8 + out[7] = byte(f1) + f1 >>= 8 + f2 += f1 + + out[8] = byte(f2) + f2 >>= 8 + out[9] = byte(f2) + f2 >>= 8 + out[10] = byte(f2) + f2 >>= 8 + out[11] = byte(f2) + f2 >>= 8 + f3 += f2 + + out[12] = byte(f3) + f3 >>= 8 + out[13] = byte(f3) + f3 >>= 8 + out[14] = byte(f3) + f3 >>= 8 + out[15] = byte(f3) +} -- cgit v1.2.3-1-g7c22