summaryrefslogtreecommitdiffstats
path: root/vendor/golang.org/x/image/vector/gen.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/image/vector/gen.go')
-rw-r--r--vendor/golang.org/x/image/vector/gen.go447
1 files changed, 0 insertions, 447 deletions
diff --git a/vendor/golang.org/x/image/vector/gen.go b/vendor/golang.org/x/image/vector/gen.go
deleted file mode 100644
index 28b298b5e..000000000
--- a/vendor/golang.org/x/image/vector/gen.go
+++ /dev/null
@@ -1,447 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-import (
- "bytes"
- "io/ioutil"
- "log"
- "strings"
- "text/template"
-)
-
-const ( // copyright is the header the source template must start with; main strips it.
- copyright = "" +
- "// Copyright 2016 The Go Authors. All rights reserved.\n" +
- "// Use of this source code is governed by a BSD-style\n" +
- "// license that can be found in the LICENSE file.\n"
- // doNotEdit is the first line main writes to the generated file.
- doNotEdit = "// generated by go run gen.go; DO NOT EDIT\n"
- // dashDashDash marks where the verbatim preamble ends and the parsed template begins.
- dashDashDash = "// --------"
-)
-
-func main() {
- tmpl, err := ioutil.ReadFile("gen_acc_amd64.s.tmpl")
- if err != nil {
- log.Fatalf("ReadFile: %v", err)
- }
- if !bytes.HasPrefix(tmpl, []byte(copyright)) {
- log.Fatal("source template did not start with the copyright header")
- }
- tmpl = tmpl[len(copyright):]
-
- preamble := []byte(nil)
- if i := bytes.Index(tmpl, []byte(dashDashDash)); i < 0 {
- log.Fatalf("source template did not contain %q", dashDashDash)
- } else {
- preamble, tmpl = tmpl[:i], tmpl[i:]
- }
-
- t, err := template.New("").Parse(string(tmpl))
- if err != nil {
- log.Fatalf("Parse: %v", err)
- }
-
- out := bytes.NewBuffer(nil)
- out.WriteString(doNotEdit)
- out.Write(preamble)
-
- for i, v := range instances {
- if i != 0 {
- out.WriteString("\n")
- }
- if strings.Contains(v.LoadArgs, "{{.ShortName}}") {
- v.LoadArgs = strings.Replace(v.LoadArgs, "{{.ShortName}}", v.ShortName, -1)
- }
- if err := t.Execute(out, v); err != nil {
- log.Fatalf("Execute(%q): %v", v.ShortName, err)
- }
- }
-
- if err := ioutil.WriteFile("acc_amd64.s", out.Bytes(), 0666); err != nil {
- log.Fatalf("WriteFile: %v", err)
- }
-}
-
-var instances = []struct { // One entry per execution of the template by main.
- LongName string // Descriptive name; not read by main (presumably used by the template - confirm).
- ShortName string // Substituted for "{{.ShortName}}" in LoadArgs; also quoted in main's Execute error.
- FrameSize string // fxFrameSize or flFrameSize.
- ArgsSize string // oneArgArgsSize or twoArgArgsSize, matching the number of slices in Args.
- Args string // Go-syntax parameter list.
- DstElemSize1 int // Size in bytes of one destination element.
- DstElemSize4 int // Size in bytes of four destination elements.
- XMM3 string // Name of the constant loaded into X3, or "-".
- XMM4 string // Name of the constant loaded into X4, or "-".
- XMM5 string // Name of the constant loaded into X5.
- XMM6 string // Name of the constant loaded into X6, or "-".
- XMM8 string // Name of the constant loaded into X8, or "-".
- XMM9 string // Name of the constant loaded into X9, or "-".
- XMM10 string // Name of the constant loaded into X10, or "-".
- LoadArgs string // Assembly that loads the arguments; may contain "{{.ShortName}}".
- Setup string // fxSetup or flSetup.
- LoadXMMRegs string // Assembly that loads the XMM constants named above.
- Add string // Add instruction mnemonic: fxAdd or flAdd.
- ClampAndScale string // Assembly snippet: fxClampAndScale or flClampAndScale.
- ConvertToInt32 string // Assembly snippet: fxConvertToInt32 or flConvertToInt32.
- Store4 string // Assembly snippet storing four elements at (DI).
- Store1 string // Assembly snippet storing one element at (DI).
-}{{
- LongName: "fixedAccumulateOpOver",
- ShortName: "fxAccOpOver",
- FrameSize: fxFrameSize,
- ArgsSize: twoArgArgsSize,
- Args: "dst []uint8, src []uint32",
- DstElemSize1: 1 * sizeOfUint8,
- DstElemSize4: 4 * sizeOfUint8,
- XMM3: fxXMM3,
- XMM4: fxXMM4,
- XMM5: fxXMM5,
- XMM6: opOverXMM6,
- XMM8: opOverXMM8,
- XMM9: opOverXMM9,
- XMM10: opOverXMM10,
- LoadArgs: twoArgLoadArgs,
- Setup: fxSetup,
- LoadXMMRegs: fxLoadXMMRegs + "\n" + opOverLoadXMMRegs,
- Add: fxAdd,
- ClampAndScale: fxClampAndScale,
- ConvertToInt32: fxConvertToInt32,
- Store4: opOverStore4,
- Store1: opOverStore1,
-}, {
- LongName: "fixedAccumulateOpSrc",
- ShortName: "fxAccOpSrc",
- FrameSize: fxFrameSize,
- ArgsSize: twoArgArgsSize,
- Args: "dst []uint8, src []uint32",
- DstElemSize1: 1 * sizeOfUint8,
- DstElemSize4: 4 * sizeOfUint8,
- XMM3: fxXMM3,
- XMM4: fxXMM4,
- XMM5: fxXMM5,
- XMM6: opSrcXMM6,
- XMM8: opSrcXMM8,
- XMM9: opSrcXMM9,
- XMM10: opSrcXMM10,
- LoadArgs: twoArgLoadArgs,
- Setup: fxSetup,
- LoadXMMRegs: fxLoadXMMRegs + "\n" + opSrcLoadXMMRegs,
- Add: fxAdd,
- ClampAndScale: fxClampAndScale,
- ConvertToInt32: fxConvertToInt32,
- Store4: opSrcStore4,
- Store1: opSrcStore1,
-}, {
- LongName: "fixedAccumulateMask",
- ShortName: "fxAccMask",
- FrameSize: fxFrameSize,
- ArgsSize: oneArgArgsSize,
- Args: "buf []uint32",
- DstElemSize1: 1 * sizeOfUint32,
- DstElemSize4: 4 * sizeOfUint32,
- XMM3: fxXMM3,
- XMM4: fxXMM4,
- XMM5: fxXMM5,
- XMM6: maskXMM6,
- XMM8: maskXMM8,
- XMM9: maskXMM9,
- XMM10: maskXMM10,
- LoadArgs: oneArgLoadArgs,
- Setup: fxSetup,
- LoadXMMRegs: fxLoadXMMRegs + "\n" + maskLoadXMMRegs,
- Add: fxAdd,
- ClampAndScale: fxClampAndScale,
- ConvertToInt32: fxConvertToInt32,
- Store4: maskStore4,
- Store1: maskStore1,
-}, {
- LongName: "floatingAccumulateOpOver",
- ShortName: "flAccOpOver",
- FrameSize: flFrameSize,
- ArgsSize: twoArgArgsSize,
- Args: "dst []uint8, src []float32",
- DstElemSize1: 1 * sizeOfUint8,
- DstElemSize4: 4 * sizeOfUint8,
- XMM3: flXMM3,
- XMM4: flXMM4,
- XMM5: flXMM5,
- XMM6: opOverXMM6,
- XMM8: opOverXMM8,
- XMM9: opOverXMM9,
- XMM10: opOverXMM10,
- LoadArgs: twoArgLoadArgs,
- Setup: flSetup,
- LoadXMMRegs: flLoadXMMRegs + "\n" + opOverLoadXMMRegs,
- Add: flAdd,
- ClampAndScale: flClampAndScale,
- ConvertToInt32: flConvertToInt32,
- Store4: opOverStore4,
- Store1: opOverStore1,
-}, {
- LongName: "floatingAccumulateOpSrc",
- ShortName: "flAccOpSrc",
- FrameSize: flFrameSize,
- ArgsSize: twoArgArgsSize,
- Args: "dst []uint8, src []float32",
- DstElemSize1: 1 * sizeOfUint8,
- DstElemSize4: 4 * sizeOfUint8,
- XMM3: flXMM3,
- XMM4: flXMM4,
- XMM5: flXMM5,
- XMM6: opSrcXMM6,
- XMM8: opSrcXMM8,
- XMM9: opSrcXMM9,
- XMM10: opSrcXMM10,
- LoadArgs: twoArgLoadArgs,
- Setup: flSetup,
- LoadXMMRegs: flLoadXMMRegs + "\n" + opSrcLoadXMMRegs,
- Add: flAdd,
- ClampAndScale: flClampAndScale,
- ConvertToInt32: flConvertToInt32,
- Store4: opSrcStore4,
- Store1: opSrcStore1,
-}, {
- LongName: "floatingAccumulateMask",
- ShortName: "flAccMask",
- FrameSize: flFrameSize,
- ArgsSize: twoArgArgsSize,
- Args: "dst []uint32, src []float32",
- DstElemSize1: 1 * sizeOfUint32,
- DstElemSize4: 4 * sizeOfUint32,
- XMM3: flXMM3,
- XMM4: flXMM4,
- XMM5: flXMM5,
- XMM6: maskXMM6,
- XMM8: maskXMM8,
- XMM9: maskXMM9,
- XMM10: maskXMM10,
- LoadArgs: twoArgLoadArgs,
- Setup: flSetup,
- LoadXMMRegs: flLoadXMMRegs + "\n" + maskLoadXMMRegs,
- Add: flAdd,
- ClampAndScale: flClampAndScale,
- ConvertToInt32: flConvertToInt32,
- Store4: maskStore4,
- Store1: maskStore1,
-}}
-
-const ( // String and integer parameters referenced by the instances table.
- fxFrameSize = `0`
- flFrameSize = `8`
- // Argument sizes in bytes: 24 for one slice argument, 48 for two.
- oneArgArgsSize = `24`
- twoArgArgsSize = `48`
- // Element sizes in bytes, multiplied into DstElemSize1 and DstElemSize4.
- sizeOfUint8 = 1
- sizeOfUint32 = 4
- // Names of the constants in X3, X4 and X5; "-" where the matching LoadXMMRegs snippet loads nothing.
- fxXMM3 = `-`
- flXMM3 = `flSignMask`
-
- fxXMM4 = `-`
- flXMM4 = `flOne`
-
- fxXMM5 = `fxAlmost65536`
- flXMM5 = `flAlmost65536`
- // Argument loading: DI and BX get the dst base and length, SI and R10 the src base and length.
- oneArgLoadArgs = `
- MOVQ buf_base+0(FP), DI
- MOVQ buf_len+8(FP), BX
- MOVQ buf_base+0(FP), SI
- MOVQ buf_len+8(FP), R10
- `
- twoArgLoadArgs = `
- MOVQ dst_base+0(FP), DI
- MOVQ dst_len+8(FP), BX
- MOVQ src_base+24(FP), SI
- MOVQ src_len+32(FP), R10
- // Sanity check that len(dst) >= len(src).
- CMPQ BX, R10
- JLT {{.ShortName}}End
- `
- // Setup code; only the fl variant prepares an alternate MXCSR value in its 8-byte frame.
- fxSetup = ``
- flSetup = `
- // Prepare to set MXCSR bits 13 and 14, so that the CVTPS2PL below is
- // "Round To Zero".
- STMXCSR mxcsrOrig-8(SP)
- MOVL mxcsrOrig-8(SP), AX
- ORL $0x6000, AX
- MOVL AX, mxcsrNew-4(SP)
- `
- // Loads of the constants named by the XMM3, XMM4 and XMM5 fields.
- fxLoadXMMRegs = `
- // fxAlmost65536 := XMM(0x0000ffff repeated four times) // Maximum of an uint16.
- MOVOU fxAlmost65536<>(SB), X5
- `
- flLoadXMMRegs = `
- // flSignMask := XMM(0x7fffffff repeated four times) // All but the sign bit of a float32.
- // flOne := XMM(0x3f800000 repeated four times) // 1 as a float32.
- // flAlmost65536 := XMM(0x477fffff repeated four times) // 255.99998 * 256 as a float32.
- MOVOU flSignMask<>(SB), X3
- MOVOU flOne<>(SB), X4
- MOVOU flAlmost65536<>(SB), X5
- `
- // Packed add mnemonic: 32-bit integer add for fx, single-precision float add for fl.
- fxAdd = `PADDD`
- flAdd = `ADDPS`
- // Clamp-and-scale snippets: both read X1 and leave the result in X2.
- fxClampAndScale = `
- // y = abs(x)
- // y >>= 2 // Shift by 2*ϕ - 16.
- // y = min(y, fxAlmost65536)
- //
- // pabsd %xmm1,%xmm2
- // psrld $0x2,%xmm2
- // pminud %xmm5,%xmm2
- //
- // Hopefully we'll get these opcode mnemonics into the assembler for Go
- // 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
- // it's similar.
- BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
- BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
- BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
- `
- flClampAndScale = `
- // y = x & flSignMask
- // y = min(y, flOne)
- // y = mul(y, flAlmost65536)
- MOVOU X3, X2
- ANDPS X1, X2
- MINPS X4, X2
- MULPS X5, X2
- `
- // Conversion of X2 to int32; the fl variant swaps MXCSR values around CVTPS2PL.
- fxConvertToInt32 = `
- // z = convertToInt32(y)
- // No-op.
- `
- flConvertToInt32 = `
- // z = convertToInt32(y)
- LDMXCSR mxcsrNew-4(SP)
- CVTPS2PL X2, X2
- LDMXCSR mxcsrOrig-8(SP)
- `
- // NOTE(review): opOverStore4's emitted comments say 0xfff and 0x800080001; the code uses 0xffff (X9) and 0x80008001 (X10).
- opOverStore4 = `
- // Blend over the dst's prior value. SIMD for i in 0..3:
- //
- // dstA := uint32(dst[i]) * 0x101
- // maskA := z@i
- // outA := dstA*(0xffff-maskA)/0xffff + maskA
- // dst[i] = uint8(outA >> 8)
- //
- // First, set X0 to dstA*(0xfff-maskA).
- MOVL (DI), X0
- PSHUFB X8, X0
- MOVOU X9, X11
- PSUBL X2, X11
- PMULLD X11, X0
- // We implement uint32 division by 0xffff as multiplication by a magic
- // constant (0x800080001) and then a shift by a magic constant (47).
- // See TestDivideByFFFF for a justification.
- //
- // That multiplication widens from uint32 to uint64, so we have to
- // duplicate and shift our four uint32s from one XMM register (X0) to
- // two XMM registers (X0 and X11).
- //
- // Move the second and fourth uint32s in X0 to be the first and third
- // uint32s in X11.
- MOVOU X0, X11
- PSRLQ $32, X11
- // Multiply by magic, shift by magic.
- //
- // pmuludq %xmm10,%xmm0
- // pmuludq %xmm10,%xmm11
- BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2
- BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda
- PSRLQ $47, X0
- PSRLQ $47, X11
- // Merge the two registers back to one, X11, and add maskA.
- PSLLQ $32, X11
- XORPS X0, X11
- PADDD X11, X2
- // As per opSrcStore4, shuffle and copy the 4 second-lowest bytes.
- PSHUFB X6, X2
- MOVL X2, (DI)
- `
- opSrcStore4 = `
- // z = shuffleTheSecondLowestBytesOfEach4ByteElement(z)
- // copy(dst[:4], low4BytesOf(z))
- PSHUFB X6, X2
- MOVL X2, (DI)
- `
- maskStore4 = `
- // copy(dst[:4], z)
- MOVOU X2, (DI)
- `
- // Single-element stores, the scalar counterparts of the Store4 snippets above.
- opOverStore1 = `
- // Blend over the dst's prior value.
- //
- // dstA := uint32(dst[0]) * 0x101
- // maskA := z
- // outA := dstA*(0xffff-maskA)/0xffff + maskA
- // dst[0] = uint8(outA >> 8)
- MOVBLZX (DI), R12
- IMULL $0x101, R12
- MOVL X2, R13
- MOVL $0xffff, AX
- SUBL R13, AX
- MULL R12 // MULL's implicit arg is AX, and the result is stored in DX:AX.
- MOVL $0x80008001, BX // Divide by 0xffff is to first multiply by a magic constant...
- MULL BX // MULL's implicit arg is AX, and the result is stored in DX:AX.
- SHRL $15, DX // ...and then shift by another magic constant (47 - 32 = 15).
- ADDL DX, R13
- SHRL $8, R13
- MOVB R13, (DI)
- `
- opSrcStore1 = `
- // dst[0] = uint8(z>>8)
- MOVL X2, BX
- SHRL $8, BX
- MOVB BX, (DI)
- `
- maskStore1 = `
- // dst[0] = uint32(z)
- MOVL X2, (DI)
- `
- // Names of the constants in X6, X8, X9 and X10; "-" where the matching LoadXMMRegs snippet loads nothing.
- opOverXMM6 = `gather`
- opSrcXMM6 = `gather`
- maskXMM6 = `-`
-
- opOverXMM8 = `scatterAndMulBy0x101`
- opSrcXMM8 = `-`
- maskXMM8 = `-`
-
- opOverXMM9 = `fxAlmost65536`
- opSrcXMM9 = `-`
- maskXMM9 = `-`
-
- opOverXMM10 = `inverseFFFF`
- opSrcXMM10 = `-`
- maskXMM10 = `-`
- // Loads of the constants named by the XMM6, XMM8, XMM9 and XMM10 fields.
- opOverLoadXMMRegs = `
- // gather := XMM(see above) // PSHUFB shuffle mask.
- // scatterAndMulBy0x101 := XMM(see above) // PSHUFB shuffle mask.
- // fxAlmost65536 := XMM(0x0000ffff repeated four times) // 0xffff.
- // inverseFFFF := XMM(0x80008001 repeated four times) // Magic constant for dividing by 0xffff.
- MOVOU gather<>(SB), X6
- MOVOU scatterAndMulBy0x101<>(SB), X8
- MOVOU fxAlmost65536<>(SB), X9
- MOVOU inverseFFFF<>(SB), X10
- `
- opSrcLoadXMMRegs = `
- // gather := XMM(see above) // PSHUFB shuffle mask.
- MOVOU gather<>(SB), X6
- `
- maskLoadXMMRegs = ``
-)