diff options
author | Christopher Speller <crspeller@gmail.com> | 2018-04-16 05:37:14 -0700 |
---|---|---|
committer | Joram Wilander <jwawilander@gmail.com> | 2018-04-16 08:37:14 -0400 |
commit | 6e2cb00008cbf09e556b00f87603797fcaa47e09 (patch) | |
tree | 3c0eb55ff4226a3f024aad373140d1fb860a6404 /vendor/golang.org/x/image/vector | |
parent | bf24f51c4e1cc6286885460672f7f449e8c6f5ef (diff) | |
download | chat-6e2cb00008cbf09e556b00f87603797fcaa47e09.tar.gz chat-6e2cb00008cbf09e556b00f87603797fcaa47e09.tar.bz2 chat-6e2cb00008cbf09e556b00f87603797fcaa47e09.zip |
Dependency upgrades and moving to dep. (#8630)
Diffstat (limited to 'vendor/golang.org/x/image/vector')
-rw-r--r-- | vendor/golang.org/x/image/vector/acc_amd64.go | 34 | ||||
-rw-r--r-- | vendor/golang.org/x/image/vector/acc_amd64.s | 1083 | ||||
-rw-r--r-- | vendor/golang.org/x/image/vector/acc_other.go | 17 | ||||
-rw-r--r-- | vendor/golang.org/x/image/vector/acc_test.go | 651 | ||||
-rw-r--r-- | vendor/golang.org/x/image/vector/gen.go | 447 | ||||
-rw-r--r-- | vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl | 171 | ||||
-rw-r--r-- | vendor/golang.org/x/image/vector/raster_fixed.go | 327 | ||||
-rw-r--r-- | vendor/golang.org/x/image/vector/raster_floating.go | 220 | ||||
-rw-r--r-- | vendor/golang.org/x/image/vector/vector.go | 472 | ||||
-rw-r--r-- | vendor/golang.org/x/image/vector/vector_test.go | 519 |
10 files changed, 0 insertions, 3941 deletions
diff --git a/vendor/golang.org/x/image/vector/acc_amd64.go b/vendor/golang.org/x/image/vector/acc_amd64.go deleted file mode 100644 index 68f6e030c..000000000 --- a/vendor/golang.org/x/image/vector/acc_amd64.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build go1.6 -// +build !noasm - -package vector - -func haveSSE4_1() bool - -var haveFixedAccumulateSIMD = haveSSE4_1() - -const haveFloatingAccumulateSIMD = true - -//go:noescape -func fixedAccumulateOpOverSIMD(dst []uint8, src []uint32) - -//go:noescape -func fixedAccumulateOpSrcSIMD(dst []uint8, src []uint32) - -//go:noescape -func fixedAccumulateMaskSIMD(buf []uint32) - -//go:noescape -func floatingAccumulateOpOverSIMD(dst []uint8, src []float32) - -//go:noescape -func floatingAccumulateOpSrcSIMD(dst []uint8, src []float32) - -//go:noescape -func floatingAccumulateMaskSIMD(dst []uint32, src []float32) diff --git a/vendor/golang.org/x/image/vector/acc_amd64.s b/vendor/golang.org/x/image/vector/acc_amd64.s deleted file mode 100644 index 6a424bcdd..000000000 --- a/vendor/golang.org/x/image/vector/acc_amd64.s +++ /dev/null @@ -1,1083 +0,0 @@ -// generated by go run gen.go; DO NOT EDIT - -// +build !appengine -// +build gc -// +build go1.6 -// +build !noasm - -#include "textflag.h" - -// fl is short for floating point math. fx is short for fixed point math. - -DATA flAlmost65536<>+0x00(SB)/8, $0x477fffff477fffff -DATA flAlmost65536<>+0x08(SB)/8, $0x477fffff477fffff -DATA flOne<>+0x00(SB)/8, $0x3f8000003f800000 -DATA flOne<>+0x08(SB)/8, $0x3f8000003f800000 -DATA flSignMask<>+0x00(SB)/8, $0x7fffffff7fffffff -DATA flSignMask<>+0x08(SB)/8, $0x7fffffff7fffffff - -// scatterAndMulBy0x101 is a PSHUFB mask that brings the low four bytes of an -// XMM register to the low byte of that register's four uint32 values. 
It -// duplicates those bytes, effectively multiplying each uint32 by 0x101. -// -// It transforms a little-endian 16-byte XMM value from -// ijkl???????????? -// to -// ii00jj00kk00ll00 -DATA scatterAndMulBy0x101<>+0x00(SB)/8, $0x8080010180800000 -DATA scatterAndMulBy0x101<>+0x08(SB)/8, $0x8080030380800202 - -// gather is a PSHUFB mask that brings the second-lowest byte of the XMM -// register's four uint32 values to the low four bytes of that register. -// -// It transforms a little-endian 16-byte XMM value from -// ?i???j???k???l?? -// to -// ijkl000000000000 -DATA gather<>+0x00(SB)/8, $0x808080800d090501 -DATA gather<>+0x08(SB)/8, $0x8080808080808080 - -DATA fxAlmost65536<>+0x00(SB)/8, $0x0000ffff0000ffff -DATA fxAlmost65536<>+0x08(SB)/8, $0x0000ffff0000ffff -DATA inverseFFFF<>+0x00(SB)/8, $0x8000800180008001 -DATA inverseFFFF<>+0x08(SB)/8, $0x8000800180008001 - -GLOBL flAlmost65536<>(SB), (NOPTR+RODATA), $16 -GLOBL flOne<>(SB), (NOPTR+RODATA), $16 -GLOBL flSignMask<>(SB), (NOPTR+RODATA), $16 -GLOBL scatterAndMulBy0x101<>(SB), (NOPTR+RODATA), $16 -GLOBL gather<>(SB), (NOPTR+RODATA), $16 -GLOBL fxAlmost65536<>(SB), (NOPTR+RODATA), $16 -GLOBL inverseFFFF<>(SB), (NOPTR+RODATA), $16 - -// func haveSSE4_1() bool -TEXT ·haveSSE4_1(SB), NOSPLIT, $0 - MOVQ $1, AX - CPUID - SHRQ $19, CX - ANDQ $1, CX - MOVB CX, ret+0(FP) - RET - -// ---------------------------------------------------------------------------- - -// func fixedAccumulateOpOverSIMD(dst []uint8, src []uint32) -// -// XMM registers. 
Variable names are per -// https://github.com/google/font-rs/blob/master/src/accumulate.c -// -// xmm0 scratch -// xmm1 x -// xmm2 y, z -// xmm3 - -// xmm4 - -// xmm5 fxAlmost65536 -// xmm6 gather -// xmm7 offset -// xmm8 scatterAndMulBy0x101 -// xmm9 fxAlmost65536 -// xmm10 inverseFFFF -TEXT ·fixedAccumulateOpOverSIMD(SB), NOSPLIT, $0-48 - - MOVQ dst_base+0(FP), DI - MOVQ dst_len+8(FP), BX - MOVQ src_base+24(FP), SI - MOVQ src_len+32(FP), R10 - - // Sanity check that len(dst) >= len(src). - CMPQ BX, R10 - JLT fxAccOpOverEnd - - // R10 = len(src) &^ 3 - // R11 = len(src) - MOVQ R10, R11 - ANDQ $-4, R10 - - // fxAlmost65536 := XMM(0x0000ffff repeated four times) // Maximum of an uint16. - MOVOU fxAlmost65536<>(SB), X5 - - // gather := XMM(see above) // PSHUFB shuffle mask. - // scatterAndMulBy0x101 := XMM(see above) // PSHUFB shuffle mask. - // fxAlmost65536 := XMM(0x0000ffff repeated four times) // 0xffff. - // inverseFFFF := XMM(0x80008001 repeated four times) // Magic constant for dividing by 0xffff. - MOVOU gather<>(SB), X6 - MOVOU scatterAndMulBy0x101<>(SB), X8 - MOVOU fxAlmost65536<>(SB), X9 - MOVOU inverseFFFF<>(SB), X10 - - // offset := XMM(0x00000000 repeated four times) // Cumulative sum. - XORPS X7, X7 - - // i := 0 - MOVQ $0, R9 - -fxAccOpOverLoop4: - // for i < (len(src) &^ 3) - CMPQ R9, R10 - JAE fxAccOpOverLoop1 - - // x = XMM(s0, s1, s2, s3) - // - // Where s0 is src[i+0], s1 is src[i+1], etc. - MOVOU (SI), X1 - - // scratch = XMM(0, s0, s1, s2) - // x += scratch // yields x == XMM(s0, s0+s1, s1+s2, s2+s3) - MOVOU X1, X0 - PSLLO $4, X0 - PADDD X0, X1 - - // scratch = XMM(0, 0, 0, 0) - // scratch = XMM(scratch@0, scratch@0, x@0, x@1) // yields scratch == XMM(0, 0, s0, s0+s1) - // x += scratch // yields x == XMM(s0, s0+s1, s0+s1+s2, s0+s1+s2+s3) - XORPS X0, X0 - SHUFPS $0x40, X1, X0 - PADDD X0, X1 - - // x += offset - PADDD X7, X1 - - // y = abs(x) - // y >>= 2 // Shift by 2*ϕ - 16. 
- // y = min(y, fxAlmost65536) - // - // pabsd %xmm1,%xmm2 - // psrld $0x2,%xmm2 - // pminud %xmm5,%xmm2 - // - // Hopefully we'll get these opcode mnemonics into the assembler for Go - // 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but - // it's similar. - BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1 - BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02 - BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5 - - // z = convertToInt32(y) - // No-op. - - // Blend over the dst's prior value. SIMD for i in 0..3: - // - // dstA := uint32(dst[i]) * 0x101 - // maskA := z@i - // outA := dstA*(0xffff-maskA)/0xffff + maskA - // dst[i] = uint8(outA >> 8) - // - // First, set X0 to dstA*(0xfff-maskA). - MOVL (DI), X0 - PSHUFB X8, X0 - MOVOU X9, X11 - PSUBL X2, X11 - PMULLD X11, X0 - - // We implement uint32 division by 0xffff as multiplication by a magic - // constant (0x800080001) and then a shift by a magic constant (47). - // See TestDivideByFFFF for a justification. - // - // That multiplication widens from uint32 to uint64, so we have to - // duplicate and shift our four uint32s from one XMM register (X0) to - // two XMM registers (X0 and X11). - // - // Move the second and fourth uint32s in X0 to be the first and third - // uint32s in X11. - MOVOU X0, X11 - PSRLQ $32, X11 - - // Multiply by magic, shift by magic. - // - // pmuludq %xmm10,%xmm0 - // pmuludq %xmm10,%xmm11 - BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2 - BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda - PSRLQ $47, X0 - PSRLQ $47, X11 - - // Merge the two registers back to one, X11, and add maskA. - PSLLQ $32, X11 - XORPS X0, X11 - PADDD X11, X2 - - // As per opSrcStore4, shuffle and copy the 4 second-lowest bytes. 
- PSHUFB X6, X2 - MOVL X2, (DI) - - // offset = XMM(x@3, x@3, x@3, x@3) - MOVOU X1, X7 - SHUFPS $0xff, X1, X7 - - // i += 4 - // dst = dst[4:] - // src = src[4:] - ADDQ $4, R9 - ADDQ $4, DI - ADDQ $16, SI - JMP fxAccOpOverLoop4 - -fxAccOpOverLoop1: - // for i < len(src) - CMPQ R9, R11 - JAE fxAccOpOverEnd - - // x = src[i] + offset - MOVL (SI), X1 - PADDD X7, X1 - - // y = abs(x) - // y >>= 2 // Shift by 2*ϕ - 16. - // y = min(y, fxAlmost65536) - // - // pabsd %xmm1,%xmm2 - // psrld $0x2,%xmm2 - // pminud %xmm5,%xmm2 - // - // Hopefully we'll get these opcode mnemonics into the assembler for Go - // 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but - // it's similar. - BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1 - BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02 - BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5 - - // z = convertToInt32(y) - // No-op. - - // Blend over the dst's prior value. - // - // dstA := uint32(dst[0]) * 0x101 - // maskA := z - // outA := dstA*(0xffff-maskA)/0xffff + maskA - // dst[0] = uint8(outA >> 8) - MOVBLZX (DI), R12 - IMULL $0x101, R12 - MOVL X2, R13 - MOVL $0xffff, AX - SUBL R13, AX - MULL R12 // MULL's implicit arg is AX, and the result is stored in DX:AX. - MOVL $0x80008001, BX // Divide by 0xffff is to first multiply by a magic constant... - MULL BX // MULL's implicit arg is AX, and the result is stored in DX:AX. - SHRL $15, DX // ...and then shift by another magic constant (47 - 32 = 15). - ADDL DX, R13 - SHRL $8, R13 - MOVB R13, (DI) - - // offset = x - MOVOU X1, X7 - - // i += 1 - // dst = dst[1:] - // src = src[1:] - ADDQ $1, R9 - ADDQ $1, DI - ADDQ $4, SI - JMP fxAccOpOverLoop1 - -fxAccOpOverEnd: - RET - -// ---------------------------------------------------------------------------- - -// func fixedAccumulateOpSrcSIMD(dst []uint8, src []uint32) -// -// XMM registers. 
Variable names are per -// https://github.com/google/font-rs/blob/master/src/accumulate.c -// -// xmm0 scratch -// xmm1 x -// xmm2 y, z -// xmm3 - -// xmm4 - -// xmm5 fxAlmost65536 -// xmm6 gather -// xmm7 offset -// xmm8 - -// xmm9 - -// xmm10 - -TEXT ·fixedAccumulateOpSrcSIMD(SB), NOSPLIT, $0-48 - - MOVQ dst_base+0(FP), DI - MOVQ dst_len+8(FP), BX - MOVQ src_base+24(FP), SI - MOVQ src_len+32(FP), R10 - - // Sanity check that len(dst) >= len(src). - CMPQ BX, R10 - JLT fxAccOpSrcEnd - - // R10 = len(src) &^ 3 - // R11 = len(src) - MOVQ R10, R11 - ANDQ $-4, R10 - - // fxAlmost65536 := XMM(0x0000ffff repeated four times) // Maximum of an uint16. - MOVOU fxAlmost65536<>(SB), X5 - - // gather := XMM(see above) // PSHUFB shuffle mask. - MOVOU gather<>(SB), X6 - - // offset := XMM(0x00000000 repeated four times) // Cumulative sum. - XORPS X7, X7 - - // i := 0 - MOVQ $0, R9 - -fxAccOpSrcLoop4: - // for i < (len(src) &^ 3) - CMPQ R9, R10 - JAE fxAccOpSrcLoop1 - - // x = XMM(s0, s1, s2, s3) - // - // Where s0 is src[i+0], s1 is src[i+1], etc. - MOVOU (SI), X1 - - // scratch = XMM(0, s0, s1, s2) - // x += scratch // yields x == XMM(s0, s0+s1, s1+s2, s2+s3) - MOVOU X1, X0 - PSLLO $4, X0 - PADDD X0, X1 - - // scratch = XMM(0, 0, 0, 0) - // scratch = XMM(scratch@0, scratch@0, x@0, x@1) // yields scratch == XMM(0, 0, s0, s0+s1) - // x += scratch // yields x == XMM(s0, s0+s1, s0+s1+s2, s0+s1+s2+s3) - XORPS X0, X0 - SHUFPS $0x40, X1, X0 - PADDD X0, X1 - - // x += offset - PADDD X7, X1 - - // y = abs(x) - // y >>= 2 // Shift by 2*ϕ - 16. - // y = min(y, fxAlmost65536) - // - // pabsd %xmm1,%xmm2 - // psrld $0x2,%xmm2 - // pminud %xmm5,%xmm2 - // - // Hopefully we'll get these opcode mnemonics into the assembler for Go - // 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but - // it's similar. 
- BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1 - BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02 - BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5 - - // z = convertToInt32(y) - // No-op. - - // z = shuffleTheSecondLowestBytesOfEach4ByteElement(z) - // copy(dst[:4], low4BytesOf(z)) - PSHUFB X6, X2 - MOVL X2, (DI) - - // offset = XMM(x@3, x@3, x@3, x@3) - MOVOU X1, X7 - SHUFPS $0xff, X1, X7 - - // i += 4 - // dst = dst[4:] - // src = src[4:] - ADDQ $4, R9 - ADDQ $4, DI - ADDQ $16, SI - JMP fxAccOpSrcLoop4 - -fxAccOpSrcLoop1: - // for i < len(src) - CMPQ R9, R11 - JAE fxAccOpSrcEnd - - // x = src[i] + offset - MOVL (SI), X1 - PADDD X7, X1 - - // y = abs(x) - // y >>= 2 // Shift by 2*ϕ - 16. - // y = min(y, fxAlmost65536) - // - // pabsd %xmm1,%xmm2 - // psrld $0x2,%xmm2 - // pminud %xmm5,%xmm2 - // - // Hopefully we'll get these opcode mnemonics into the assembler for Go - // 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but - // it's similar. - BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1 - BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02 - BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5 - - // z = convertToInt32(y) - // No-op. - - // dst[0] = uint8(z>>8) - MOVL X2, BX - SHRL $8, BX - MOVB BX, (DI) - - // offset = x - MOVOU X1, X7 - - // i += 1 - // dst = dst[1:] - // src = src[1:] - ADDQ $1, R9 - ADDQ $1, DI - ADDQ $4, SI - JMP fxAccOpSrcLoop1 - -fxAccOpSrcEnd: - RET - -// ---------------------------------------------------------------------------- - -// func fixedAccumulateMaskSIMD(buf []uint32) -// -// XMM registers. 
Variable names are per -// https://github.com/google/font-rs/blob/master/src/accumulate.c -// -// xmm0 scratch -// xmm1 x -// xmm2 y, z -// xmm3 - -// xmm4 - -// xmm5 fxAlmost65536 -// xmm6 - -// xmm7 offset -// xmm8 - -// xmm9 - -// xmm10 - -TEXT ·fixedAccumulateMaskSIMD(SB), NOSPLIT, $0-24 - - MOVQ buf_base+0(FP), DI - MOVQ buf_len+8(FP), BX - MOVQ buf_base+0(FP), SI - MOVQ buf_len+8(FP), R10 - - // R10 = len(src) &^ 3 - // R11 = len(src) - MOVQ R10, R11 - ANDQ $-4, R10 - - // fxAlmost65536 := XMM(0x0000ffff repeated four times) // Maximum of an uint16. - MOVOU fxAlmost65536<>(SB), X5 - - // offset := XMM(0x00000000 repeated four times) // Cumulative sum. - XORPS X7, X7 - - // i := 0 - MOVQ $0, R9 - -fxAccMaskLoop4: - // for i < (len(src) &^ 3) - CMPQ R9, R10 - JAE fxAccMaskLoop1 - - // x = XMM(s0, s1, s2, s3) - // - // Where s0 is src[i+0], s1 is src[i+1], etc. - MOVOU (SI), X1 - - // scratch = XMM(0, s0, s1, s2) - // x += scratch // yields x == XMM(s0, s0+s1, s1+s2, s2+s3) - MOVOU X1, X0 - PSLLO $4, X0 - PADDD X0, X1 - - // scratch = XMM(0, 0, 0, 0) - // scratch = XMM(scratch@0, scratch@0, x@0, x@1) // yields scratch == XMM(0, 0, s0, s0+s1) - // x += scratch // yields x == XMM(s0, s0+s1, s0+s1+s2, s0+s1+s2+s3) - XORPS X0, X0 - SHUFPS $0x40, X1, X0 - PADDD X0, X1 - - // x += offset - PADDD X7, X1 - - // y = abs(x) - // y >>= 2 // Shift by 2*ϕ - 16. - // y = min(y, fxAlmost65536) - // - // pabsd %xmm1,%xmm2 - // psrld $0x2,%xmm2 - // pminud %xmm5,%xmm2 - // - // Hopefully we'll get these opcode mnemonics into the assembler for Go - // 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but - // it's similar. - BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1 - BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02 - BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5 - - // z = convertToInt32(y) - // No-op. 
- - // copy(dst[:4], z) - MOVOU X2, (DI) - - // offset = XMM(x@3, x@3, x@3, x@3) - MOVOU X1, X7 - SHUFPS $0xff, X1, X7 - - // i += 4 - // dst = dst[4:] - // src = src[4:] - ADDQ $4, R9 - ADDQ $16, DI - ADDQ $16, SI - JMP fxAccMaskLoop4 - -fxAccMaskLoop1: - // for i < len(src) - CMPQ R9, R11 - JAE fxAccMaskEnd - - // x = src[i] + offset - MOVL (SI), X1 - PADDD X7, X1 - - // y = abs(x) - // y >>= 2 // Shift by 2*ϕ - 16. - // y = min(y, fxAlmost65536) - // - // pabsd %xmm1,%xmm2 - // psrld $0x2,%xmm2 - // pminud %xmm5,%xmm2 - // - // Hopefully we'll get these opcode mnemonics into the assembler for Go - // 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but - // it's similar. - BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1 - BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02 - BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5 - - // z = convertToInt32(y) - // No-op. - - // dst[0] = uint32(z) - MOVL X2, (DI) - - // offset = x - MOVOU X1, X7 - - // i += 1 - // dst = dst[1:] - // src = src[1:] - ADDQ $1, R9 - ADDQ $4, DI - ADDQ $4, SI - JMP fxAccMaskLoop1 - -fxAccMaskEnd: - RET - -// ---------------------------------------------------------------------------- - -// func floatingAccumulateOpOverSIMD(dst []uint8, src []float32) -// -// XMM registers. Variable names are per -// https://github.com/google/font-rs/blob/master/src/accumulate.c -// -// xmm0 scratch -// xmm1 x -// xmm2 y, z -// xmm3 flSignMask -// xmm4 flOne -// xmm5 flAlmost65536 -// xmm6 gather -// xmm7 offset -// xmm8 scatterAndMulBy0x101 -// xmm9 fxAlmost65536 -// xmm10 inverseFFFF -TEXT ·floatingAccumulateOpOverSIMD(SB), NOSPLIT, $8-48 - - MOVQ dst_base+0(FP), DI - MOVQ dst_len+8(FP), BX - MOVQ src_base+24(FP), SI - MOVQ src_len+32(FP), R10 - - // Sanity check that len(dst) >= len(src). 
- CMPQ BX, R10 - JLT flAccOpOverEnd - - // R10 = len(src) &^ 3 - // R11 = len(src) - MOVQ R10, R11 - ANDQ $-4, R10 - - // Prepare to set MXCSR bits 13 and 14, so that the CVTPS2PL below is - // "Round To Zero". - STMXCSR mxcsrOrig-8(SP) - MOVL mxcsrOrig-8(SP), AX - ORL $0x6000, AX - MOVL AX, mxcsrNew-4(SP) - - // flSignMask := XMM(0x7fffffff repeated four times) // All but the sign bit of a float32. - // flOne := XMM(0x3f800000 repeated four times) // 1 as a float32. - // flAlmost65536 := XMM(0x477fffff repeated four times) // 255.99998 * 256 as a float32. - MOVOU flSignMask<>(SB), X3 - MOVOU flOne<>(SB), X4 - MOVOU flAlmost65536<>(SB), X5 - - // gather := XMM(see above) // PSHUFB shuffle mask. - // scatterAndMulBy0x101 := XMM(see above) // PSHUFB shuffle mask. - // fxAlmost65536 := XMM(0x0000ffff repeated four times) // 0xffff. - // inverseFFFF := XMM(0x80008001 repeated four times) // Magic constant for dividing by 0xffff. - MOVOU gather<>(SB), X6 - MOVOU scatterAndMulBy0x101<>(SB), X8 - MOVOU fxAlmost65536<>(SB), X9 - MOVOU inverseFFFF<>(SB), X10 - - // offset := XMM(0x00000000 repeated four times) // Cumulative sum. - XORPS X7, X7 - - // i := 0 - MOVQ $0, R9 - -flAccOpOverLoop4: - // for i < (len(src) &^ 3) - CMPQ R9, R10 - JAE flAccOpOverLoop1 - - // x = XMM(s0, s1, s2, s3) - // - // Where s0 is src[i+0], s1 is src[i+1], etc. 
- MOVOU (SI), X1 - - // scratch = XMM(0, s0, s1, s2) - // x += scratch // yields x == XMM(s0, s0+s1, s1+s2, s2+s3) - MOVOU X1, X0 - PSLLO $4, X0 - ADDPS X0, X1 - - // scratch = XMM(0, 0, 0, 0) - // scratch = XMM(scratch@0, scratch@0, x@0, x@1) // yields scratch == XMM(0, 0, s0, s0+s1) - // x += scratch // yields x == XMM(s0, s0+s1, s0+s1+s2, s0+s1+s2+s3) - XORPS X0, X0 - SHUFPS $0x40, X1, X0 - ADDPS X0, X1 - - // x += offset - ADDPS X7, X1 - - // y = x & flSignMask - // y = min(y, flOne) - // y = mul(y, flAlmost65536) - MOVOU X3, X2 - ANDPS X1, X2 - MINPS X4, X2 - MULPS X5, X2 - - // z = convertToInt32(y) - LDMXCSR mxcsrNew-4(SP) - CVTPS2PL X2, X2 - LDMXCSR mxcsrOrig-8(SP) - - // Blend over the dst's prior value. SIMD for i in 0..3: - // - // dstA := uint32(dst[i]) * 0x101 - // maskA := z@i - // outA := dstA*(0xffff-maskA)/0xffff + maskA - // dst[i] = uint8(outA >> 8) - // - // First, set X0 to dstA*(0xfff-maskA). - MOVL (DI), X0 - PSHUFB X8, X0 - MOVOU X9, X11 - PSUBL X2, X11 - PMULLD X11, X0 - - // We implement uint32 division by 0xffff as multiplication by a magic - // constant (0x800080001) and then a shift by a magic constant (47). - // See TestDivideByFFFF for a justification. - // - // That multiplication widens from uint32 to uint64, so we have to - // duplicate and shift our four uint32s from one XMM register (X0) to - // two XMM registers (X0 and X11). - // - // Move the second and fourth uint32s in X0 to be the first and third - // uint32s in X11. - MOVOU X0, X11 - PSRLQ $32, X11 - - // Multiply by magic, shift by magic. - // - // pmuludq %xmm10,%xmm0 - // pmuludq %xmm10,%xmm11 - BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2 - BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda - PSRLQ $47, X0 - PSRLQ $47, X11 - - // Merge the two registers back to one, X11, and add maskA. - PSLLQ $32, X11 - XORPS X0, X11 - PADDD X11, X2 - - // As per opSrcStore4, shuffle and copy the 4 second-lowest bytes. 
- PSHUFB X6, X2 - MOVL X2, (DI) - - // offset = XMM(x@3, x@3, x@3, x@3) - MOVOU X1, X7 - SHUFPS $0xff, X1, X7 - - // i += 4 - // dst = dst[4:] - // src = src[4:] - ADDQ $4, R9 - ADDQ $4, DI - ADDQ $16, SI - JMP flAccOpOverLoop4 - -flAccOpOverLoop1: - // for i < len(src) - CMPQ R9, R11 - JAE flAccOpOverEnd - - // x = src[i] + offset - MOVL (SI), X1 - ADDPS X7, X1 - - // y = x & flSignMask - // y = min(y, flOne) - // y = mul(y, flAlmost65536) - MOVOU X3, X2 - ANDPS X1, X2 - MINPS X4, X2 - MULPS X5, X2 - - // z = convertToInt32(y) - LDMXCSR mxcsrNew-4(SP) - CVTPS2PL X2, X2 - LDMXCSR mxcsrOrig-8(SP) - - // Blend over the dst's prior value. - // - // dstA := uint32(dst[0]) * 0x101 - // maskA := z - // outA := dstA*(0xffff-maskA)/0xffff + maskA - // dst[0] = uint8(outA >> 8) - MOVBLZX (DI), R12 - IMULL $0x101, R12 - MOVL X2, R13 - MOVL $0xffff, AX - SUBL R13, AX - MULL R12 // MULL's implicit arg is AX, and the result is stored in DX:AX. - MOVL $0x80008001, BX // Divide by 0xffff is to first multiply by a magic constant... - MULL BX // MULL's implicit arg is AX, and the result is stored in DX:AX. - SHRL $15, DX // ...and then shift by another magic constant (47 - 32 = 15). - ADDL DX, R13 - SHRL $8, R13 - MOVB R13, (DI) - - // offset = x - MOVOU X1, X7 - - // i += 1 - // dst = dst[1:] - // src = src[1:] - ADDQ $1, R9 - ADDQ $1, DI - ADDQ $4, SI - JMP flAccOpOverLoop1 - -flAccOpOverEnd: - RET - -// ---------------------------------------------------------------------------- - -// func floatingAccumulateOpSrcSIMD(dst []uint8, src []float32) -// -// XMM registers. 
Variable names are per -// https://github.com/google/font-rs/blob/master/src/accumulate.c -// -// xmm0 scratch -// xmm1 x -// xmm2 y, z -// xmm3 flSignMask -// xmm4 flOne -// xmm5 flAlmost65536 -// xmm6 gather -// xmm7 offset -// xmm8 - -// xmm9 - -// xmm10 - -TEXT ·floatingAccumulateOpSrcSIMD(SB), NOSPLIT, $8-48 - - MOVQ dst_base+0(FP), DI - MOVQ dst_len+8(FP), BX - MOVQ src_base+24(FP), SI - MOVQ src_len+32(FP), R10 - - // Sanity check that len(dst) >= len(src). - CMPQ BX, R10 - JLT flAccOpSrcEnd - - // R10 = len(src) &^ 3 - // R11 = len(src) - MOVQ R10, R11 - ANDQ $-4, R10 - - // Prepare to set MXCSR bits 13 and 14, so that the CVTPS2PL below is - // "Round To Zero". - STMXCSR mxcsrOrig-8(SP) - MOVL mxcsrOrig-8(SP), AX - ORL $0x6000, AX - MOVL AX, mxcsrNew-4(SP) - - // flSignMask := XMM(0x7fffffff repeated four times) // All but the sign bit of a float32. - // flOne := XMM(0x3f800000 repeated four times) // 1 as a float32. - // flAlmost65536 := XMM(0x477fffff repeated four times) // 255.99998 * 256 as a float32. - MOVOU flSignMask<>(SB), X3 - MOVOU flOne<>(SB), X4 - MOVOU flAlmost65536<>(SB), X5 - - // gather := XMM(see above) // PSHUFB shuffle mask. - MOVOU gather<>(SB), X6 - - // offset := XMM(0x00000000 repeated four times) // Cumulative sum. - XORPS X7, X7 - - // i := 0 - MOVQ $0, R9 - -flAccOpSrcLoop4: - // for i < (len(src) &^ 3) - CMPQ R9, R10 - JAE flAccOpSrcLoop1 - - // x = XMM(s0, s1, s2, s3) - // - // Where s0 is src[i+0], s1 is src[i+1], etc. 
- MOVOU (SI), X1 - - // scratch = XMM(0, s0, s1, s2) - // x += scratch // yields x == XMM(s0, s0+s1, s1+s2, s2+s3) - MOVOU X1, X0 - PSLLO $4, X0 - ADDPS X0, X1 - - // scratch = XMM(0, 0, 0, 0) - // scratch = XMM(scratch@0, scratch@0, x@0, x@1) // yields scratch == XMM(0, 0, s0, s0+s1) - // x += scratch // yields x == XMM(s0, s0+s1, s0+s1+s2, s0+s1+s2+s3) - XORPS X0, X0 - SHUFPS $0x40, X1, X0 - ADDPS X0, X1 - - // x += offset - ADDPS X7, X1 - - // y = x & flSignMask - // y = min(y, flOne) - // y = mul(y, flAlmost65536) - MOVOU X3, X2 - ANDPS X1, X2 - MINPS X4, X2 - MULPS X5, X2 - - // z = convertToInt32(y) - LDMXCSR mxcsrNew-4(SP) - CVTPS2PL X2, X2 - LDMXCSR mxcsrOrig-8(SP) - - // z = shuffleTheSecondLowestBytesOfEach4ByteElement(z) - // copy(dst[:4], low4BytesOf(z)) - PSHUFB X6, X2 - MOVL X2, (DI) - - // offset = XMM(x@3, x@3, x@3, x@3) - MOVOU X1, X7 - SHUFPS $0xff, X1, X7 - - // i += 4 - // dst = dst[4:] - // src = src[4:] - ADDQ $4, R9 - ADDQ $4, DI - ADDQ $16, SI - JMP flAccOpSrcLoop4 - -flAccOpSrcLoop1: - // for i < len(src) - CMPQ R9, R11 - JAE flAccOpSrcEnd - - // x = src[i] + offset - MOVL (SI), X1 - ADDPS X7, X1 - - // y = x & flSignMask - // y = min(y, flOne) - // y = mul(y, flAlmost65536) - MOVOU X3, X2 - ANDPS X1, X2 - MINPS X4, X2 - MULPS X5, X2 - - // z = convertToInt32(y) - LDMXCSR mxcsrNew-4(SP) - CVTPS2PL X2, X2 - LDMXCSR mxcsrOrig-8(SP) - - // dst[0] = uint8(z>>8) - MOVL X2, BX - SHRL $8, BX - MOVB BX, (DI) - - // offset = x - MOVOU X1, X7 - - // i += 1 - // dst = dst[1:] - // src = src[1:] - ADDQ $1, R9 - ADDQ $1, DI - ADDQ $4, SI - JMP flAccOpSrcLoop1 - -flAccOpSrcEnd: - RET - -// ---------------------------------------------------------------------------- - -// func floatingAccumulateMaskSIMD(dst []uint32, src []float32) -// -// XMM registers. 
Variable names are per -// https://github.com/google/font-rs/blob/master/src/accumulate.c -// -// xmm0 scratch -// xmm1 x -// xmm2 y, z -// xmm3 flSignMask -// xmm4 flOne -// xmm5 flAlmost65536 -// xmm6 - -// xmm7 offset -// xmm8 - -// xmm9 - -// xmm10 - -TEXT ·floatingAccumulateMaskSIMD(SB), NOSPLIT, $8-48 - - MOVQ dst_base+0(FP), DI - MOVQ dst_len+8(FP), BX - MOVQ src_base+24(FP), SI - MOVQ src_len+32(FP), R10 - - // Sanity check that len(dst) >= len(src). - CMPQ BX, R10 - JLT flAccMaskEnd - - // R10 = len(src) &^ 3 - // R11 = len(src) - MOVQ R10, R11 - ANDQ $-4, R10 - - // Prepare to set MXCSR bits 13 and 14, so that the CVTPS2PL below is - // "Round To Zero". - STMXCSR mxcsrOrig-8(SP) - MOVL mxcsrOrig-8(SP), AX - ORL $0x6000, AX - MOVL AX, mxcsrNew-4(SP) - - // flSignMask := XMM(0x7fffffff repeated four times) // All but the sign bit of a float32. - // flOne := XMM(0x3f800000 repeated four times) // 1 as a float32. - // flAlmost65536 := XMM(0x477fffff repeated four times) // 255.99998 * 256 as a float32. - MOVOU flSignMask<>(SB), X3 - MOVOU flOne<>(SB), X4 - MOVOU flAlmost65536<>(SB), X5 - - // offset := XMM(0x00000000 repeated four times) // Cumulative sum. - XORPS X7, X7 - - // i := 0 - MOVQ $0, R9 - -flAccMaskLoop4: - // for i < (len(src) &^ 3) - CMPQ R9, R10 - JAE flAccMaskLoop1 - - // x = XMM(s0, s1, s2, s3) - // - // Where s0 is src[i+0], s1 is src[i+1], etc. 
- MOVOU (SI), X1 - - // scratch = XMM(0, s0, s1, s2) - // x += scratch // yields x == XMM(s0, s0+s1, s1+s2, s2+s3) - MOVOU X1, X0 - PSLLO $4, X0 - ADDPS X0, X1 - - // scratch = XMM(0, 0, 0, 0) - // scratch = XMM(scratch@0, scratch@0, x@0, x@1) // yields scratch == XMM(0, 0, s0, s0+s1) - // x += scratch // yields x == XMM(s0, s0+s1, s0+s1+s2, s0+s1+s2+s3) - XORPS X0, X0 - SHUFPS $0x40, X1, X0 - ADDPS X0, X1 - - // x += offset - ADDPS X7, X1 - - // y = x & flSignMask - // y = min(y, flOne) - // y = mul(y, flAlmost65536) - MOVOU X3, X2 - ANDPS X1, X2 - MINPS X4, X2 - MULPS X5, X2 - - // z = convertToInt32(y) - LDMXCSR mxcsrNew-4(SP) - CVTPS2PL X2, X2 - LDMXCSR mxcsrOrig-8(SP) - - // copy(dst[:4], z) - MOVOU X2, (DI) - - // offset = XMM(x@3, x@3, x@3, x@3) - MOVOU X1, X7 - SHUFPS $0xff, X1, X7 - - // i += 4 - // dst = dst[4:] - // src = src[4:] - ADDQ $4, R9 - ADDQ $16, DI - ADDQ $16, SI - JMP flAccMaskLoop4 - -flAccMaskLoop1: - // for i < len(src) - CMPQ R9, R11 - JAE flAccMaskEnd - - // x = src[i] + offset - MOVL (SI), X1 - ADDPS X7, X1 - - // y = x & flSignMask - // y = min(y, flOne) - // y = mul(y, flAlmost65536) - MOVOU X3, X2 - ANDPS X1, X2 - MINPS X4, X2 - MULPS X5, X2 - - // z = convertToInt32(y) - LDMXCSR mxcsrNew-4(SP) - CVTPS2PL X2, X2 - LDMXCSR mxcsrOrig-8(SP) - - // dst[0] = uint32(z) - MOVL X2, (DI) - - // offset = x - MOVOU X1, X7 - - // i += 1 - // dst = dst[1:] - // src = src[1:] - ADDQ $1, R9 - ADDQ $4, DI - ADDQ $4, SI - JMP flAccMaskLoop1 - -flAccMaskEnd: - RET diff --git a/vendor/golang.org/x/image/vector/acc_other.go b/vendor/golang.org/x/image/vector/acc_other.go deleted file mode 100644 index 30425beed..000000000 --- a/vendor/golang.org/x/image/vector/acc_other.go +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -// +build !amd64 appengine !gc !go1.6 noasm - -package vector - -const haveFixedAccumulateSIMD = false -const haveFloatingAccumulateSIMD = false - -func fixedAccumulateOpOverSIMD(dst []uint8, src []uint32) {} -func fixedAccumulateOpSrcSIMD(dst []uint8, src []uint32) {} -func fixedAccumulateMaskSIMD(buf []uint32) {} -func floatingAccumulateOpOverSIMD(dst []uint8, src []float32) {} -func floatingAccumulateOpSrcSIMD(dst []uint8, src []float32) {} -func floatingAccumulateMaskSIMD(dst []uint32, src []float32) {} diff --git a/vendor/golang.org/x/image/vector/acc_test.go b/vendor/golang.org/x/image/vector/acc_test.go deleted file mode 100644 index d80f7651c..000000000 --- a/vendor/golang.org/x/image/vector/acc_test.go +++ /dev/null @@ -1,651 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package vector - -import ( - "bytes" - "fmt" - "math/rand" - "testing" -) - -// TestDivideByFFFF tests that dividing by 0xffff is equivalent to multiplying -// and then shifting by magic constants. The Go compiler itself issues this -// multiply-and-shift for a division by the constant value 0xffff. This trick -// is used in the asm code as the GOARCH=amd64 SIMD instructions have parallel -// multiply but not parallel divide. -// -// There's undoubtedly a justification somewhere in Hacker's Delight chapter 10 -// "Integer Division by Constants", but I don't have a more specific link. 
-// -// http://www.hackersdelight.org/divcMore.pdf and -// http://www.hackersdelight.org/magic.htm -func TestDivideByFFFF(t *testing.T) { - const mul, shift = 0x80008001, 47 - rng := rand.New(rand.NewSource(1)) - for i := 0; i < 20000; i++ { - u := rng.Uint32() - got := uint32((uint64(u) * mul) >> shift) - want := u / 0xffff - if got != want { - t.Fatalf("i=%d, u=%#08x: got %#08x, want %#08x", i, u, got, want) - } - } -} - -// TestXxxSIMDUnaligned tests that unaligned SIMD loads/stores don't crash. - -func TestFixedAccumulateSIMDUnaligned(t *testing.T) { - if !haveFixedAccumulateSIMD { - t.Skip("No SIMD implemention") - } - - dst := make([]uint8, 64) - src := make([]uint32, 64) - for d := 0; d < 16; d++ { - for s := 0; s < 16; s++ { - fixedAccumulateOpSrcSIMD(dst[d:d+32], src[s:s+32]) - } - } -} - -func TestFloatingAccumulateSIMDUnaligned(t *testing.T) { - if !haveFloatingAccumulateSIMD { - t.Skip("No SIMD implemention") - } - - dst := make([]uint8, 64) - src := make([]float32, 64) - for d := 0; d < 16; d++ { - for s := 0; s < 16; s++ { - floatingAccumulateOpSrcSIMD(dst[d:d+32], src[s:s+32]) - } - } -} - -// TestXxxSIMDShortDst tests that the SIMD implementations don't write past the -// end of the dst buffer. 
- -func TestFixedAccumulateSIMDShortDst(t *testing.T) { - if !haveFixedAccumulateSIMD { - t.Skip("No SIMD implemention") - } - - const oneQuarter = uint32(int2ϕ(fxOne*fxOne)) / 4 - src := []uint32{oneQuarter, oneQuarter, oneQuarter, oneQuarter} - for i := 0; i < 4; i++ { - dst := make([]uint8, 4) - fixedAccumulateOpSrcSIMD(dst[:i], src[:i]) - for j := range dst { - if j < i { - if got := dst[j]; got == 0 { - t.Errorf("i=%d, j=%d: got %#02x, want non-zero", i, j, got) - } - } else { - if got := dst[j]; got != 0 { - t.Errorf("i=%d, j=%d: got %#02x, want zero", i, j, got) - } - } - } - } -} - -func TestFloatingAccumulateSIMDShortDst(t *testing.T) { - if !haveFloatingAccumulateSIMD { - t.Skip("No SIMD implemention") - } - - const oneQuarter = 0.25 - src := []float32{oneQuarter, oneQuarter, oneQuarter, oneQuarter} - for i := 0; i < 4; i++ { - dst := make([]uint8, 4) - floatingAccumulateOpSrcSIMD(dst[:i], src[:i]) - for j := range dst { - if j < i { - if got := dst[j]; got == 0 { - t.Errorf("i=%d, j=%d: got %#02x, want non-zero", i, j, got) - } - } else { - if got := dst[j]; got != 0 { - t.Errorf("i=%d, j=%d: got %#02x, want zero", i, j, got) - } - } - } - } -} - -func TestFixedAccumulateOpOverShort(t *testing.T) { testAcc(t, fxInShort, fxMaskShort, "over") } -func TestFixedAccumulateOpSrcShort(t *testing.T) { testAcc(t, fxInShort, fxMaskShort, "src") } -func TestFixedAccumulateMaskShort(t *testing.T) { testAcc(t, fxInShort, fxMaskShort, "mask") } -func TestFloatingAccumulateOpOverShort(t *testing.T) { testAcc(t, flInShort, flMaskShort, "over") } -func TestFloatingAccumulateOpSrcShort(t *testing.T) { testAcc(t, flInShort, flMaskShort, "src") } -func TestFloatingAccumulateMaskShort(t *testing.T) { testAcc(t, flInShort, flMaskShort, "mask") } - -func TestFixedAccumulateOpOver16(t *testing.T) { testAcc(t, fxIn16, fxMask16, "over") } -func TestFixedAccumulateOpSrc16(t *testing.T) { testAcc(t, fxIn16, fxMask16, "src") } -func TestFixedAccumulateMask16(t *testing.T) { 
testAcc(t, fxIn16, fxMask16, "mask") } -func TestFloatingAccumulateOpOver16(t *testing.T) { testAcc(t, flIn16, flMask16, "over") } -func TestFloatingAccumulateOpSrc16(t *testing.T) { testAcc(t, flIn16, flMask16, "src") } -func TestFloatingAccumulateMask16(t *testing.T) { testAcc(t, flIn16, flMask16, "mask") } - -func testAcc(t *testing.T, in interface{}, mask []uint32, op string) { - for _, simd := range []bool{false, true} { - maxN := 0 - switch in := in.(type) { - case []uint32: - if simd && !haveFixedAccumulateSIMD { - continue - } - maxN = len(in) - case []float32: - if simd && !haveFloatingAccumulateSIMD { - continue - } - maxN = len(in) - } - - for _, n := range []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, - 33, 55, 79, 96, 120, 165, 256, maxN} { - - if n > maxN { - continue - } - - var ( - got8, want8 []uint8 - got32, want32 []uint32 - ) - switch op { - case "over": - const background = 0x40 - got8 = make([]uint8, n) - for i := range got8 { - got8[i] = background - } - want8 = make([]uint8, n) - for i := range want8 { - dstA := uint32(background * 0x101) - maskA := mask[i] - outA := dstA*(0xffff-maskA)/0xffff + maskA - want8[i] = uint8(outA >> 8) - } - - case "src": - got8 = make([]uint8, n) - want8 = make([]uint8, n) - for i := range want8 { - want8[i] = uint8(mask[i] >> 8) - } - - case "mask": - got32 = make([]uint32, n) - want32 = mask[:n] - } - - switch in := in.(type) { - case []uint32: - switch op { - case "over": - if simd { - fixedAccumulateOpOverSIMD(got8, in[:n]) - } else { - fixedAccumulateOpOver(got8, in[:n]) - } - case "src": - if simd { - fixedAccumulateOpSrcSIMD(got8, in[:n]) - } else { - fixedAccumulateOpSrc(got8, in[:n]) - } - case "mask": - copy(got32, in[:n]) - if simd { - fixedAccumulateMaskSIMD(got32) - } else { - fixedAccumulateMask(got32) - } - } - case []float32: - switch op { - case "over": - if simd { - floatingAccumulateOpOverSIMD(got8, in[:n]) - } else { - floatingAccumulateOpOver(got8, in[:n]) - } - case 
"src": - if simd { - floatingAccumulateOpSrcSIMD(got8, in[:n]) - } else { - floatingAccumulateOpSrc(got8, in[:n]) - } - case "mask": - if simd { - floatingAccumulateMaskSIMD(got32, in[:n]) - } else { - floatingAccumulateMask(got32, in[:n]) - } - } - } - - if op != "mask" { - if !bytes.Equal(got8, want8) { - t.Errorf("simd=%t, n=%d:\ngot: % x\nwant: % x", simd, n, got8, want8) - } - } else { - if !uint32sEqual(got32, want32) { - t.Errorf("simd=%t, n=%d:\ngot: % x\nwant: % x", simd, n, got32, want32) - } - } - } - } -} - -func uint32sEqual(xs, ys []uint32) bool { - if len(xs) != len(ys) { - return false - } - for i := range xs { - if xs[i] != ys[i] { - return false - } - } - return true -} - -func float32sEqual(xs, ys []float32) bool { - if len(xs) != len(ys) { - return false - } - for i := range xs { - if xs[i] != ys[i] { - return false - } - } - return true -} - -func BenchmarkFixedAccumulateOpOver16(b *testing.B) { benchAcc(b, fxIn16, "over", false) } -func BenchmarkFixedAccumulateOpOverSIMD16(b *testing.B) { benchAcc(b, fxIn16, "over", true) } -func BenchmarkFixedAccumulateOpSrc16(b *testing.B) { benchAcc(b, fxIn16, "src", false) } -func BenchmarkFixedAccumulateOpSrcSIMD16(b *testing.B) { benchAcc(b, fxIn16, "src", true) } -func BenchmarkFixedAccumulateMask16(b *testing.B) { benchAcc(b, fxIn16, "mask", false) } -func BenchmarkFixedAccumulateMaskSIMD16(b *testing.B) { benchAcc(b, fxIn16, "mask", true) } -func BenchmarkFloatingAccumulateOpOver16(b *testing.B) { benchAcc(b, flIn16, "over", false) } -func BenchmarkFloatingAccumulateOpOverSIMD16(b *testing.B) { benchAcc(b, flIn16, "over", true) } -func BenchmarkFloatingAccumulateOpSrc16(b *testing.B) { benchAcc(b, flIn16, "src", false) } -func BenchmarkFloatingAccumulateOpSrcSIMD16(b *testing.B) { benchAcc(b, flIn16, "src", true) } -func BenchmarkFloatingAccumulateMask16(b *testing.B) { benchAcc(b, flIn16, "mask", false) } -func BenchmarkFloatingAccumulateMaskSIMD16(b *testing.B) { benchAcc(b, flIn16, "mask", true) } 
- -func BenchmarkFixedAccumulateOpOver64(b *testing.B) { benchAcc(b, fxIn64, "over", false) } -func BenchmarkFixedAccumulateOpOverSIMD64(b *testing.B) { benchAcc(b, fxIn64, "over", true) } -func BenchmarkFixedAccumulateOpSrc64(b *testing.B) { benchAcc(b, fxIn64, "src", false) } -func BenchmarkFixedAccumulateOpSrcSIMD64(b *testing.B) { benchAcc(b, fxIn64, "src", true) } -func BenchmarkFixedAccumulateMask64(b *testing.B) { benchAcc(b, fxIn64, "mask", false) } -func BenchmarkFixedAccumulateMaskSIMD64(b *testing.B) { benchAcc(b, fxIn64, "mask", true) } -func BenchmarkFloatingAccumulateOpOver64(b *testing.B) { benchAcc(b, flIn64, "over", false) } -func BenchmarkFloatingAccumulateOpOverSIMD64(b *testing.B) { benchAcc(b, flIn64, "over", true) } -func BenchmarkFloatingAccumulateOpSrc64(b *testing.B) { benchAcc(b, flIn64, "src", false) } -func BenchmarkFloatingAccumulateOpSrcSIMD64(b *testing.B) { benchAcc(b, flIn64, "src", true) } -func BenchmarkFloatingAccumulateMask64(b *testing.B) { benchAcc(b, flIn64, "mask", false) } -func BenchmarkFloatingAccumulateMaskSIMD64(b *testing.B) { benchAcc(b, flIn64, "mask", true) } - -func benchAcc(b *testing.B, in interface{}, op string, simd bool) { - var f func() - - switch in := in.(type) { - case []uint32: - if simd && !haveFixedAccumulateSIMD { - b.Skip("No SIMD implemention") - } - - switch op { - case "over": - dst := make([]uint8, len(in)) - if simd { - f = func() { fixedAccumulateOpOverSIMD(dst, in) } - } else { - f = func() { fixedAccumulateOpOver(dst, in) } - } - case "src": - dst := make([]uint8, len(in)) - if simd { - f = func() { fixedAccumulateOpSrcSIMD(dst, in) } - } else { - f = func() { fixedAccumulateOpSrc(dst, in) } - } - case "mask": - buf := make([]uint32, len(in)) - copy(buf, in) - if simd { - f = func() { fixedAccumulateMaskSIMD(buf) } - } else { - f = func() { fixedAccumulateMask(buf) } - } - } - - case []float32: - if simd && !haveFloatingAccumulateSIMD { - b.Skip("No SIMD implemention") - } - - switch op { - 
case "over": - dst := make([]uint8, len(in)) - if simd { - f = func() { floatingAccumulateOpOverSIMD(dst, in) } - } else { - f = func() { floatingAccumulateOpOver(dst, in) } - } - case "src": - dst := make([]uint8, len(in)) - if simd { - f = func() { floatingAccumulateOpSrcSIMD(dst, in) } - } else { - f = func() { floatingAccumulateOpSrc(dst, in) } - } - case "mask": - dst := make([]uint32, len(in)) - if simd { - f = func() { floatingAccumulateMaskSIMD(dst, in) } - } else { - f = func() { floatingAccumulateMask(dst, in) } - } - } - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - f() - } -} - -// itou exists because "uint32(int2ϕ(-1))" doesn't compile: constant -1 -// overflows uint32. -func itou(i int2ϕ) uint32 { - return uint32(i) -} - -var fxInShort = []uint32{ - itou(+0x08000), // +0.125, // Running sum: +0.125 - itou(-0x20000), // -0.500, // Running sum: -0.375 - itou(+0x10000), // +0.250, // Running sum: -0.125 - itou(+0x18000), // +0.375, // Running sum: +0.250 - itou(+0x08000), // +0.125, // Running sum: +0.375 - itou(+0x00000), // +0.000, // Running sum: +0.375 - itou(-0x40000), // -1.000, // Running sum: -0.625 - itou(-0x20000), // -0.500, // Running sum: -1.125 - itou(+0x10000), // +0.250, // Running sum: -0.875 - itou(+0x38000), // +0.875, // Running sum: +0.000 - itou(+0x10000), // +0.250, // Running sum: +0.250 - itou(+0x30000), // +0.750, // Running sum: +1.000 -} - -var flInShort = []float32{ - +0.125, // Running sum: +0.125 - -0.500, // Running sum: -0.375 - +0.250, // Running sum: -0.125 - +0.375, // Running sum: +0.250 - +0.125, // Running sum: +0.375 - +0.000, // Running sum: +0.375 - -1.000, // Running sum: -0.625 - -0.500, // Running sum: -1.125 - +0.250, // Running sum: -0.875 - +0.875, // Running sum: +0.000 - +0.250, // Running sum: +0.250 - +0.750, // Running sum: +1.000 -} - -// It's OK for fxMaskShort and flMaskShort to have slightly different values. 
-// Both the fixed and floating point implementations already have (different) -// rounding errors in the xxxLineTo methods before we get to accumulation. It's -// OK for 50% coverage (in ideal math) to be approximated by either 0x7fff or -// 0x8000. Both slices do contain checks that 0% and 100% map to 0x0000 and -// 0xffff, as does checkCornersCenter in vector_test.go. -// -// It is important, though, for the SIMD and non-SIMD fixed point -// implementations to give the exact same output, and likewise for the floating -// point implementations. - -var fxMaskShort = []uint32{ - 0x2000, - 0x6000, - 0x2000, - 0x4000, - 0x6000, - 0x6000, - 0xa000, - 0xffff, - 0xe000, - 0x0000, - 0x4000, - 0xffff, -} - -var flMaskShort = []uint32{ - 0x1fff, - 0x5fff, - 0x1fff, - 0x3fff, - 0x5fff, - 0x5fff, - 0x9fff, - 0xffff, - 0xdfff, - 0x0000, - 0x3fff, - 0xffff, -} - -func TestMakeFxInXxx(t *testing.T) { - dump := func(us []uint32) string { - var b bytes.Buffer - for i, u := range us { - if i%8 == 0 { - b.WriteByte('\n') - } - fmt.Fprintf(&b, "%#08x, ", u) - } - return b.String() - } - - if !uint32sEqual(fxIn16, hardCodedFxIn16) { - t.Errorf("height 16: got:%v\nwant:%v", dump(fxIn16), dump(hardCodedFxIn16)) - } -} - -func TestMakeFlInXxx(t *testing.T) { - dump := func(fs []float32) string { - var b bytes.Buffer - for i, f := range fs { - if i%8 == 0 { - b.WriteByte('\n') - } - fmt.Fprintf(&b, "%v, ", f) - } - return b.String() - } - - if !float32sEqual(flIn16, hardCodedFlIn16) { - t.Errorf("height 16: got:%v\nwant:%v", dump(flIn16), dump(hardCodedFlIn16)) - } -} - -func makeInXxx(height int, useFloatingPointMath bool) *Rasterizer { - width, data := scaledBenchmarkGlyphData(height) - z := NewRasterizer(width, height) - z.setUseFloatingPointMath(useFloatingPointMath) - for _, d := range data { - switch d.n { - case 0: - z.MoveTo(d.px, d.py) - case 1: - z.LineTo(d.px, d.py) - case 2: - z.QuadTo(d.px, d.py, d.qx, d.qy) - } - } - return z -} - -func makeFxInXxx(height int) []uint32 { - 
z := makeInXxx(height, false) - return z.bufU32 -} - -func makeFlInXxx(height int) []float32 { - z := makeInXxx(height, true) - return z.bufF32 -} - -// fxInXxx and flInXxx are the z.bufU32 and z.bufF32 inputs to the accumulate -// functions when rasterizing benchmarkGlyphData at a height of Xxx pixels. -// -// fxMaskXxx and flMaskXxx are the corresponding golden outputs of those -// accumulateMask functions. -// -// The hardCodedEtc versions are a sanity check for unexpected changes in the -// rasterization implementations up to but not including accumulation. - -var ( - fxIn16 = makeFxInXxx(16) - fxIn64 = makeFxInXxx(64) - flIn16 = makeFlInXxx(16) - flIn64 = makeFlInXxx(64) -) - -var hardCodedFxIn16 = []uint32{ - 0x00000000, 0x00000000, 0xffffe91d, 0xfffe7c4a, 0xfffeaa9f, 0xffff4e33, 0xffffc1c5, 0x00007782, - 0x00009619, 0x0001a857, 0x000129e9, 0x00000028, 0x00000000, 0x00000000, 0xffff6e70, 0xfffd3199, - 0xffff5ff8, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00014b29, - 0x0002acf3, 0x000007e2, 0xffffca5a, 0xfffcab73, 0xffff8a34, 0x00001b55, 0x0001b334, 0x0001449e, - 0x0000434d, 0xffff62ec, 0xfffe1443, 0xffff325d, 0x00000000, 0x0002234a, 0x0001dcb6, 0xfffe2948, - 0xfffdd6b8, 0x00000000, 0x00028cc0, 0x00017340, 0x00000000, 0x00000000, 0x00000000, 0xffffd2d6, - 0xfffcadd0, 0xffff7f5c, 0x00007400, 0x00038c00, 0xfffe9260, 0xffff2da0, 0x0000023a, 0x0002259b, - 0x0000182a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffdc600, 0xfffe3a00, 0x00000059, - 0x0003a44d, 0x00005b59, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0xfffe33f3, 0xfffdcc0d, 0x00000000, 0x00033c02, 0x0000c3fe, 0x00000000, - 0x00000000, 0xffffa13d, 0xfffeeec8, 0xffff8c02, 0xffff8c48, 0xffffc7b5, 0x00000000, 0xffff5b68, - 0xffff3498, 0x00000000, 0x00033c00, 0x0000c400, 0xffff9bc4, 0xfffdf4a3, 0xfffe8df3, 0xffffe1a8, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00033c00, - 
0x000092c7, 0xfffcf373, 0xffff3dc7, 0x00000fcc, 0x00011ae7, 0x000130c3, 0x0000680d, 0x00004a59, - 0x00000a20, 0xfffe9dc4, 0xfffe4a3c, 0x00000000, 0x00033c00, 0xfffe87ef, 0xfffe3c11, 0x0000105e, - 0x0002b9c4, 0x000135dc, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffe3600, 0xfffdca00, - 0x00000000, 0x00033c00, 0xfffd9000, 0xffff3400, 0x0000e400, 0x00031c00, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0xfffe3600, 0xfffdca00, 0x00000000, 0x00033c00, 0xfffcf9a5, - 0xffffca5b, 0x000120e6, 0x0002df1a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0xfffdb195, 0xfffe4e6b, 0x00000000, 0x00033c00, 0xfffd9e00, 0xffff2600, 0x00002f0e, 0x00033ea3, - 0x0000924d, 0x00000000, 0x00000000, 0x00000000, 0xfffe83b3, 0xfffd881d, 0xfffff431, 0x00000000, - 0x00031f60, 0xffff297a, 0xfffdb726, 0x00000000, 0x000053a7, 0x0001b506, 0x0000a24b, 0xffffa32d, - 0xfffead9b, 0xffff0479, 0xffffffc9, 0x00000000, 0x00000000, 0x0002d800, 0x0001249d, 0xfffd67bb, - 0xfffe9baa, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000ac03, 0x0001448b, - 0xfffe0f70, 0x00000000, 0x000229ea, 0x0001d616, 0xffffff8c, 0xfffebf76, 0xfffe54d9, 0xffff5d9e, - 0xffffd3eb, 0x0000c65e, 0x0000fc15, 0x0001d491, 0xffffb566, 0xfffd9433, 0x00000000, 0x0000e4ec, -} - -var hardCodedFlIn16 = []float32{ - 0, 0, -0.022306755, -0.3782405, -0.33334962, -0.1741521, -0.0607556, 0.11660573, - 0.14664596, 0.41462868, 0.2907673, 0.0001568835, 0, 0, -0.14239307, -0.7012868, - -0.15632017, 0, 0, 0, 0, 0, 0, 0.3230303, - 0.6690931, 0.007876594, -0.05189419, -0.832786, -0.11531975, 0.026225802, 0.42518616, 0.3154636, - 0.06598757, -0.15304244, -0.47969276, -0.20012794, 0, 0.5327272, 0.46727282, -0.45950258, - -0.5404974, 0, 0.63484025, 0.36515975, 0, 0, 0, -0.04351709, - -0.8293345, -0.12714837, 0.11087036, 0.88912964, -0.35792422, -0.2053554, 0.0022513224, 0.5374398, - 0.023588525, 0, 0, 0, 0, -0.55346966, -0.44653034, 0.0002531938, - 0.9088273, 0.090919495, 0, 0, 0, 0, 0, 0, - 0, 0, 
-0.44745448, -0.5525455, 0, 0.80748945, 0.19251058, 0, - 0, -0.092476256, -0.2661464, -0.11322958, -0.11298219, -0.055094406, 0, -0.16045958, - -0.1996116, 0, 0.80748653, 0.19251347, -0.09804727, -0.51129663, -0.3610403, -0.029615778, - 0, 0, 0, 0, 0, 0, 0, 0.80748653, - 0.14411622, -0.76251525, -0.1890875, 0.01527351, 0.27528667, 0.29730347, 0.101477206, 0.07259522, - 0.009900213, -0.34395567, -0.42788061, 0, 0.80748653, -0.3648737, -0.44261283, 0.015778137, - 0.6826565, 0.30156538, 0, 0, 0, 0, -0.44563293, -0.55436707, - 0, 0.80748653, -0.60703933, -0.20044717, 0.22371745, 0.77628255, 0, 0, - 0, 0, 0, -0.44563293, -0.55436707, 0, 0.80748653, -0.7550391, - -0.05244744, 0.2797074, 0.72029257, 0, 0, 0, 0, 0, - -0.57440215, -0.42559785, 0, 0.80748653, -0.59273535, -0.21475118, 0.04544862, 0.81148535, - 0.14306602, 0, 0, 0, -0.369642, -0.61841226, -0.011945802, 0, - 0.7791623, -0.20691396, -0.57224834, 0, 0.08218567, 0.42637306, 0.1586175, -0.089709565, - -0.32935485, -0.24788953, -0.00022224105, 0, 0, 0.7085409, 0.28821066, -0.64765793, - -0.34909368, 0, 0, 0, 0, 0, 0.16679136, 0.31914657, - -0.48593786, 0, 0.537915, 0.462085, -0.00041967133, -0.3120329, -0.41914812, -0.15886839, - -0.042683028, 0.19370951, 0.24624406, 0.45803425, -0.07049577, -0.6091341, 0, 0.22253075, -} - -var fxMask16 = []uint32{ - 0x0000, 0x0000, 0x05b8, 0x66a6, 0xbbfe, 0xe871, 0xf800, 0xda20, 0xb499, 0x4a84, 0x0009, 0x0000, 0x0000, - 0x0000, 0x2463, 0xd7fd, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xad35, 0x01f8, 0x0000, - 0x0d69, 0xe28c, 0xffff, 0xf92a, 0x8c5d, 0x3b36, 0x2a62, 0x51a7, 0xcc97, 0xffff, 0xffff, 0x772d, 0x0000, - 0x75ad, 0xffff, 0xffff, 0x5ccf, 0x0000, 0x0000, 0x0000, 0x0000, 0x0b4a, 0xdfd6, 0xffff, 0xe2ff, 0x0000, - 0x5b67, 0x8fff, 0x8f70, 0x060a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x8e7f, 0xffff, 0xffe9, 0x16d6, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7303, 0xffff, 0xffff, 0x30ff, - 0x0000, 0x0000, 0x0000, 0x17b0, 0x5bfe, 
0x78fe, 0x95ec, 0xa3fe, 0xa3fe, 0xcd24, 0xfffe, 0xfffe, 0x30fe, - 0x0001, 0x190d, 0x9be5, 0xf868, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0x30fe, - 0x0c4c, 0xcf6f, 0xfffe, 0xfc0b, 0xb551, 0x6920, 0x4f1d, 0x3c87, 0x39ff, 0x928e, 0xffff, 0xffff, 0x30ff, - 0x8f03, 0xffff, 0xfbe7, 0x4d76, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x727f, 0xffff, 0xffff, 0x30ff, - 0xccff, 0xffff, 0xc6ff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x727f, 0xffff, 0xffff, 0x30ff, - 0xf296, 0xffff, 0xb7c6, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x939a, 0xffff, 0xffff, 0x30ff, - 0xc97f, 0xffff, 0xf43c, 0x2493, 0x0000, 0x0000, 0x0000, 0x0000, 0x5f13, 0xfd0c, 0xffff, 0xffff, 0x3827, - 0x6dc9, 0xffff, 0xffff, 0xeb16, 0x7dd4, 0x5541, 0x6c76, 0xc10f, 0xfff1, 0xffff, 0xffff, 0xffff, 0x49ff, - 0x00d8, 0xa6e9, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xd4fe, 0x83db, 0xffff, 0xffff, 0x7584, - 0x0000, 0x001c, 0x503e, 0xbb08, 0xe3a1, 0xeea6, 0xbd0e, 0x7e09, 0x08e5, 0x1b8b, 0xb67f, 0xb67f, 0x7d44, -} - -var flMask16 = []uint32{ - 0x0000, 0x0000, 0x05b5, 0x668a, 0xbbe0, 0xe875, 0xf803, 0xda29, 0xb49f, 0x4a7a, 0x000a, 0x0000, 0x0000, - 0x0000, 0x2473, 0xd7fb, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xad4d, 0x0204, 0x0000, - 0x0d48, 0xe27a, 0xffff, 0xf949, 0x8c70, 0x3bae, 0x2ac9, 0x51f7, 0xccc4, 0xffff, 0xffff, 0x779f, 0x0000, - 0x75a1, 0xffff, 0xffff, 0x5d7b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0b23, 0xdf73, 0xffff, 0xe39d, 0x0000, - 0x5ba0, 0x9033, 0x8f9f, 0x0609, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x8db0, 0xffff, 0xffef, 0x1746, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x728c, 0xffff, 0xffff, 0x3148, - 0x0000, 0x0000, 0x0000, 0x17ac, 0x5bce, 0x78cb, 0x95b7, 0xa3d2, 0xa3d2, 0xcce6, 0xffff, 0xffff, 0x3148, - 0x0000, 0x1919, 0x9bfd, 0xf86b, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x3148, - 0x0c63, 0xcf97, 0xffff, 0xfc17, 0xb59d, 0x6981, 0x4f87, 0x3cf1, 0x3a68, 0x9276, 0xffff, 0xffff, 
0x3148, - 0x8eb0, 0xffff, 0xfbf5, 0x4d33, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7214, 0xffff, 0xffff, 0x3148, - 0xccaf, 0xffff, 0xc6ba, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7214, 0xffff, 0xffff, 0x3148, - 0xf292, 0xffff, 0xb865, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x930c, 0xffff, 0xffff, 0x3148, - 0xc906, 0xffff, 0xf45d, 0x249f, 0x0000, 0x0000, 0x0000, 0x0000, 0x5ea0, 0xfcf1, 0xffff, 0xffff, 0x3888, - 0x6d81, 0xffff, 0xffff, 0xeaf5, 0x7dcf, 0x5533, 0x6c2b, 0xc07b, 0xfff1, 0xffff, 0xffff, 0xffff, 0x4a9d, - 0x00d4, 0xa6a1, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xd54d, 0x8399, 0xffff, 0xffff, 0x764b, - 0x0000, 0x001b, 0x4ffc, 0xbb4a, 0xe3f5, 0xeee3, 0xbd4c, 0x7e42, 0x0900, 0x1b0c, 0xb6fc, 0xb6fc, 0x7e04, -} - -// TestFixedFloatingCloseness compares the closeness of the fixed point and -// floating point rasterizer. -func TestFixedFloatingCloseness(t *testing.T) { - if len(fxMask16) != len(flMask16) { - t.Fatalf("len(fxMask16) != len(flMask16)") - } - - total := uint32(0) - for i := range fxMask16 { - a := fxMask16[i] - b := flMask16[i] - if a > b { - total += a - b - } else { - total += b - a - } - } - n := len(fxMask16) - - // This log message is useful when changing the fixed point rasterizer - // implementation, such as by changing ϕ. Assuming that the floating point - // rasterizer is accurate, the average difference is a measure of how - // inaccurate the (faster) fixed point rasterizer is. - // - // Smaller is better. 
- percent := float64(total*100) / float64(n*65535) - t.Logf("Comparing closeness of the fixed point and floating point rasterizer.\n"+ - "Specifically, the elements of fxMask16 and flMask16.\n"+ - "Total diff = %d, n = %d, avg = %.5f out of 65535, or %.5f%%.\n", - total, n, float64(total)/float64(n), percent) - - const thresholdPercent = 1.0 - if percent > thresholdPercent { - t.Errorf("average difference: got %.5f%%, want <= %.5f%%", percent, thresholdPercent) - } -} diff --git a/vendor/golang.org/x/image/vector/gen.go b/vendor/golang.org/x/image/vector/gen.go deleted file mode 100644 index 28b298b5e..000000000 --- a/vendor/golang.org/x/image/vector/gen.go +++ /dev/null @@ -1,447 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "bytes" - "io/ioutil" - "log" - "strings" - "text/template" -) - -const ( - copyright = "" + - "// Copyright 2016 The Go Authors. 
All rights reserved.\n" + - "// Use of this source code is governed by a BSD-style\n" + - "// license that can be found in the LICENSE file.\n" - - doNotEdit = "// generated by go run gen.go; DO NOT EDIT\n" - - dashDashDash = "// --------" -) - -func main() { - tmpl, err := ioutil.ReadFile("gen_acc_amd64.s.tmpl") - if err != nil { - log.Fatalf("ReadFile: %v", err) - } - if !bytes.HasPrefix(tmpl, []byte(copyright)) { - log.Fatal("source template did not start with the copyright header") - } - tmpl = tmpl[len(copyright):] - - preamble := []byte(nil) - if i := bytes.Index(tmpl, []byte(dashDashDash)); i < 0 { - log.Fatalf("source template did not contain %q", dashDashDash) - } else { - preamble, tmpl = tmpl[:i], tmpl[i:] - } - - t, err := template.New("").Parse(string(tmpl)) - if err != nil { - log.Fatalf("Parse: %v", err) - } - - out := bytes.NewBuffer(nil) - out.WriteString(doNotEdit) - out.Write(preamble) - - for i, v := range instances { - if i != 0 { - out.WriteString("\n") - } - if strings.Contains(v.LoadArgs, "{{.ShortName}}") { - v.LoadArgs = strings.Replace(v.LoadArgs, "{{.ShortName}}", v.ShortName, -1) - } - if err := t.Execute(out, v); err != nil { - log.Fatalf("Execute(%q): %v", v.ShortName, err) - } - } - - if err := ioutil.WriteFile("acc_amd64.s", out.Bytes(), 0666); err != nil { - log.Fatalf("WriteFile: %v", err) - } -} - -var instances = []struct { - LongName string - ShortName string - FrameSize string - ArgsSize string - Args string - DstElemSize1 int - DstElemSize4 int - XMM3 string - XMM4 string - XMM5 string - XMM6 string - XMM8 string - XMM9 string - XMM10 string - LoadArgs string - Setup string - LoadXMMRegs string - Add string - ClampAndScale string - ConvertToInt32 string - Store4 string - Store1 string -}{{ - LongName: "fixedAccumulateOpOver", - ShortName: "fxAccOpOver", - FrameSize: fxFrameSize, - ArgsSize: twoArgArgsSize, - Args: "dst []uint8, src []uint32", - DstElemSize1: 1 * sizeOfUint8, - DstElemSize4: 4 * sizeOfUint8, - XMM3: fxXMM3, - 
XMM4: fxXMM4, - XMM5: fxXMM5, - XMM6: opOverXMM6, - XMM8: opOverXMM8, - XMM9: opOverXMM9, - XMM10: opOverXMM10, - LoadArgs: twoArgLoadArgs, - Setup: fxSetup, - LoadXMMRegs: fxLoadXMMRegs + "\n" + opOverLoadXMMRegs, - Add: fxAdd, - ClampAndScale: fxClampAndScale, - ConvertToInt32: fxConvertToInt32, - Store4: opOverStore4, - Store1: opOverStore1, -}, { - LongName: "fixedAccumulateOpSrc", - ShortName: "fxAccOpSrc", - FrameSize: fxFrameSize, - ArgsSize: twoArgArgsSize, - Args: "dst []uint8, src []uint32", - DstElemSize1: 1 * sizeOfUint8, - DstElemSize4: 4 * sizeOfUint8, - XMM3: fxXMM3, - XMM4: fxXMM4, - XMM5: fxXMM5, - XMM6: opSrcXMM6, - XMM8: opSrcXMM8, - XMM9: opSrcXMM9, - XMM10: opSrcXMM10, - LoadArgs: twoArgLoadArgs, - Setup: fxSetup, - LoadXMMRegs: fxLoadXMMRegs + "\n" + opSrcLoadXMMRegs, - Add: fxAdd, - ClampAndScale: fxClampAndScale, - ConvertToInt32: fxConvertToInt32, - Store4: opSrcStore4, - Store1: opSrcStore1, -}, { - LongName: "fixedAccumulateMask", - ShortName: "fxAccMask", - FrameSize: fxFrameSize, - ArgsSize: oneArgArgsSize, - Args: "buf []uint32", - DstElemSize1: 1 * sizeOfUint32, - DstElemSize4: 4 * sizeOfUint32, - XMM3: fxXMM3, - XMM4: fxXMM4, - XMM5: fxXMM5, - XMM6: maskXMM6, - XMM8: maskXMM8, - XMM9: maskXMM9, - XMM10: maskXMM10, - LoadArgs: oneArgLoadArgs, - Setup: fxSetup, - LoadXMMRegs: fxLoadXMMRegs + "\n" + maskLoadXMMRegs, - Add: fxAdd, - ClampAndScale: fxClampAndScale, - ConvertToInt32: fxConvertToInt32, - Store4: maskStore4, - Store1: maskStore1, -}, { - LongName: "floatingAccumulateOpOver", - ShortName: "flAccOpOver", - FrameSize: flFrameSize, - ArgsSize: twoArgArgsSize, - Args: "dst []uint8, src []float32", - DstElemSize1: 1 * sizeOfUint8, - DstElemSize4: 4 * sizeOfUint8, - XMM3: flXMM3, - XMM4: flXMM4, - XMM5: flXMM5, - XMM6: opOverXMM6, - XMM8: opOverXMM8, - XMM9: opOverXMM9, - XMM10: opOverXMM10, - LoadArgs: twoArgLoadArgs, - Setup: flSetup, - LoadXMMRegs: flLoadXMMRegs + "\n" + opOverLoadXMMRegs, - Add: flAdd, - ClampAndScale: 
flClampAndScale, - ConvertToInt32: flConvertToInt32, - Store4: opOverStore4, - Store1: opOverStore1, -}, { - LongName: "floatingAccumulateOpSrc", - ShortName: "flAccOpSrc", - FrameSize: flFrameSize, - ArgsSize: twoArgArgsSize, - Args: "dst []uint8, src []float32", - DstElemSize1: 1 * sizeOfUint8, - DstElemSize4: 4 * sizeOfUint8, - XMM3: flXMM3, - XMM4: flXMM4, - XMM5: flXMM5, - XMM6: opSrcXMM6, - XMM8: opSrcXMM8, - XMM9: opSrcXMM9, - XMM10: opSrcXMM10, - LoadArgs: twoArgLoadArgs, - Setup: flSetup, - LoadXMMRegs: flLoadXMMRegs + "\n" + opSrcLoadXMMRegs, - Add: flAdd, - ClampAndScale: flClampAndScale, - ConvertToInt32: flConvertToInt32, - Store4: opSrcStore4, - Store1: opSrcStore1, -}, { - LongName: "floatingAccumulateMask", - ShortName: "flAccMask", - FrameSize: flFrameSize, - ArgsSize: twoArgArgsSize, - Args: "dst []uint32, src []float32", - DstElemSize1: 1 * sizeOfUint32, - DstElemSize4: 4 * sizeOfUint32, - XMM3: flXMM3, - XMM4: flXMM4, - XMM5: flXMM5, - XMM6: maskXMM6, - XMM8: maskXMM8, - XMM9: maskXMM9, - XMM10: maskXMM10, - LoadArgs: twoArgLoadArgs, - Setup: flSetup, - LoadXMMRegs: flLoadXMMRegs + "\n" + maskLoadXMMRegs, - Add: flAdd, - ClampAndScale: flClampAndScale, - ConvertToInt32: flConvertToInt32, - Store4: maskStore4, - Store1: maskStore1, -}} - -const ( - fxFrameSize = `0` - flFrameSize = `8` - - oneArgArgsSize = `24` - twoArgArgsSize = `48` - - sizeOfUint8 = 1 - sizeOfUint32 = 4 - - fxXMM3 = `-` - flXMM3 = `flSignMask` - - fxXMM4 = `-` - flXMM4 = `flOne` - - fxXMM5 = `fxAlmost65536` - flXMM5 = `flAlmost65536` - - oneArgLoadArgs = ` - MOVQ buf_base+0(FP), DI - MOVQ buf_len+8(FP), BX - MOVQ buf_base+0(FP), SI - MOVQ buf_len+8(FP), R10 - ` - twoArgLoadArgs = ` - MOVQ dst_base+0(FP), DI - MOVQ dst_len+8(FP), BX - MOVQ src_base+24(FP), SI - MOVQ src_len+32(FP), R10 - // Sanity check that len(dst) >= len(src). 
- CMPQ BX, R10 - JLT {{.ShortName}}End - ` - - fxSetup = `` - flSetup = ` - // Prepare to set MXCSR bits 13 and 14, so that the CVTPS2PL below is - // "Round To Zero". - STMXCSR mxcsrOrig-8(SP) - MOVL mxcsrOrig-8(SP), AX - ORL $0x6000, AX - MOVL AX, mxcsrNew-4(SP) - ` - - fxLoadXMMRegs = ` - // fxAlmost65536 := XMM(0x0000ffff repeated four times) // Maximum of an uint16. - MOVOU fxAlmost65536<>(SB), X5 - ` - flLoadXMMRegs = ` - // flSignMask := XMM(0x7fffffff repeated four times) // All but the sign bit of a float32. - // flOne := XMM(0x3f800000 repeated four times) // 1 as a float32. - // flAlmost65536 := XMM(0x477fffff repeated four times) // 255.99998 * 256 as a float32. - MOVOU flSignMask<>(SB), X3 - MOVOU flOne<>(SB), X4 - MOVOU flAlmost65536<>(SB), X5 - ` - - fxAdd = `PADDD` - flAdd = `ADDPS` - - fxClampAndScale = ` - // y = abs(x) - // y >>= 2 // Shift by 2*ϕ - 16. - // y = min(y, fxAlmost65536) - // - // pabsd %xmm1,%xmm2 - // psrld $0x2,%xmm2 - // pminud %xmm5,%xmm2 - // - // Hopefully we'll get these opcode mnemonics into the assembler for Go - // 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but - // it's similar. - BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1 - BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02 - BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5 - ` - flClampAndScale = ` - // y = x & flSignMask - // y = min(y, flOne) - // y = mul(y, flAlmost65536) - MOVOU X3, X2 - ANDPS X1, X2 - MINPS X4, X2 - MULPS X5, X2 - ` - - fxConvertToInt32 = ` - // z = convertToInt32(y) - // No-op. - ` - flConvertToInt32 = ` - // z = convertToInt32(y) - LDMXCSR mxcsrNew-4(SP) - CVTPS2PL X2, X2 - LDMXCSR mxcsrOrig-8(SP) - ` - - opOverStore4 = ` - // Blend over the dst's prior value. SIMD for i in 0..3: - // - // dstA := uint32(dst[i]) * 0x101 - // maskA := z@i - // outA := dstA*(0xffff-maskA)/0xffff + maskA - // dst[i] = uint8(outA >> 8) - // - // First, set X0 to dstA*(0xfff-maskA). 
- MOVL (DI), X0 - PSHUFB X8, X0 - MOVOU X9, X11 - PSUBL X2, X11 - PMULLD X11, X0 - // We implement uint32 division by 0xffff as multiplication by a magic - // constant (0x800080001) and then a shift by a magic constant (47). - // See TestDivideByFFFF for a justification. - // - // That multiplication widens from uint32 to uint64, so we have to - // duplicate and shift our four uint32s from one XMM register (X0) to - // two XMM registers (X0 and X11). - // - // Move the second and fourth uint32s in X0 to be the first and third - // uint32s in X11. - MOVOU X0, X11 - PSRLQ $32, X11 - // Multiply by magic, shift by magic. - // - // pmuludq %xmm10,%xmm0 - // pmuludq %xmm10,%xmm11 - BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2 - BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda - PSRLQ $47, X0 - PSRLQ $47, X11 - // Merge the two registers back to one, X11, and add maskA. - PSLLQ $32, X11 - XORPS X0, X11 - PADDD X11, X2 - // As per opSrcStore4, shuffle and copy the 4 second-lowest bytes. - PSHUFB X6, X2 - MOVL X2, (DI) - ` - opSrcStore4 = ` - // z = shuffleTheSecondLowestBytesOfEach4ByteElement(z) - // copy(dst[:4], low4BytesOf(z)) - PSHUFB X6, X2 - MOVL X2, (DI) - ` - maskStore4 = ` - // copy(dst[:4], z) - MOVOU X2, (DI) - ` - - opOverStore1 = ` - // Blend over the dst's prior value. - // - // dstA := uint32(dst[0]) * 0x101 - // maskA := z - // outA := dstA*(0xffff-maskA)/0xffff + maskA - // dst[0] = uint8(outA >> 8) - MOVBLZX (DI), R12 - IMULL $0x101, R12 - MOVL X2, R13 - MOVL $0xffff, AX - SUBL R13, AX - MULL R12 // MULL's implicit arg is AX, and the result is stored in DX:AX. - MOVL $0x80008001, BX // Divide by 0xffff is to first multiply by a magic constant... - MULL BX // MULL's implicit arg is AX, and the result is stored in DX:AX. - SHRL $15, DX // ...and then shift by another magic constant (47 - 32 = 15). 
- ADDL DX, R13 - SHRL $8, R13 - MOVB R13, (DI) - ` - opSrcStore1 = ` - // dst[0] = uint8(z>>8) - MOVL X2, BX - SHRL $8, BX - MOVB BX, (DI) - ` - maskStore1 = ` - // dst[0] = uint32(z) - MOVL X2, (DI) - ` - - opOverXMM6 = `gather` - opSrcXMM6 = `gather` - maskXMM6 = `-` - - opOverXMM8 = `scatterAndMulBy0x101` - opSrcXMM8 = `-` - maskXMM8 = `-` - - opOverXMM9 = `fxAlmost65536` - opSrcXMM9 = `-` - maskXMM9 = `-` - - opOverXMM10 = `inverseFFFF` - opSrcXMM10 = `-` - maskXMM10 = `-` - - opOverLoadXMMRegs = ` - // gather := XMM(see above) // PSHUFB shuffle mask. - // scatterAndMulBy0x101 := XMM(see above) // PSHUFB shuffle mask. - // fxAlmost65536 := XMM(0x0000ffff repeated four times) // 0xffff. - // inverseFFFF := XMM(0x80008001 repeated four times) // Magic constant for dividing by 0xffff. - MOVOU gather<>(SB), X6 - MOVOU scatterAndMulBy0x101<>(SB), X8 - MOVOU fxAlmost65536<>(SB), X9 - MOVOU inverseFFFF<>(SB), X10 - ` - opSrcLoadXMMRegs = ` - // gather := XMM(see above) // PSHUFB shuffle mask. - MOVOU gather<>(SB), X6 - ` - maskLoadXMMRegs = `` -) diff --git a/vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl b/vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl deleted file mode 100644 index 66b21a13d..000000000 --- a/vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl +++ /dev/null @@ -1,171 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build go1.6 -// +build !noasm - -#include "textflag.h" - -// fl is short for floating point math. fx is short for fixed point math. 
- -DATA flAlmost65536<>+0x00(SB)/8, $0x477fffff477fffff -DATA flAlmost65536<>+0x08(SB)/8, $0x477fffff477fffff -DATA flOne<>+0x00(SB)/8, $0x3f8000003f800000 -DATA flOne<>+0x08(SB)/8, $0x3f8000003f800000 -DATA flSignMask<>+0x00(SB)/8, $0x7fffffff7fffffff -DATA flSignMask<>+0x08(SB)/8, $0x7fffffff7fffffff - -// scatterAndMulBy0x101 is a PSHUFB mask that brings the low four bytes of an -// XMM register to the low byte of that register's four uint32 values. It -// duplicates those bytes, effectively multiplying each uint32 by 0x101. -// -// It transforms a little-endian 16-byte XMM value from -// ijkl???????????? -// to -// ii00jj00kk00ll00 -DATA scatterAndMulBy0x101<>+0x00(SB)/8, $0x8080010180800000 -DATA scatterAndMulBy0x101<>+0x08(SB)/8, $0x8080030380800202 - -// gather is a PSHUFB mask that brings the second-lowest byte of the XMM -// register's four uint32 values to the low four bytes of that register. -// -// It transforms a little-endian 16-byte XMM value from -// ?i???j???k???l?? -// to -// ijkl000000000000 -DATA gather<>+0x00(SB)/8, $0x808080800d090501 -DATA gather<>+0x08(SB)/8, $0x8080808080808080 - -DATA fxAlmost65536<>+0x00(SB)/8, $0x0000ffff0000ffff -DATA fxAlmost65536<>+0x08(SB)/8, $0x0000ffff0000ffff -DATA inverseFFFF<>+0x00(SB)/8, $0x8000800180008001 -DATA inverseFFFF<>+0x08(SB)/8, $0x8000800180008001 - -GLOBL flAlmost65536<>(SB), (NOPTR+RODATA), $16 -GLOBL flOne<>(SB), (NOPTR+RODATA), $16 -GLOBL flSignMask<>(SB), (NOPTR+RODATA), $16 -GLOBL scatterAndMulBy0x101<>(SB), (NOPTR+RODATA), $16 -GLOBL gather<>(SB), (NOPTR+RODATA), $16 -GLOBL fxAlmost65536<>(SB), (NOPTR+RODATA), $16 -GLOBL inverseFFFF<>(SB), (NOPTR+RODATA), $16 - -// func haveSSE4_1() bool -TEXT ·haveSSE4_1(SB), NOSPLIT, $0 - MOVQ $1, AX - CPUID - SHRQ $19, CX - ANDQ $1, CX - MOVB CX, ret+0(FP) - RET - -// ---------------------------------------------------------------------------- - -// func {{.LongName}}SIMD({{.Args}}) -// -// XMM registers. 
Variable names are per -// https://github.com/google/font-rs/blob/master/src/accumulate.c -// -// xmm0 scratch -// xmm1 x -// xmm2 y, z -// xmm3 {{.XMM3}} -// xmm4 {{.XMM4}} -// xmm5 {{.XMM5}} -// xmm6 {{.XMM6}} -// xmm7 offset -// xmm8 {{.XMM8}} -// xmm9 {{.XMM9}} -// xmm10 {{.XMM10}} -TEXT ·{{.LongName}}SIMD(SB), NOSPLIT, ${{.FrameSize}}-{{.ArgsSize}} - {{.LoadArgs}} - - // R10 = len(src) &^ 3 - // R11 = len(src) - MOVQ R10, R11 - ANDQ $-4, R10 - - {{.Setup}} - - {{.LoadXMMRegs}} - - // offset := XMM(0x00000000 repeated four times) // Cumulative sum. - XORPS X7, X7 - - // i := 0 - MOVQ $0, R9 - -{{.ShortName}}Loop4: - // for i < (len(src) &^ 3) - CMPQ R9, R10 - JAE {{.ShortName}}Loop1 - - // x = XMM(s0, s1, s2, s3) - // - // Where s0 is src[i+0], s1 is src[i+1], etc. - MOVOU (SI), X1 - - // scratch = XMM(0, s0, s1, s2) - // x += scratch // yields x == XMM(s0, s0+s1, s1+s2, s2+s3) - MOVOU X1, X0 - PSLLO $4, X0 - {{.Add}} X0, X1 - - // scratch = XMM(0, 0, 0, 0) - // scratch = XMM(scratch@0, scratch@0, x@0, x@1) // yields scratch == XMM(0, 0, s0, s0+s1) - // x += scratch // yields x == XMM(s0, s0+s1, s0+s1+s2, s0+s1+s2+s3) - XORPS X0, X0 - SHUFPS $0x40, X1, X0 - {{.Add}} X0, X1 - - // x += offset - {{.Add}} X7, X1 - - {{.ClampAndScale}} - - {{.ConvertToInt32}} - - {{.Store4}} - - // offset = XMM(x@3, x@3, x@3, x@3) - MOVOU X1, X7 - SHUFPS $0xff, X1, X7 - - // i += 4 - // dst = dst[4:] - // src = src[4:] - ADDQ $4, R9 - ADDQ ${{.DstElemSize4}}, DI - ADDQ $16, SI - JMP {{.ShortName}}Loop4 - -{{.ShortName}}Loop1: - // for i < len(src) - CMPQ R9, R11 - JAE {{.ShortName}}End - - // x = src[i] + offset - MOVL (SI), X1 - {{.Add}} X7, X1 - - {{.ClampAndScale}} - - {{.ConvertToInt32}} - - {{.Store1}} - - // offset = x - MOVOU X1, X7 - - // i += 1 - // dst = dst[1:] - // src = src[1:] - ADDQ $1, R9 - ADDQ ${{.DstElemSize1}}, DI - ADDQ $4, SI - JMP {{.ShortName}}Loop1 - -{{.ShortName}}End: - RET diff --git a/vendor/golang.org/x/image/vector/raster_fixed.go 
b/vendor/golang.org/x/image/vector/raster_fixed.go deleted file mode 100644 index 5b0fe7a7e..000000000 --- a/vendor/golang.org/x/image/vector/raster_fixed.go +++ /dev/null @@ -1,327 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package vector - -// This file contains a fixed point math implementation of the vector -// graphics rasterizer. - -const ( - // ϕ is the number of binary digits after the fixed point. - // - // For example, if ϕ == 10 (and int1ϕ is based on the int32 type) then we - // are using 22.10 fixed point math. - // - // When changing this number, also change the assembly code (search for ϕ - // in the .s files). - ϕ = 9 - - fxOne int1ϕ = 1 << ϕ - fxOneAndAHalf int1ϕ = 1<<ϕ + 1<<(ϕ-1) - fxOneMinusIota int1ϕ = 1<<ϕ - 1 // Used for rounding up. -) - -// int1ϕ is a signed fixed-point number with 1*ϕ binary digits after the fixed -// point. -type int1ϕ int32 - -// int2ϕ is a signed fixed-point number with 2*ϕ binary digits after the fixed -// point. -// -// The Rasterizer's bufU32 field, nominally of type []uint32 (since that slice -// is also used by other code), can be thought of as a []int2ϕ during the -// fixedLineTo method. Lines of code that are actually like: -// buf[i] += uint32(etc) // buf has type []uint32. -// can be thought of as -// buf[i] += int2ϕ(etc) // buf has type []int2ϕ. 
-type int2ϕ int32 - -func fixedMax(x, y int1ϕ) int1ϕ { - if x > y { - return x - } - return y -} - -func fixedMin(x, y int1ϕ) int1ϕ { - if x < y { - return x - } - return y -} - -func fixedFloor(x int1ϕ) int32 { return int32(x >> ϕ) } -func fixedCeil(x int1ϕ) int32 { return int32((x + fxOneMinusIota) >> ϕ) } - -func (z *Rasterizer) fixedLineTo(bx, by float32) { - ax, ay := z.penX, z.penY - z.penX, z.penY = bx, by - dir := int1ϕ(1) - if ay > by { - dir, ax, ay, bx, by = -1, bx, by, ax, ay - } - // Horizontal line segments yield no change in coverage. Almost horizontal - // segments would yield some change, in ideal math, but the computation - // further below, involving 1 / (by - ay), is unstable in fixed point math, - // so we treat the segment as if it was perfectly horizontal. - if by-ay <= 0.000001 { - return - } - dxdy := (bx - ax) / (by - ay) - - ayϕ := int1ϕ(ay * float32(fxOne)) - byϕ := int1ϕ(by * float32(fxOne)) - - x := int1ϕ(ax * float32(fxOne)) - y := fixedFloor(ayϕ) - yMax := fixedCeil(byϕ) - if yMax > int32(z.size.Y) { - yMax = int32(z.size.Y) - } - width := int32(z.size.X) - - for ; y < yMax; y++ { - dy := fixedMin(int1ϕ(y+1)<<ϕ, byϕ) - fixedMax(int1ϕ(y)<<ϕ, ayϕ) - xNext := x + int1ϕ(float32(dy)*dxdy) - if y < 0 { - x = xNext - continue - } - buf := z.bufU32[y*width:] - d := dy * dir // d ranges up to ±1<<(1*ϕ). 
- x0, x1 := x, xNext - if x > xNext { - x0, x1 = x1, x0 - } - x0i := fixedFloor(x0) - x0Floor := int1ϕ(x0i) << ϕ - x1i := fixedCeil(x1) - x1Ceil := int1ϕ(x1i) << ϕ - - if x1i <= x0i+1 { - xmf := (x+xNext)>>1 - x0Floor - if i := clamp(x0i+0, width); i < uint(len(buf)) { - buf[i] += uint32(d * (fxOne - xmf)) - } - if i := clamp(x0i+1, width); i < uint(len(buf)) { - buf[i] += uint32(d * xmf) - } - } else { - oneOverS := x1 - x0 - twoOverS := 2 * oneOverS - x0f := x0 - x0Floor - oneMinusX0f := fxOne - x0f - oneMinusX0fSquared := oneMinusX0f * oneMinusX0f - x1f := x1 - x1Ceil + fxOne - x1fSquared := x1f * x1f - - // These next two variables are unused, as rounding errors are - // minimized when we delay the division by oneOverS for as long as - // possible. These lines of code (and the "In ideal math" comments - // below) are commented out instead of deleted in order to aid the - // comparison with the floating point version of the rasterizer. - // - // a0 := ((oneMinusX0f * oneMinusX0f) >> 1) / oneOverS - // am := ((x1f * x1f) >> 1) / oneOverS - - if i := clamp(x0i, width); i < uint(len(buf)) { - // In ideal math: buf[i] += uint32(d * a0) - D := oneMinusX0fSquared // D ranges up to ±1<<(2*ϕ). - D *= d // D ranges up to ±1<<(3*ϕ). - D /= twoOverS - buf[i] += uint32(D) - } - - if x1i == x0i+2 { - if i := clamp(x0i+1, width); i < uint(len(buf)) { - // In ideal math: buf[i] += uint32(d * (fxOne - a0 - am)) - // - // (x1i == x0i+2) and (twoOverS == 2 * (x1 - x0)) implies - // that twoOverS ranges up to +1<<(1*ϕ+2). - D := twoOverS<<ϕ - oneMinusX0fSquared - x1fSquared // D ranges up to ±1<<(2*ϕ+2). - D *= d // D ranges up to ±1<<(3*ϕ+2). - D /= twoOverS - buf[i] += uint32(D) - } - } else { - // This is commented out for the same reason as a0 and am. 
- // - // a1 := ((fxOneAndAHalf - x0f) << ϕ) / oneOverS - - if i := clamp(x0i+1, width); i < uint(len(buf)) { - // In ideal math: - // buf[i] += uint32(d * (a1 - a0)) - // or equivalently (but better in non-ideal, integer math, - // with respect to rounding errors), - // buf[i] += uint32(A * d / twoOverS) - // where - // A = (a1 - a0) * twoOverS - // = a1*twoOverS - a0*twoOverS - // Noting that twoOverS/oneOverS equals 2, substituting for - // a0 and then a1, given above, yields: - // A = a1*twoOverS - oneMinusX0fSquared - // = (fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared - // = fxOneAndAHalf<<(ϕ+1) - x0f<<(ϕ+1) - oneMinusX0fSquared - // - // This is a positive number minus two non-negative - // numbers. For an upper bound on A, the positive number is - // P = fxOneAndAHalf<<(ϕ+1) - // < (2*fxOne)<<(ϕ+1) - // = fxOne<<(ϕ+2) - // = 1<<(2*ϕ+2) - // - // For a lower bound on A, the two non-negative numbers are - // N = x0f<<(ϕ+1) + oneMinusX0fSquared - // ≤ x0f<<(ϕ+1) + fxOne*fxOne - // = x0f<<(ϕ+1) + 1<<(2*ϕ) - // < x0f<<(ϕ+1) + 1<<(2*ϕ+1) - // ≤ fxOne<<(ϕ+1) + 1<<(2*ϕ+1) - // = 1<<(2*ϕ+1) + 1<<(2*ϕ+1) - // = 1<<(2*ϕ+2) - // - // Thus, A ranges up to ±1<<(2*ϕ+2). It is possible to - // derive a tighter bound, but this bound is sufficient to - // reason about overflow. - D := (fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared // D ranges up to ±1<<(2*ϕ+2). - D *= d // D ranges up to ±1<<(3*ϕ+2). - D /= twoOverS - buf[i] += uint32(D) - } - dTimesS := uint32((d << (2 * ϕ)) / oneOverS) - for xi := x0i + 2; xi < x1i-1; xi++ { - if i := clamp(xi, width); i < uint(len(buf)) { - buf[i] += dTimesS - } - } - - // This is commented out for the same reason as a0 and am. 
- // - // a2 := a1 + (int1ϕ(x1i-x0i-3)<<(2*ϕ))/oneOverS - - if i := clamp(x1i-1, width); i < uint(len(buf)) { - // In ideal math: - // buf[i] += uint32(d * (fxOne - a2 - am)) - // or equivalently (but better in non-ideal, integer math, - // with respect to rounding errors), - // buf[i] += uint32(A * d / twoOverS) - // where - // A = (fxOne - a2 - am) * twoOverS - // = twoOverS<<ϕ - a2*twoOverS - am*twoOverS - // Noting that twoOverS/oneOverS equals 2, substituting for - // am and then a2, given above, yields: - // A = twoOverS<<ϕ - a2*twoOverS - x1f*x1f - // = twoOverS<<ϕ - a1*twoOverS - (int1ϕ(x1i-x0i-3)<<(2*ϕ))*2 - x1f*x1f - // = twoOverS<<ϕ - a1*twoOverS - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f - // Substituting for a1, given above, yields: - // A = twoOverS<<ϕ - ((fxOneAndAHalf-x0f)<<ϕ)*2 - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f - // = twoOverS<<ϕ - (fxOneAndAHalf-x0f)<<(ϕ+1) - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f - // = B<<ϕ - x1f*x1f - // where - // B = twoOverS - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1) - // = (x1-x0)<<1 - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1) - // - // Re-arranging the definitions given above: - // x0Floor := int1ϕ(x0i) << ϕ - // x0f := x0 - x0Floor - // x1Ceil := int1ϕ(x1i) << ϕ - // x1f := x1 - x1Ceil + fxOne - // combined with fxOne = 1<<ϕ yields: - // x0 = x0f + int1ϕ(x0i)<<ϕ - // x1 = x1f + int1ϕ(x1i-1)<<ϕ - // so that expanding (x1-x0) yields: - // B = (x1f-x0f + int1ϕ(x1i-x0i-1)<<ϕ)<<1 - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1) - // = (x1f-x0f)<<1 + int1ϕ(x1i-x0i-1)<<(ϕ+1) - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1) - // A large part of the second and fourth terms cancel: - // B = (x1f-x0f)<<1 - (fxOneAndAHalf-x0f)<<1 - int1ϕ(-2)<<(ϕ+1) - // = (x1f-x0f)<<1 - (fxOneAndAHalf-x0f)<<1 + 1<<(ϕ+2) - // = (x1f - fxOneAndAHalf)<<1 + 1<<(ϕ+2) - // The first term, (x1f - fxOneAndAHalf)<<1, is a negative - // number, bounded below by -fxOneAndAHalf<<1, which is - // greater than -fxOne<<2, or -1<<(ϕ+2).
Thus, B ranges up - // to ±1<<(ϕ+2). One final simplification: - // B = x1f<<1 + (1<<(ϕ+2) - fxOneAndAHalf<<1) - const C = 1<<(ϕ+2) - fxOneAndAHalf<<1 - D := x1f<<1 + C // D ranges up to ±1<<(1*ϕ+2). - D <<= ϕ // D ranges up to ±1<<(2*ϕ+2). - D -= x1fSquared // D ranges up to ±1<<(2*ϕ+3). - D *= d // D ranges up to ±1<<(3*ϕ+3). - D /= twoOverS - buf[i] += uint32(D) - } - } - - if i := clamp(x1i, width); i < uint(len(buf)) { - // In ideal math: buf[i] += uint32(d * am) - D := x1fSquared // D ranges up to ±1<<(2*ϕ). - D *= d // D ranges up to ±1<<(3*ϕ). - D /= twoOverS - buf[i] += uint32(D) - } - } - - x = xNext - } -} - -func fixedAccumulateOpOver(dst []uint8, src []uint32) { - // Sanity check that len(dst) >= len(src). - if len(dst) < len(src) { - return - } - - acc := int2ϕ(0) - for i, v := range src { - acc += int2ϕ(v) - a := acc - if a < 0 { - a = -a - } - a >>= 2*ϕ - 16 - if a > 0xffff { - a = 0xffff - } - // This algorithm comes from the standard library's image/draw package. - dstA := uint32(dst[i]) * 0x101 - maskA := uint32(a) - outA := dstA*(0xffff-maskA)/0xffff + maskA - dst[i] = uint8(outA >> 8) - } -} - -func fixedAccumulateOpSrc(dst []uint8, src []uint32) { - // Sanity check that len(dst) >= len(src). - if len(dst) < len(src) { - return - } - - acc := int2ϕ(0) - for i, v := range src { - acc += int2ϕ(v) - a := acc - if a < 0 { - a = -a - } - a >>= 2*ϕ - 8 - if a > 0xff { - a = 0xff - } - dst[i] = uint8(a) - } -} - -func fixedAccumulateMask(buf []uint32) { - acc := int2ϕ(0) - for i, v := range buf { - acc += int2ϕ(v) - a := acc - if a < 0 { - a = -a - } - a >>= 2*ϕ - 16 - if a > 0xffff { - a = 0xffff - } - buf[i] = uint32(a) - } -} diff --git a/vendor/golang.org/x/image/vector/raster_floating.go b/vendor/golang.org/x/image/vector/raster_floating.go deleted file mode 100644 index fd11db1b4..000000000 --- a/vendor/golang.org/x/image/vector/raster_floating.go +++ /dev/null @@ -1,220 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package vector - -// This file contains a floating point math implementation of the vector -// graphics rasterizer. - -import ( - "math" -) - -func floatingMax(x, y float32) float32 { - if x > y { - return x - } - return y -} - -func floatingMin(x, y float32) float32 { - if x < y { - return x - } - return y -} - -func floatingFloor(x float32) int32 { return int32(math.Floor(float64(x))) } -func floatingCeil(x float32) int32 { return int32(math.Ceil(float64(x))) } - -func (z *Rasterizer) floatingLineTo(bx, by float32) { - ax, ay := z.penX, z.penY - z.penX, z.penY = bx, by - dir := float32(1) - if ay > by { - dir, ax, ay, bx, by = -1, bx, by, ax, ay - } - // Horizontal line segments yield no change in coverage. Almost horizontal - // segments would yield some change, in ideal math, but the computation - // further below, involving 1 / (by - ay), is unstable in floating point - // math, so we treat the segment as if it was perfectly horizontal. - if by-ay <= 0.000001 { - return - } - dxdy := (bx - ax) / (by - ay) - - x := ax - y := floatingFloor(ay) - yMax := floatingCeil(by) - if yMax > int32(z.size.Y) { - yMax = int32(z.size.Y) - } - width := int32(z.size.X) - - for ; y < yMax; y++ { - dy := floatingMin(float32(y+1), by) - floatingMax(float32(y), ay) - - // The "float32" in expressions like "float32(foo*bar)" here and below - // look redundant, since foo and bar already have type float32, but are - // explicit in order to disable the compiler's Fused Multiply Add (FMA) - // instruction selection, which can improve performance but can result - // in different rounding errors in floating point computations. - // - // This package aims to have bit-exact identical results across all - // GOARCHes, and across pure Go code and assembly, so it disables FMA. 
- // - // See the discussion at - // https://groups.google.com/d/topic/golang-dev/Sti0bl2xUXQ/discussion - xNext := x + float32(dy*dxdy) - if y < 0 { - x = xNext - continue - } - buf := z.bufF32[y*width:] - d := float32(dy * dir) - x0, x1 := x, xNext - if x > xNext { - x0, x1 = x1, x0 - } - x0i := floatingFloor(x0) - x0Floor := float32(x0i) - x1i := floatingCeil(x1) - x1Ceil := float32(x1i) - - if x1i <= x0i+1 { - xmf := float32(0.5*(x+xNext)) - x0Floor - if i := clamp(x0i+0, width); i < uint(len(buf)) { - buf[i] += d - float32(d*xmf) - } - if i := clamp(x0i+1, width); i < uint(len(buf)) { - buf[i] += float32(d * xmf) - } - } else { - s := 1 / (x1 - x0) - x0f := x0 - x0Floor - oneMinusX0f := 1 - x0f - a0 := float32(0.5 * s * oneMinusX0f * oneMinusX0f) - x1f := x1 - x1Ceil + 1 - am := float32(0.5 * s * x1f * x1f) - - if i := clamp(x0i, width); i < uint(len(buf)) { - buf[i] += float32(d * a0) - } - - if x1i == x0i+2 { - if i := clamp(x0i+1, width); i < uint(len(buf)) { - buf[i] += float32(d * (1 - a0 - am)) - } - } else { - a1 := float32(s * (1.5 - x0f)) - if i := clamp(x0i+1, width); i < uint(len(buf)) { - buf[i] += float32(d * (a1 - a0)) - } - dTimesS := float32(d * s) - for xi := x0i + 2; xi < x1i-1; xi++ { - if i := clamp(xi, width); i < uint(len(buf)) { - buf[i] += dTimesS - } - } - a2 := a1 + float32(s*float32(x1i-x0i-3)) - if i := clamp(x1i-1, width); i < uint(len(buf)) { - buf[i] += float32(d * (1 - a2 - am)) - } - } - - if i := clamp(x1i, width); i < uint(len(buf)) { - buf[i] += float32(d * am) - } - } - - x = xNext - } -} - -const ( - // almost256 scales a floating point value in the range [0, 1] to a uint8 - // value in the range [0x00, 0xff]. - // - // 255 is too small. Floating point math accumulates rounding errors, so a - // fully covered src value that would in ideal math be float32(1) might be - // float32(1-ε), and uint8(255 * (1-ε)) would be 0xfe instead of 0xff. The - // uint8 conversion rounds to zero, not to nearest. - // - // 256 is too big. 
If we multiplied by 256, below, then a fully covered src - // value of float32(1) would translate to uint8(256 * 1), which can be 0x00 - // instead of the maximal value 0xff. - // - // math.Float32bits(almost256) is 0x437fffff. - almost256 = 255.99998 - - // almost65536 scales a floating point value in the range [0, 1] to a - // uint16 value in the range [0x0000, 0xffff]. - // - // math.Float32bits(almost65536) is 0x477fffff. - almost65536 = almost256 * 256 -) - -func floatingAccumulateOpOver(dst []uint8, src []float32) { - // Sanity check that len(dst) >= len(src). - if len(dst) < len(src) { - return - } - - acc := float32(0) - for i, v := range src { - acc += v - a := acc - if a < 0 { - a = -a - } - if a > 1 { - a = 1 - } - // This algorithm comes from the standard library's image/draw package. - dstA := uint32(dst[i]) * 0x101 - maskA := uint32(almost65536 * a) - outA := dstA*(0xffff-maskA)/0xffff + maskA - dst[i] = uint8(outA >> 8) - } -} - -func floatingAccumulateOpSrc(dst []uint8, src []float32) { - // Sanity check that len(dst) >= len(src). - if len(dst) < len(src) { - return - } - - acc := float32(0) - for i, v := range src { - acc += v - a := acc - if a < 0 { - a = -a - } - if a > 1 { - a = 1 - } - dst[i] = uint8(almost256 * a) - } -} - -func floatingAccumulateMask(dst []uint32, src []float32) { - // Sanity check that len(dst) >= len(src). - if len(dst) < len(src) { - return - } - - acc := float32(0) - for i, v := range src { - acc += v - a := acc - if a < 0 { - a = -a - } - if a > 1 { - a = 1 - } - dst[i] = uint32(almost65536 * a) - } -} diff --git a/vendor/golang.org/x/image/vector/vector.go b/vendor/golang.org/x/image/vector/vector.go deleted file mode 100644 index 852a4f8b7..000000000 --- a/vendor/golang.org/x/image/vector/vector.go +++ /dev/null @@ -1,472 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -//go:generate go run gen.go -//go:generate asmfmt -w acc_amd64.s - -// asmfmt is https://github.com/klauspost/asmfmt - -// Package vector provides a rasterizer for 2-D vector graphics. -package vector // import "golang.org/x/image/vector" - -// The rasterizer's design follows -// https://medium.com/@raphlinus/inside-the-fastest-font-renderer-in-the-world-75ae5270c445 -// -// Proof of concept code is in -// https://github.com/google/font-go -// -// See also: -// http://nothings.org/gamedev/rasterize/ -// http://projects.tuxee.net/cl-vectors/section-the-cl-aa-algorithm -// https://people.gnome.org/~mathieu/libart/internals.html#INTERNALS-SCANLINE - -import ( - "image" - "image/color" - "image/draw" - "math" -) - -// floatingPointMathThreshold is the width or height above which the rasterizer -// chooses to use floating point math instead of fixed point math. -// -// Both implementations of line segmentation rasterization (see raster_fixed.go -// and raster_floating.go) implement the same algorithm (in ideal, infinite -// precision math) but they perform differently in practice. The fixed point -// math version is roughly 1.25x faster (on GOARCH=amd64) on the benchmarks, -// but at sufficiently large scales, the computations will overflow and hence -// show rendering artifacts. The floating point math version has more -// consistent quality over larger scales, but it is significantly slower. -// -// This constant determines when to use the faster implementation and when to -// use the better quality implementation. -// -// The rationale for this particular value is that TestRasterizePolygon in -// vector_test.go checks the rendering quality of polygon edges at various -// angles, inscribed in a circle of diameter 512. It may be that a higher value -// would still produce acceptable quality, but 512 seems to work.
-const floatingPointMathThreshold = 512 - -func lerp(t, px, py, qx, qy float32) (x, y float32) { - return px + t*(qx-px), py + t*(qy-py) -} - -func clamp(i, width int32) uint { - if i < 0 { - return 0 - } - if i < width { - return uint(i) - } - return uint(width) -} - -// NewRasterizer returns a new Rasterizer whose rendered mask image is bounded -// by the given width and height. -func NewRasterizer(w, h int) *Rasterizer { - z := &Rasterizer{} - z.Reset(w, h) - return z -} - -// Rasterizer is a 2-D vector graphics rasterizer. -// -// The zero value is usable, in that it is a Rasterizer whose rendered mask -// image has zero width and zero height. Call Reset to change its bounds. -type Rasterizer struct { - // bufXxx are buffers of float32 or uint32 values, holding either the - // individual or cumulative area values. - // - // We don't actually need both values at any given time, and to conserve - // memory, the integration of the individual to the cumulative could modify - // the buffer in place. In other words, we could use a single buffer, say - // of type []uint32, and add some math.Float32bits and math.Float32frombits - // calls to satisfy the compiler's type checking. As of Go 1.7, though, - // there is a performance penalty between: - // bufF32[i] += x - // and - // bufU32[i] = math.Float32bits(x + math.Float32frombits(bufU32[i])) - // - // See golang.org/issue/17220 for some discussion. - bufF32 []float32 - bufU32 []uint32 - - useFloatingPointMath bool - - size image.Point - firstX float32 - firstY float32 - penX float32 - penY float32 - - // DrawOp is the operator used for the Draw method. - // - // The zero value is draw.Over. - DrawOp draw.Op - - // TODO: an exported field equivalent to the mask point in the - // draw.DrawMask function in the stdlib image/draw package? -} - -// Reset resets a Rasterizer as if it was just returned by NewRasterizer. -// -// This includes setting z.DrawOp to draw.Over.
-func (z *Rasterizer) Reset(w, h int) { - z.size = image.Point{w, h} - z.firstX = 0 - z.firstY = 0 - z.penX = 0 - z.penY = 0 - z.DrawOp = draw.Over - - z.setUseFloatingPointMath(w > floatingPointMathThreshold || h > floatingPointMathThreshold) -} - -func (z *Rasterizer) setUseFloatingPointMath(b bool) { - z.useFloatingPointMath = b - - // Make z.bufF32 or z.bufU32 large enough to hold width * height samples. - if z.useFloatingPointMath { - if n := z.size.X * z.size.Y; n > cap(z.bufF32) { - z.bufF32 = make([]float32, n) - } else { - z.bufF32 = z.bufF32[:n] - for i := range z.bufF32 { - z.bufF32[i] = 0 - } - } - } else { - if n := z.size.X * z.size.Y; n > cap(z.bufU32) { - z.bufU32 = make([]uint32, n) - } else { - z.bufU32 = z.bufU32[:n] - for i := range z.bufU32 { - z.bufU32[i] = 0 - } - } - } -} - -// Size returns the width and height passed to NewRasterizer or Reset. -func (z *Rasterizer) Size() image.Point { - return z.size -} - -// Bounds returns the rectangle from (0, 0) to the width and height passed to -// NewRasterizer or Reset. -func (z *Rasterizer) Bounds() image.Rectangle { - return image.Rectangle{Max: z.size} -} - -// Pen returns the location of the path-drawing pen: the last argument to the -// most recent XxxTo call. -func (z *Rasterizer) Pen() (x, y float32) { - return z.penX, z.penY -} - -// ClosePath closes the current path. -func (z *Rasterizer) ClosePath() { - z.LineTo(z.firstX, z.firstY) -} - -// MoveTo starts a new path and moves the pen to (ax, ay). -// -// The coordinates are allowed to be out of the Rasterizer's bounds. -func (z *Rasterizer) MoveTo(ax, ay float32) { - z.firstX = ax - z.firstY = ay - z.penX = ax - z.penY = ay -} - -// LineTo adds a line segment, from the pen to (bx, by), and moves the pen to -// (bx, by). -// -// The coordinates are allowed to be out of the Rasterizer's bounds. 
-func (z *Rasterizer) LineTo(bx, by float32) { - if z.useFloatingPointMath { - z.floatingLineTo(bx, by) - } else { - z.fixedLineTo(bx, by) - } -} - -// QuadTo adds a quadratic Bézier segment, from the pen via (bx, by) to (cx, -// cy), and moves the pen to (cx, cy). -// -// The coordinates are allowed to be out of the Rasterizer's bounds. -func (z *Rasterizer) QuadTo(bx, by, cx, cy float32) { - ax, ay := z.penX, z.penY - devsq := devSquared(ax, ay, bx, by, cx, cy) - if devsq >= 0.333 { - const tol = 3 - n := 1 + int(math.Sqrt(math.Sqrt(tol*float64(devsq)))) - t, nInv := float32(0), 1/float32(n) - for i := 0; i < n-1; i++ { - t += nInv - abx, aby := lerp(t, ax, ay, bx, by) - bcx, bcy := lerp(t, bx, by, cx, cy) - z.LineTo(lerp(t, abx, aby, bcx, bcy)) - } - } - z.LineTo(cx, cy) -} - -// CubeTo adds a cubic Bézier segment, from the pen via (bx, by) and (cx, cy) -// to (dx, dy), and moves the pen to (dx, dy). -// -// The coordinates are allowed to be out of the Rasterizer's bounds. -func (z *Rasterizer) CubeTo(bx, by, cx, cy, dx, dy float32) { - ax, ay := z.penX, z.penY - devsq := devSquared(ax, ay, bx, by, dx, dy) - if devsqAlt := devSquared(ax, ay, cx, cy, dx, dy); devsq < devsqAlt { - devsq = devsqAlt - } - if devsq >= 0.333 { - const tol = 3 - n := 1 + int(math.Sqrt(math.Sqrt(tol*float64(devsq)))) - t, nInv := float32(0), 1/float32(n) - for i := 0; i < n-1; i++ { - t += nInv - abx, aby := lerp(t, ax, ay, bx, by) - bcx, bcy := lerp(t, bx, by, cx, cy) - cdx, cdy := lerp(t, cx, cy, dx, dy) - abcx, abcy := lerp(t, abx, aby, bcx, bcy) - bcdx, bcdy := lerp(t, bcx, bcy, cdx, cdy) - z.LineTo(lerp(t, abcx, abcy, bcdx, bcdy)) - } - } - z.LineTo(dx, dy) -} - -// devSquared returns a measure of how curvy the sequence (ax, ay) to (bx, by) -// to (cx, cy) is. It determines how many line segments will approximate a -// Bézier curve segment. 
-// -// http://lists.nongnu.org/archive/html/freetype-devel/2016-08/msg00080.html -// gives the rationale for this evenly spaced heuristic instead of a recursive -// de Casteljau approach: -// -// The reason for the subdivision by n is that I expect the "flatness" -// computation to be semi-expensive (it's done once rather than on each -// potential subdivision) and also because you'll often get fewer subdivisions. -// Taking a circular arc as a simplifying assumption (ie a spherical cow), -// where I get n, a recursive approach would get 2^⌈lg n⌉, which, if I haven't -// made any horrible mistakes, is expected to be 33% more in the limit. -func devSquared(ax, ay, bx, by, cx, cy float32) float32 { - devx := ax - 2*bx + cx - devy := ay - 2*by + cy - return devx*devx + devy*devy -} - -// Draw implements the Drawer interface from the standard library's image/draw -// package. -// -// The vector paths previously added via the XxxTo calls become the mask for -// drawing src onto dst. -func (z *Rasterizer) Draw(dst draw.Image, r image.Rectangle, src image.Image, sp image.Point) { - // TODO: adjust r and sp (and mp?) if src.Bounds() doesn't contain - // r.Add(sp.Sub(r.Min)). - - if src, ok := src.(*image.Uniform); ok { - srcR, srcG, srcB, srcA := src.RGBA() - switch dst := dst.(type) { - case *image.Alpha: - // Fast path for glyph rendering. 
- if srcA == 0xffff { - if z.DrawOp == draw.Over { - z.rasterizeDstAlphaSrcOpaqueOpOver(dst, r) - } else { - z.rasterizeDstAlphaSrcOpaqueOpSrc(dst, r) - } - return - } - case *image.RGBA: - if z.DrawOp == draw.Over { - z.rasterizeDstRGBASrcUniformOpOver(dst, r, srcR, srcG, srcB, srcA) - } else { - z.rasterizeDstRGBASrcUniformOpSrc(dst, r, srcR, srcG, srcB, srcA) - } - return - } - } - - if z.DrawOp == draw.Over { - z.rasterizeOpOver(dst, r, src, sp) - } else { - z.rasterizeOpSrc(dst, r, src, sp) - } -} - -func (z *Rasterizer) accumulateMask() { - if z.useFloatingPointMath { - if n := z.size.X * z.size.Y; n > cap(z.bufU32) { - z.bufU32 = make([]uint32, n) - } else { - z.bufU32 = z.bufU32[:n] - } - if haveFloatingAccumulateSIMD { - floatingAccumulateMaskSIMD(z.bufU32, z.bufF32) - } else { - floatingAccumulateMask(z.bufU32, z.bufF32) - } - } else { - if haveFixedAccumulateSIMD { - fixedAccumulateMaskSIMD(z.bufU32) - } else { - fixedAccumulateMask(z.bufU32) - } - } -} - -func (z *Rasterizer) rasterizeDstAlphaSrcOpaqueOpOver(dst *image.Alpha, r image.Rectangle) { - // TODO: non-zero vs even-odd winding? - if r == dst.Bounds() && r == z.Bounds() { - // We bypass the z.accumulateMask step and convert straight from - // z.bufF32 or z.bufU32 to dst.Pix. - if z.useFloatingPointMath { - if haveFloatingAccumulateSIMD { - floatingAccumulateOpOverSIMD(dst.Pix, z.bufF32) - } else { - floatingAccumulateOpOver(dst.Pix, z.bufF32) - } - } else { - if haveFixedAccumulateSIMD { - fixedAccumulateOpOverSIMD(dst.Pix, z.bufU32) - } else { - fixedAccumulateOpOver(dst.Pix, z.bufU32) - } - } - return - } - - z.accumulateMask() - pix := dst.Pix[dst.PixOffset(r.Min.X, r.Min.Y):] - for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ { - for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ { - ma := z.bufU32[y*z.size.X+x] - i := y*dst.Stride + x - - // This formula is like rasterizeOpOver's, simplified for the - // concrete dst type and opaque src assumption. 
- a := 0xffff - ma - pix[i] = uint8((uint32(pix[i])*0x101*a/0xffff + ma) >> 8) - } - } -} - -func (z *Rasterizer) rasterizeDstAlphaSrcOpaqueOpSrc(dst *image.Alpha, r image.Rectangle) { - // TODO: non-zero vs even-odd winding? - if r == dst.Bounds() && r == z.Bounds() { - // We bypass the z.accumulateMask step and convert straight from - // z.bufF32 or z.bufU32 to dst.Pix. - if z.useFloatingPointMath { - if haveFloatingAccumulateSIMD { - floatingAccumulateOpSrcSIMD(dst.Pix, z.bufF32) - } else { - floatingAccumulateOpSrc(dst.Pix, z.bufF32) - } - } else { - if haveFixedAccumulateSIMD { - fixedAccumulateOpSrcSIMD(dst.Pix, z.bufU32) - } else { - fixedAccumulateOpSrc(dst.Pix, z.bufU32) - } - } - return - } - - z.accumulateMask() - pix := dst.Pix[dst.PixOffset(r.Min.X, r.Min.Y):] - for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ { - for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ { - ma := z.bufU32[y*z.size.X+x] - - // This formula is like rasterizeOpSrc's, simplified for the - // concrete dst type and opaque src assumption. - pix[y*dst.Stride+x] = uint8(ma >> 8) - } - } -} - -func (z *Rasterizer) rasterizeDstRGBASrcUniformOpOver(dst *image.RGBA, r image.Rectangle, sr, sg, sb, sa uint32) { - z.accumulateMask() - pix := dst.Pix[dst.PixOffset(r.Min.X, r.Min.Y):] - for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ { - for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ { - ma := z.bufU32[y*z.size.X+x] - - // This formula is like rasterizeOpOver's, simplified for the - // concrete dst type and uniform src assumption. 
- a := 0xffff - (sa * ma / 0xffff) - i := y*dst.Stride + 4*x - pix[i+0] = uint8(((uint32(pix[i+0])*0x101*a + sr*ma) / 0xffff) >> 8) - pix[i+1] = uint8(((uint32(pix[i+1])*0x101*a + sg*ma) / 0xffff) >> 8) - pix[i+2] = uint8(((uint32(pix[i+2])*0x101*a + sb*ma) / 0xffff) >> 8) - pix[i+3] = uint8(((uint32(pix[i+3])*0x101*a + sa*ma) / 0xffff) >> 8) - } - } -} - -func (z *Rasterizer) rasterizeDstRGBASrcUniformOpSrc(dst *image.RGBA, r image.Rectangle, sr, sg, sb, sa uint32) { - z.accumulateMask() - pix := dst.Pix[dst.PixOffset(r.Min.X, r.Min.Y):] - for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ { - for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ { - ma := z.bufU32[y*z.size.X+x] - - // This formula is like rasterizeOpSrc's, simplified for the - // concrete dst type and uniform src assumption. - i := y*dst.Stride + 4*x - pix[i+0] = uint8((sr * ma / 0xffff) >> 8) - pix[i+1] = uint8((sg * ma / 0xffff) >> 8) - pix[i+2] = uint8((sb * ma / 0xffff) >> 8) - pix[i+3] = uint8((sa * ma / 0xffff) >> 8) - } - } -} - -func (z *Rasterizer) rasterizeOpOver(dst draw.Image, r image.Rectangle, src image.Image, sp image.Point) { - z.accumulateMask() - out := color.RGBA64{} - outc := color.Color(&out) - for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ { - for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ { - sr, sg, sb, sa := src.At(sp.X+x, sp.Y+y).RGBA() - ma := z.bufU32[y*z.size.X+x] - - // This algorithm comes from the standard library's image/draw - // package. 
- dr, dg, db, da := dst.At(r.Min.X+x, r.Min.Y+y).RGBA() - a := 0xffff - (sa * ma / 0xffff) - out.R = uint16((dr*a + sr*ma) / 0xffff) - out.G = uint16((dg*a + sg*ma) / 0xffff) - out.B = uint16((db*a + sb*ma) / 0xffff) - out.A = uint16((da*a + sa*ma) / 0xffff) - - dst.Set(r.Min.X+x, r.Min.Y+y, outc) - } - } -} - -func (z *Rasterizer) rasterizeOpSrc(dst draw.Image, r image.Rectangle, src image.Image, sp image.Point) { - z.accumulateMask() - out := color.RGBA64{} - outc := color.Color(&out) - for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ { - for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ { - sr, sg, sb, sa := src.At(sp.X+x, sp.Y+y).RGBA() - ma := z.bufU32[y*z.size.X+x] - - // This algorithm comes from the standard library's image/draw - // package. - out.R = uint16(sr * ma / 0xffff) - out.G = uint16(sg * ma / 0xffff) - out.B = uint16(sb * ma / 0xffff) - out.A = uint16(sa * ma / 0xffff) - - dst.Set(r.Min.X+x, r.Min.Y+y, outc) - } - } -} diff --git a/vendor/golang.org/x/image/vector/vector_test.go b/vendor/golang.org/x/image/vector/vector_test.go deleted file mode 100644 index 012968e02..000000000 --- a/vendor/golang.org/x/image/vector/vector_test.go +++ /dev/null @@ -1,519 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package vector - -// TODO: add tests for NaN and Inf coordinates. - -import ( - "fmt" - "image" - "image/color" - "image/draw" - "image/png" - "math" - "math/rand" - "os" - "path/filepath" - "testing" -) - -// encodePNG is useful for manually debugging the tests. 
-func encodePNG(dstFilename string, src image.Image) error { - f, err := os.Create(dstFilename) - if err != nil { - return err - } - encErr := png.Encode(f, src) - closeErr := f.Close() - if encErr != nil { - return encErr - } - return closeErr -} - -func pointOnCircle(center, radius, index, number int) (x, y float32) { - c := float64(center) - r := float64(radius) - i := float64(index) - n := float64(number) - return float32(c + r*(math.Cos(2*math.Pi*i/n))), - float32(c + r*(math.Sin(2*math.Pi*i/n))) -} - -func TestRasterizeOutOfBounds(t *testing.T) { - // Set this to a non-empty string such as "/tmp" to manually inspect the - // rasterization. - // - // If empty, this test simply checks that calling LineTo with points out of - // the rasterizer's bounds doesn't panic. - const tmpDir = "" - - const center, radius, n = 16, 20, 16 - var z Rasterizer - for i := 0; i < n; i++ { - for j := 1; j < n/2; j++ { - z.Reset(2*center, 2*center) - z.MoveTo(1*center, 1*center) - z.LineTo(pointOnCircle(center, radius, i+0, n)) - z.LineTo(pointOnCircle(center, radius, i+j, n)) - z.ClosePath() - - z.MoveTo(0*center, 0*center) - z.LineTo(0*center, 2*center) - z.LineTo(2*center, 2*center) - z.LineTo(2*center, 0*center) - z.ClosePath() - - dst := image.NewAlpha(z.Bounds()) - z.Draw(dst, dst.Bounds(), image.Opaque, image.Point{}) - - if tmpDir == "" { - continue - } - - filename := filepath.Join(tmpDir, fmt.Sprintf("out-%02d-%02d.png", i, j)) - if err := encodePNG(filename, dst); err != nil { - t.Error(err) - } - t.Logf("wrote %s", filename) - } - } -} - -func TestRasterizePolygon(t *testing.T) { - var z Rasterizer - for radius := 4; radius <= 256; radius *= 2 { - for n := 3; n <= 19; n += 4 { - z.Reset(2*radius, 2*radius) - z.MoveTo(float32(2*radius), float32(1*radius)) - for i := 1; i < n; i++ { - z.LineTo(pointOnCircle(radius, radius, i, n)) - } - z.ClosePath() - - dst := image.NewAlpha(z.Bounds()) - z.Draw(dst, dst.Bounds(), image.Opaque, image.Point{}) - - if err := 
checkCornersCenter(dst); err != nil { - t.Errorf("radius=%d, n=%d: %v", radius, n, err) - } - } - } -} - -func TestRasterizeAlmostAxisAligned(t *testing.T) { - z := NewRasterizer(8, 8) - z.MoveTo(2, 2) - z.LineTo(6, math.Nextafter32(2, 0)) - z.LineTo(6, 6) - z.LineTo(math.Nextafter32(2, 0), 6) - z.ClosePath() - - dst := image.NewAlpha(z.Bounds()) - z.Draw(dst, dst.Bounds(), image.Opaque, image.Point{}) - - if err := checkCornersCenter(dst); err != nil { - t.Error(err) - } -} - -func TestRasterizeWideAlmostHorizontalLines(t *testing.T) { - var z Rasterizer - for i := uint(3); i < 16; i++ { - x := float32(int(1 << i)) - - z.Reset(8, 8) - z.MoveTo(-x, 3) - z.LineTo(+x, 4) - z.LineTo(+x, 6) - z.LineTo(-x, 6) - z.ClosePath() - - dst := image.NewAlpha(z.Bounds()) - z.Draw(dst, dst.Bounds(), image.Opaque, image.Point{}) - - if err := checkCornersCenter(dst); err != nil { - t.Errorf("i=%d: %v", i, err) - } - } -} - -func TestRasterize30Degrees(t *testing.T) { - z := NewRasterizer(8, 8) - z.MoveTo(4, 4) - z.LineTo(8, 4) - z.LineTo(4, 6) - z.ClosePath() - - dst := image.NewAlpha(z.Bounds()) - z.Draw(dst, dst.Bounds(), image.Opaque, image.Point{}) - - if err := checkCornersCenter(dst); err != nil { - t.Error(err) - } -} - -func TestRasterizeRandomLineTos(t *testing.T) { - var z Rasterizer - for i := 5; i < 50; i++ { - n, rng := 0, rand.New(rand.NewSource(int64(i))) - - z.Reset(i+2, i+2) - z.MoveTo(float32(i/2), float32(i/2)) - for ; rng.Intn(16) != 0; n++ { - x := 1 + rng.Intn(i) - y := 1 + rng.Intn(i) - z.LineTo(float32(x), float32(y)) - } - z.ClosePath() - - dst := image.NewAlpha(z.Bounds()) - z.Draw(dst, dst.Bounds(), image.Opaque, image.Point{}) - - if err := checkCorners(dst); err != nil { - t.Errorf("i=%d (%d nodes): %v", i, n, err) - } - } -} - -// checkCornersCenter checks that the corners of the image are all 0x00 and the -// center is 0xff. 
-func checkCornersCenter(m *image.Alpha) error { - if err := checkCorners(m); err != nil { - return err - } - size := m.Bounds().Size() - center := m.Pix[(size.Y/2)*m.Stride+(size.X/2)] - if center != 0xff { - return fmt.Errorf("center: got %#02x, want 0xff", center) - } - return nil -} - -// checkCorners checks that the corners of the image are all 0x00. -func checkCorners(m *image.Alpha) error { - size := m.Bounds().Size() - corners := [4]uint8{ - m.Pix[(0*size.Y+0)*m.Stride+(0*size.X+0)], - m.Pix[(0*size.Y+0)*m.Stride+(1*size.X-1)], - m.Pix[(1*size.Y-1)*m.Stride+(0*size.X+0)], - m.Pix[(1*size.Y-1)*m.Stride+(1*size.X-1)], - } - if corners != [4]uint8{} { - return fmt.Errorf("corners were not all zero: %v", corners) - } - return nil -} - -var basicMask = []byte{ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe3, 0xaa, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfa, 0x5f, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x24, 0x00, 0x00, 0x00, - 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa1, 0x00, 0x00, 0x00, - 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x14, 0x00, 0x00, - 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x4a, 0x00, 0x00, - 0x00, 0x00, 0xcc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x81, 0x00, 0x00, - 0x00, 0x00, 0x66, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xe4, 0xff, 0xff, 0xff, 0xb6, 0x00, 0x00, - 0x00, 0x00, 0x0c, 0xf2, 0xff, 0xff, 0xfe, 0x9e, 0x15, 0x00, 0x15, 0x96, 0xff, 0xce, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x88, 0xfc, 0xe3, 0x43, 0x00, 0x00, 0x00, 0x00, 0x06, 0xcd, 0xdc, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x25, 0xde, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -} - -func testBasicPath(t *testing.T, prefix string, dst draw.Image, src image.Image, op draw.Op, want []byte) { - z := NewRasterizer(16, 16) - z.MoveTo(2, 2) - z.LineTo(8, 2) - z.QuadTo(14, 2, 14, 14) - z.CubeTo(8, 2, 5, 20, 2, 8) - z.ClosePath() - - z.DrawOp = op - z.Draw(dst, z.Bounds(), src, image.Point{}) - - var got []byte - switch dst := dst.(type) { - case *image.Alpha: - got = dst.Pix - case *image.RGBA: - got = dst.Pix - default: - t.Errorf("%s: unrecognized dst image type %T", prefix, dst) - } - - if len(got) != len(want) { - t.Errorf("%s: len(got)=%d and len(want)=%d differ", prefix, len(got), len(want)) - return - } - for i := range got { - delta := int(got[i]) - int(want[i]) - // The +/- 2 allows different implementations to give different - // rounding errors. 
- if delta < -2 || +2 < delta { - t.Errorf("%s: i=%d: got %#02x, want %#02x", prefix, i, got[i], want[i]) - return - } - } -} - -func TestBasicPathDstAlpha(t *testing.T) { - for _, background := range []uint8{0x00, 0x80} { - for _, op := range []draw.Op{draw.Over, draw.Src} { - for _, xPadding := range []int{0, 7} { - bounds := image.Rect(0, 0, 16+xPadding, 16) - dst := image.NewAlpha(bounds) - for i := range dst.Pix { - dst.Pix[i] = background - } - - want := make([]byte, len(dst.Pix)) - copy(want, dst.Pix) - - if op == draw.Over && background == 0x80 { - for y := 0; y < 16; y++ { - for x := 0; x < 16; x++ { - ma := basicMask[16*y+x] - i := dst.PixOffset(x, y) - want[i] = 0xff - (0xff-ma)/2 - } - } - } else { - for y := 0; y < 16; y++ { - for x := 0; x < 16; x++ { - ma := basicMask[16*y+x] - i := dst.PixOffset(x, y) - want[i] = ma - } - } - } - - prefix := fmt.Sprintf("background=%#02x, op=%v, xPadding=%d", background, op, xPadding) - testBasicPath(t, prefix, dst, image.Opaque, op, want) - } - } - } -} - -func TestBasicPathDstRGBA(t *testing.T) { - blue := image.NewUniform(color.RGBA{0x00, 0x00, 0xff, 0xff}) - - for _, op := range []draw.Op{draw.Over, draw.Src} { - for _, xPadding := range []int{0, 7} { - bounds := image.Rect(0, 0, 16+xPadding, 16) - dst := image.NewRGBA(bounds) - for y := bounds.Min.Y; y < bounds.Max.Y; y++ { - for x := bounds.Min.X; x < bounds.Max.X; x++ { - dst.SetRGBA(x, y, color.RGBA{ - R: uint8(y * 0x07), - G: uint8(x * 0x05), - B: 0x00, - A: 0x80, - }) - } - } - - want := make([]byte, len(dst.Pix)) - copy(want, dst.Pix) - - if op == draw.Over { - for y := 0; y < 16; y++ { - for x := 0; x < 16; x++ { - ma := basicMask[16*y+x] - i := dst.PixOffset(x, y) - want[i+0] = uint8((uint32(0xff-ma) * uint32(y*0x07)) / 0xff) - want[i+1] = uint8((uint32(0xff-ma) * uint32(x*0x05)) / 0xff) - want[i+2] = ma - want[i+3] = ma/2 + 0x80 - } - } - } else { - for y := 0; y < 16; y++ { - for x := 0; x < 16; x++ { - ma := basicMask[16*y+x] - i := dst.PixOffset(x, 
y) - want[i+0] = 0x00 - want[i+1] = 0x00 - want[i+2] = ma - want[i+3] = ma - } - } - } - - prefix := fmt.Sprintf("op=%v, xPadding=%d", op, xPadding) - testBasicPath(t, prefix, dst, blue, op, want) - } - } -} - -const ( - benchmarkGlyphWidth = 893 - benchmarkGlyphHeight = 1122 -) - -type benchmarkGlyphDatum struct { - // n being 0, 1 or 2 means moveTo, lineTo or quadTo. - n uint32 - px float32 - py float32 - qx float32 - qy float32 -} - -// benchmarkGlyphData is the 'a' glyph from the Roboto Regular font, translated -// so that its top left corner is (0, 0). -var benchmarkGlyphData = []benchmarkGlyphDatum{ - {0, 699, 1102, 0, 0}, - {2, 683, 1070, 673, 988}, - {2, 544, 1122, 365, 1122}, - {2, 205, 1122, 102.5, 1031.5}, - {2, 0, 941, 0, 802}, - {2, 0, 633, 128.5, 539.5}, - {2, 257, 446, 490, 446}, - {1, 670, 446, 0, 0}, - {1, 670, 361, 0, 0}, - {2, 670, 264, 612, 206.5}, - {2, 554, 149, 441, 149}, - {2, 342, 149, 275, 199}, - {2, 208, 249, 208, 320}, - {1, 22, 320, 0, 0}, - {2, 22, 239, 79.5, 163.5}, - {2, 137, 88, 235.5, 44}, - {2, 334, 0, 452, 0}, - {2, 639, 0, 745, 93.5}, - {2, 851, 187, 855, 351}, - {1, 855, 849, 0, 0}, - {2, 855, 998, 893, 1086}, - {1, 893, 1102, 0, 0}, - {1, 699, 1102, 0, 0}, - {0, 392, 961, 0, 0}, - {2, 479, 961, 557, 916}, - {2, 635, 871, 670, 799}, - {1, 670, 577, 0, 0}, - {1, 525, 577, 0, 0}, - {2, 185, 577, 185, 776}, - {2, 185, 863, 243, 912}, - {2, 301, 961, 392, 961}, -} - -func scaledBenchmarkGlyphData(height int) (width int, data []benchmarkGlyphDatum) { - scale := float32(height) / benchmarkGlyphHeight - - // Clone the benchmarkGlyphData slice and scale its coordinates. - data = append(data, benchmarkGlyphData...) - for i := range data { - data[i].px *= scale - data[i].py *= scale - data[i].qx *= scale - data[i].qy *= scale - } - - return int(math.Ceil(float64(benchmarkGlyphWidth * scale))), data -} - -// benchGlyph benchmarks rasterizing a TrueType glyph. 
-// -// Note that, compared to the github.com/google/font-go prototype, the height -// here is the height of the bounding box, not the pixels per em used to scale -// a glyph's vectors. A height of 64 corresponds to a ppem greater than 64. -func benchGlyph(b *testing.B, colorModel byte, loose bool, height int, op draw.Op) { - width, data := scaledBenchmarkGlyphData(height) - z := NewRasterizer(width, height) - - bounds := z.Bounds() - if loose { - bounds.Max.X++ - } - dst, src := draw.Image(nil), image.Image(nil) - switch colorModel { - case 'A': - dst = image.NewAlpha(bounds) - src = image.Opaque - case 'N': - dst = image.NewNRGBA(bounds) - src = image.NewUniform(color.NRGBA{0x40, 0x80, 0xc0, 0xff}) - case 'R': - dst = image.NewRGBA(bounds) - src = image.NewUniform(color.RGBA{0x40, 0x80, 0xc0, 0xff}) - default: - b.Fatal("unsupported color model") - } - bounds = z.Bounds() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - z.Reset(width, height) - z.DrawOp = op - for _, d := range data { - switch d.n { - case 0: - z.MoveTo(d.px, d.py) - case 1: - z.LineTo(d.px, d.py) - case 2: - z.QuadTo(d.px, d.py, d.qx, d.qy) - } - } - z.Draw(dst, bounds, src, image.Point{}) - } -} - -// The heights 16, 32, 64, 128, 256, 1024 include numbers both above and below -// the floatingPointMathThreshold constant (512). 
- -func BenchmarkGlyphAlpha16Over(b *testing.B) { benchGlyph(b, 'A', false, 16, draw.Over) } -func BenchmarkGlyphAlpha16Src(b *testing.B) { benchGlyph(b, 'A', false, 16, draw.Src) } -func BenchmarkGlyphAlpha32Over(b *testing.B) { benchGlyph(b, 'A', false, 32, draw.Over) } -func BenchmarkGlyphAlpha32Src(b *testing.B) { benchGlyph(b, 'A', false, 32, draw.Src) } -func BenchmarkGlyphAlpha64Over(b *testing.B) { benchGlyph(b, 'A', false, 64, draw.Over) } -func BenchmarkGlyphAlpha64Src(b *testing.B) { benchGlyph(b, 'A', false, 64, draw.Src) } -func BenchmarkGlyphAlpha128Over(b *testing.B) { benchGlyph(b, 'A', false, 128, draw.Over) } -func BenchmarkGlyphAlpha128Src(b *testing.B) { benchGlyph(b, 'A', false, 128, draw.Src) } -func BenchmarkGlyphAlpha256Over(b *testing.B) { benchGlyph(b, 'A', false, 256, draw.Over) } -func BenchmarkGlyphAlpha256Src(b *testing.B) { benchGlyph(b, 'A', false, 256, draw.Src) } -func BenchmarkGlyphAlpha1024Over(b *testing.B) { benchGlyph(b, 'A', false, 1024, draw.Over) } -func BenchmarkGlyphAlpha1024Src(b *testing.B) { benchGlyph(b, 'A', false, 1024, draw.Src) } - -func BenchmarkGlyphAlphaLoose16Over(b *testing.B) { benchGlyph(b, 'A', true, 16, draw.Over) } -func BenchmarkGlyphAlphaLoose16Src(b *testing.B) { benchGlyph(b, 'A', true, 16, draw.Src) } -func BenchmarkGlyphAlphaLoose32Over(b *testing.B) { benchGlyph(b, 'A', true, 32, draw.Over) } -func BenchmarkGlyphAlphaLoose32Src(b *testing.B) { benchGlyph(b, 'A', true, 32, draw.Src) } -func BenchmarkGlyphAlphaLoose64Over(b *testing.B) { benchGlyph(b, 'A', true, 64, draw.Over) } -func BenchmarkGlyphAlphaLoose64Src(b *testing.B) { benchGlyph(b, 'A', true, 64, draw.Src) } -func BenchmarkGlyphAlphaLoose128Over(b *testing.B) { benchGlyph(b, 'A', true, 128, draw.Over) } -func BenchmarkGlyphAlphaLoose128Src(b *testing.B) { benchGlyph(b, 'A', true, 128, draw.Src) } -func BenchmarkGlyphAlphaLoose256Over(b *testing.B) { benchGlyph(b, 'A', true, 256, draw.Over) } -func BenchmarkGlyphAlphaLoose256Src(b 
*testing.B) { benchGlyph(b, 'A', true, 256, draw.Src) } -func BenchmarkGlyphAlphaLoose1024Over(b *testing.B) { benchGlyph(b, 'A', true, 1024, draw.Over) } -func BenchmarkGlyphAlphaLoose1024Src(b *testing.B) { benchGlyph(b, 'A', true, 1024, draw.Src) } - -func BenchmarkGlyphRGBA16Over(b *testing.B) { benchGlyph(b, 'R', false, 16, draw.Over) } -func BenchmarkGlyphRGBA16Src(b *testing.B) { benchGlyph(b, 'R', false, 16, draw.Src) } -func BenchmarkGlyphRGBA32Over(b *testing.B) { benchGlyph(b, 'R', false, 32, draw.Over) } -func BenchmarkGlyphRGBA32Src(b *testing.B) { benchGlyph(b, 'R', false, 32, draw.Src) } -func BenchmarkGlyphRGBA64Over(b *testing.B) { benchGlyph(b, 'R', false, 64, draw.Over) } -func BenchmarkGlyphRGBA64Src(b *testing.B) { benchGlyph(b, 'R', false, 64, draw.Src) } -func BenchmarkGlyphRGBA128Over(b *testing.B) { benchGlyph(b, 'R', false, 128, draw.Over) } -func BenchmarkGlyphRGBA128Src(b *testing.B) { benchGlyph(b, 'R', false, 128, draw.Src) } -func BenchmarkGlyphRGBA256Over(b *testing.B) { benchGlyph(b, 'R', false, 256, draw.Over) } -func BenchmarkGlyphRGBA256Src(b *testing.B) { benchGlyph(b, 'R', false, 256, draw.Src) } -func BenchmarkGlyphRGBA1024Over(b *testing.B) { benchGlyph(b, 'R', false, 1024, draw.Over) } -func BenchmarkGlyphRGBA1024Src(b *testing.B) { benchGlyph(b, 'R', false, 1024, draw.Src) } - -func BenchmarkGlyphNRGBA16Over(b *testing.B) { benchGlyph(b, 'N', false, 16, draw.Over) } -func BenchmarkGlyphNRGBA16Src(b *testing.B) { benchGlyph(b, 'N', false, 16, draw.Src) } -func BenchmarkGlyphNRGBA32Over(b *testing.B) { benchGlyph(b, 'N', false, 32, draw.Over) } -func BenchmarkGlyphNRGBA32Src(b *testing.B) { benchGlyph(b, 'N', false, 32, draw.Src) } -func BenchmarkGlyphNRGBA64Over(b *testing.B) { benchGlyph(b, 'N', false, 64, draw.Over) } -func BenchmarkGlyphNRGBA64Src(b *testing.B) { benchGlyph(b, 'N', false, 64, draw.Src) } -func BenchmarkGlyphNRGBA128Over(b *testing.B) { benchGlyph(b, 'N', false, 128, draw.Over) } -func 
BenchmarkGlyphNRGBA128Src(b *testing.B) { benchGlyph(b, 'N', false, 128, draw.Src) } -func BenchmarkGlyphNRGBA256Over(b *testing.B) { benchGlyph(b, 'N', false, 256, draw.Over) } -func BenchmarkGlyphNRGBA256Src(b *testing.B) { benchGlyph(b, 'N', false, 256, draw.Src) } -func BenchmarkGlyphNRGBA1024Over(b *testing.B) { benchGlyph(b, 'N', false, 1024, draw.Over) } -func BenchmarkGlyphNRGBA1024Src(b *testing.B) { benchGlyph(b, 'N', false, 1024, draw.Src) } |